github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/compiler/impl_amd64.go

package compiler

// This file implements the compiler for the amd64/x86_64 target.
// Please refer to https://www.felixcloutier.com/x86/index.html
// if unfamiliar with the amd64 instructions used here.

import (
	"fmt"
	"math"

	"github.com/bananabytelabs/wazero/internal/asm"
	"github.com/bananabytelabs/wazero/internal/asm/amd64"
	"github.com/bananabytelabs/wazero/internal/platform"
	"github.com/bananabytelabs/wazero/internal/u32"
	"github.com/bananabytelabs/wazero/internal/u64"
	"github.com/bananabytelabs/wazero/internal/wasm"
	"github.com/bananabytelabs/wazero/internal/wazeroir"
)

var (
	_minimum32BitSignedInt int32 = math.MinInt32
	_maximum32BitSignedInt int32 = math.MaxInt32
	_maximum32BitUnsignedInt uint32 = math.MaxUint32
	_minimum64BitSignedInt int64 = math.MinInt64
	_maximum64BitSignedInt int64 = math.MaxInt64
	_maximum64BitUnsignedInt uint64 = math.MaxUint64
	_float32SignBitMask uint32 = 1 << 31
	_float32RestBitMask = ^_float32SignBitMask
	_float64SignBitMask uint64 = 1 << 63
	_float64RestBitMask = ^_float64SignBitMask
	_float32ForMinimumSigned32bitInteger = uint32(0xCF00_0000)
	_float64ForMinimumSigned32bitInteger = uint64(0xC1E0_0000_0020_0000)
	_float32ForMinimumSigned64bitInteger = uint32(0xDF00_0000)
	_float64ForMinimumSigned64bitInteger = uint64(0xC3E0_0000_0000_0000)
	_float32ForMaximumSigned32bitIntPlusOne = uint32(0x4F00_0000)
	_float64ForMaximumSigned32bitIntPlusOne = uint64(0x41E0_0000_0000_0000)
	_float32ForMaximumSigned64bitIntPlusOne = uint32(0x5F00_0000)
	_float64ForMaximumSigned64bitIntPlusOne = uint64(0x43E0_0000_0000_0000)
)

var (
	// amd64ReservedRegisterForCallEngine: pointer to callEngine (i.e. *callEngine as uintptr).
	amd64ReservedRegisterForCallEngine = amd64.RegR13
	// amd64ReservedRegisterForStackBasePointerAddress: stack base pointer's address (callEngine.stackBasePointer) in the current function call.
	amd64ReservedRegisterForStackBasePointerAddress = amd64.RegR14
	// amd64ReservedRegisterForMemory: pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
	amd64ReservedRegisterForMemory = amd64.RegR15
)

var (
	amd64UnreservedVectorRegisters = []asm.Register{ //nolint
		amd64.RegX0, amd64.RegX1, amd64.RegX2, amd64.RegX3,
		amd64.RegX4, amd64.RegX5, amd64.RegX6, amd64.RegX7,
		amd64.RegX8, amd64.RegX9, amd64.RegX10, amd64.RegX11,
		amd64.RegX12, amd64.RegX13, amd64.RegX14, amd64.RegX15,
	}
	// Note that we never invoke the "call" instruction,
	// so we don't need to care about the calling convention.
	// TODO: maybe it is safe to just save rbp and rsp somewhere
	// in Go-allocated variables, reuse these registers
	// in compiled functions, and write them back before returns.
	amd64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint
		amd64.RegAX, amd64.RegCX, amd64.RegDX, amd64.RegBX,
		amd64.RegSI, amd64.RegDI, amd64.RegR8, amd64.RegR9,
		amd64.RegR10, amd64.RegR11, amd64.RegR12,
	}
)
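// The truncation-boundary constants above encode specific IEEE-754 bit patterns. The helper
// below is an illustrative, hedged sketch (not used by the compiler, and not part of the
// original source) showing, via the math package imported above, how one such pattern maps
// back to its float value: 0x4F00_0000 is float32(2^31), a boundary value used by the
// float-to-integer truncation checks elsewhere in this file. The function name is hypothetical.
func float32MaxSigned32bitIntPlusOneValue() float32 { //nolint:unused // illustrative example only
	return math.Float32frombits(_float32ForMaximumSigned32bitIntPlusOne) // == 2147483648.0 == 2^31
}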
// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the *wasm.ModuleInstance of the
// next executing function instance. The value is set and used when making function calls
// or function returns in the ModuleContextInitialization. See compileModuleContextInitialization.
var amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister = amd64.RegR12

func (c *amd64Compiler) String() string {
	return c.locationStack.String()
}

// compileNOP implements compiler.compileNOP for the amd64 architecture.
func (c *amd64Compiler) compileNOP() asm.Node {
	return c.assembler.CompileStandAlone(amd64.NOP)
}

type amd64Compiler struct {
	assembler amd64.Assembler
	ir *wazeroir.CompilationResult
	cpuFeatures platform.CpuFeatureFlags
	// locationStack holds the state of the wazeroir virtual stack,
	// and each item is either placed in a register or on the actual memory stack.
	locationStack *runtimeValueLocationStack
	// labels hold per-wazeroir-label specific information in this function.
	labels [wazeroir.LabelKindNum][]amd64LabelInfo
	// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
	stackPointerCeil uint64
	// assignStackPointerCeilNeeded holds an asm.Node whose AssignDestinationConstant must be called with the determined stack pointer ceiling.
	assignStackPointerCeilNeeded asm.Node
	compiledTrapTargets [nativeCallStatusModuleClosed]asm.Node
	withListener bool
	typ *wasm.FunctionType
	// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
	// we cache it here, and reset and set it to .locationStack in the Init method.
	locationStackForEntrypoint runtimeValueLocationStack
	// frameIDMax tracks the maximum value of frame id per function.
	frameIDMax int
	brTableTmp []runtimeValueLocation

	fourZeros,
	eightZeros,
	minimum32BitSignedInt,
	maximum32BitSignedInt,
	maximum32BitUnsignedInt,
	minimum64BitSignedInt,
	maximum64BitSignedInt,
	maximum64BitUnsignedInt,
	float32SignBitMask,
	float32RestBitMask,
	float64SignBitMask,
	float64RestBitMask,
	float32ForMinimumSigned32bitInteger,
	float64ForMinimumSigned32bitInteger,
	float32ForMinimumSigned64bitInteger,
	float64ForMinimumSigned64bitInteger,
	float32ForMaximumSigned32bitIntPlusOne,
	float64ForMaximumSigned32bitIntPlusOne,
	float32ForMaximumSigned64bitIntPlusOne,
	float64ForMaximumSigned64bitIntPlusOne *asm.StaticConst
}

func newAmd64Compiler() compiler {
	c := &amd64Compiler{
		assembler: amd64.NewAssembler(),
		locationStackForEntrypoint: newRuntimeValueLocationStack(),
		cpuFeatures: platform.CpuFeatures,
	}

	c.fourZeros = asm.NewStaticConst([]byte{0, 0, 0, 0})
	c.eightZeros = asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0})
	c.minimum32BitSignedInt = asm.NewStaticConst(u32.LeBytes(uint32(_minimum32BitSignedInt)))
	c.maximum32BitSignedInt = asm.NewStaticConst(u32.LeBytes(uint32(_maximum32BitSignedInt)))
	c.maximum32BitUnsignedInt = asm.NewStaticConst(u32.LeBytes(_maximum32BitUnsignedInt))
	c.minimum64BitSignedInt = asm.NewStaticConst(u64.LeBytes(uint64(_minimum64BitSignedInt)))
	c.maximum64BitSignedInt = asm.NewStaticConst(u64.LeBytes(uint64(_maximum64BitSignedInt)))
	c.maximum64BitUnsignedInt = asm.NewStaticConst(u64.LeBytes(_maximum64BitUnsignedInt))
	c.float32SignBitMask = asm.NewStaticConst(u32.LeBytes(_float32SignBitMask))
	c.float32RestBitMask = asm.NewStaticConst(u32.LeBytes(_float32RestBitMask))
	c.float64SignBitMask = asm.NewStaticConst(u64.LeBytes(_float64SignBitMask))
	c.float64RestBitMask = asm.NewStaticConst(u64.LeBytes(_float64RestBitMask))
	c.float32ForMinimumSigned32bitInteger =
		asm.NewStaticConst(u32.LeBytes(_float32ForMinimumSigned32bitInteger))
	c.float64ForMinimumSigned32bitInteger = asm.NewStaticConst(u64.LeBytes(_float64ForMinimumSigned32bitInteger))
	c.float32ForMinimumSigned64bitInteger = asm.NewStaticConst(u32.LeBytes(_float32ForMinimumSigned64bitInteger))
	c.float64ForMinimumSigned64bitInteger = asm.NewStaticConst(u64.LeBytes(_float64ForMinimumSigned64bitInteger))
	c.float32ForMaximumSigned32bitIntPlusOne = asm.NewStaticConst(u32.LeBytes(_float32ForMaximumSigned32bitIntPlusOne))
	c.float64ForMaximumSigned32bitIntPlusOne = asm.NewStaticConst(u64.LeBytes(_float64ForMaximumSigned32bitIntPlusOne))
	c.float32ForMaximumSigned64bitIntPlusOne = asm.NewStaticConst(u32.LeBytes(_float32ForMaximumSigned64bitIntPlusOne))
	c.float64ForMaximumSigned64bitIntPlusOne = asm.NewStaticConst(u64.LeBytes(_float64ForMaximumSigned64bitIntPlusOne))
	return c
}

// Init implements compiler.Init.
func (c *amd64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
	c.assembler.Reset()
	c.locationStackForEntrypoint.reset()
	c.resetLabels()
	*c = amd64Compiler{
		ir: ir,
		withListener: withListener,
		typ: typ,
		assembler: c.assembler,
		cpuFeatures: c.cpuFeatures,
		labels: c.labels,
		locationStackForEntrypoint: c.locationStackForEntrypoint,
		brTableTmp: c.brTableTmp,
		fourZeros: c.fourZeros,
		eightZeros: c.eightZeros,
		minimum32BitSignedInt: c.minimum32BitSignedInt,
		maximum32BitSignedInt: c.maximum32BitSignedInt,
		maximum32BitUnsignedInt: c.maximum32BitUnsignedInt,
		minimum64BitSignedInt: c.minimum64BitSignedInt,
		maximum64BitSignedInt: c.maximum64BitSignedInt,
		maximum64BitUnsignedInt: c.maximum64BitUnsignedInt,
		float32SignBitMask: c.float32SignBitMask,
		float32RestBitMask: c.float32RestBitMask,
		float64SignBitMask: c.float64SignBitMask,
		float64RestBitMask: c.float64RestBitMask,
		float32ForMinimumSigned32bitInteger: c.float32ForMinimumSigned32bitInteger,
		float64ForMinimumSigned32bitInteger: c.float64ForMinimumSigned32bitInteger,
		float32ForMinimumSigned64bitInteger: c.float32ForMinimumSigned64bitInteger,
		float64ForMinimumSigned64bitInteger: c.float64ForMinimumSigned64bitInteger,
		float32ForMaximumSigned32bitIntPlusOne: c.float32ForMaximumSigned32bitIntPlusOne,
		float64ForMaximumSigned32bitIntPlusOne: c.float64ForMaximumSigned32bitIntPlusOne,
		float32ForMaximumSigned64bitIntPlusOne: c.float32ForMaximumSigned64bitIntPlusOne,
		float64ForMaximumSigned64bitIntPlusOne: c.float64ForMaximumSigned64bitIntPlusOne,
	}

	// Reuse the initial location stack for the compilation of subsequent functions.
	c.locationStack = &c.locationStackForEntrypoint
}

// resetLabels resets the existing content in amd64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *amd64Compiler) resetLabels() {
	for i := range c.labels {
		for j := range c.labels[i] {
			if j > c.frameIDMax {
				// We only need to reset up to the maximum frame id. This makes the compilation faster for large binaries.
				break
			}
			l := &c.labels[i][j]
			l.initialInstruction = nil
			l.stackInitialized = false
			l.initialStack.reset()
		}
	}
}

// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the amd64 architecture.
func (c *amd64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
	return c.locationStack
}

// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *amd64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
	if c.stackPointerCeil < c.locationStack.stackPointerCeil {
		c.stackPointerCeil = c.locationStack.stackPointerCeil
	}
	c.locationStack = newStack
}

// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for amd64.
func (c *amd64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
	ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
	c.locationStack.markRegisterUsed(reg)
	return
}

// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for amd64.
func (c *amd64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
	lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
	c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
	c.locationStack.markRegisterUsed(reg)
	return
}

type amd64LabelInfo struct {
	// initialInstruction is the initial instruction for this label so other blocks can jump into it.
	initialInstruction asm.Node
	// initialStack is the initial value location stack from which we start compiling this label.
	initialStack runtimeValueLocationStack
	stackInitialized bool
}

func (c *amd64Compiler) label(label wazeroir.Label) *amd64LabelInfo {
	kind := label.Kind()
	frames := c.labels[kind]
	frameID := label.FrameID()
	if c.frameIDMax < frameID {
		c.frameIDMax = frameID
	}
	// If the frameID is not allocated yet, expand the slice to cover it,
	// so that we can reduce allocations in subsequent compilations.
	if diff := frameID - len(frames) + 1; diff > 0 {
		for i := 0; i < diff; i++ {
			frames = append(frames, amd64LabelInfo{initialStack: newRuntimeValueLocationStack()})
		}
		c.labels[kind] = frames
	}
	return &frames[frameID]
}

// compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the amd64 architecture.
func (c *amd64Compiler) compileBuiltinFunctionCheckExitCode() error {
	if err := c.compileCallBuiltinFunction(builtinFunctionIndexCheckExitCode); err != nil {
		return err
	}

	// After the function call, we have to initialize the stack base pointer and memory reserved registers.
	c.compileReservedStackBasePointerInitialization()
	c.compileReservedMemoryPointerInitialization()
	return nil
}

// compileGoDefinedHostFunction constructs the entire code to enter the host function implementation,
// and return to the caller.
func (c *amd64Compiler) compileGoDefinedHostFunction() error {
	// First we must update the location stack to reflect the number of host function inputs.
	c.locationStack.init(c.typ)

	if c.withListener {
		if err := c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// The host function needs access to the caller's module instance, and the caller's information is stored in the stack
	// (as described in the doc of callEngine.stack). Here, we get the caller's *function from the stack,
	// load its *wasm.ModuleInstance, and save it in callEngine.exitContext.callerModuleInstance so we can pass it
	// to the host function without sacrificing performance.
	c.compileReservedStackBasePointerInitialization()
	// Alias for readability.
	tmp := amd64.RegAX
	// Get the location of the callerFunction (*function) in the stack, which depends on the signature.
	_, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
	// Load the value into the tmp register: tmp = &function{..}
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	// tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...}
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, functionModuleInstanceOffset, tmp)
	// Store it into callEngine.exitContext.callerModuleInstance.
	c.assembler.CompileRegisterToMemory(amd64.MOVQ,
		tmp,
		amd64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset)
	// Reset the state of the callerFunction value location so that we won't mess up the subsequent code generation below.
	c.locationStack.releaseRegister(callerFunction)

	if err := c.compileCallGoHostFunction(); err != nil {
		return err
	}

	// Initialize the reserved stack base pointer which is used to retrieve the call frame stack.
	c.compileReservedStackBasePointerInitialization()

	// A Go function can change the module state in arbitrary ways, so we have to force
	// the callEngine.moduleContext initialization on the function return. To do so,
	// we zero-out callEngine.moduleInstance.
	c.assembler.CompileConstToMemory(amd64.MOVQ,
		0, amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)
	return c.compileReturnFunction()
}

// compile implements compiler.compile for the amd64 architecture.
func (c *amd64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) {
	// c.stackPointerCeil tracks the stack pointer ceiling (max seen) value across all runtimeValueLocationStack(s)
	// used for all labels (via setLocationStack), excluding the current one.
	// Hence, we check here if the final block's max one exceeds the current c.stackPointerCeil.
	stackPointerCeil = c.stackPointerCeil
	if stackPointerCeil < c.locationStack.stackPointerCeil {
		stackPointerCeil = c.locationStack.stackPointerCeil
	}

	// Now that the max stack pointer is determined, we are invoking the callback.
	// Note this MUST be called before Assemble() below.
	c.assignStackPointerCeil(stackPointerCeil)

	err = c.assembler.Assemble(buf)
	return
}

// compileUnreachable implements compiler.compileUnreachable for the amd64 architecture.
func (c *amd64Compiler) compileUnreachable() error {
	c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
	return nil
}
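// stackPointerCeilInBytesModel is an illustrative, hedged sketch (not used by the compiler, and not
// part of the original source) of the constant that assignStackPointerCeil below writes into the
// pending asm.Node: the ceiling counts 64-bit value slots, so it is shifted left by 3 (multiplied
// by 8) to obtain a byte count for the stack-growth check emitted elsewhere in this package.
// The name is hypothetical, for this example only.
func stackPointerCeilInBytesModel(ceil uint64) int64 { //nolint:unused // illustrative example only
	return int64(ceil) << 3 // 8 bytes per value slot.
}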
// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the amd64 architecture.
func (c *amd64Compiler) assignStackPointerCeil(ceil uint64) {
	if c.assignStackPointerCeilNeeded != nil {
		c.assignStackPointerCeilNeeded.AssignDestinationConstant(int64(ceil) << 3)
	}
}

// compileSet implements compiler.compileSet for the amd64 architecture.
func (c *amd64Compiler) compileSet(o *wazeroir.UnionOperation) error {
	depth := int(o.U1)
	isTargetVector := o.B3

	setTargetIndex := int(c.locationStack.sp) - 1 - depth

	if isTargetVector {
		_ = c.locationStack.pop() // ignore the higher 64-bits.
	}
	v := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	targetLocation := &c.locationStack.stack[setTargetIndex]
	if targetLocation.onRegister() {
		// We no longer need the register previously used by the target location.
		c.locationStack.markRegisterUnused(targetLocation.register)
	}

	reg := v.register
	targetLocation.setRegister(reg)
	targetLocation.valueType = v.valueType
	if isTargetVector {
		hi := &c.locationStack.stack[setTargetIndex+1]
		hi.setRegister(reg)
	}
	return nil
}

// compileGlobalGet implements compiler.compileGlobalGet for the amd64 architecture.
func (c *amd64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	intReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, move the pointer to the global slice into the allocated register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, intReg)

	index := o.U1

	// Now, move the location of the global instance into the register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, intReg, 8*int64(index), intReg)

	// When an integer, reuse the pointer register for the value. Otherwise, allocate a float register for it.
	valueReg := intReg
	var vt runtimeValueType
	var inst asm.Instruction
	switch c.ir.Globals[index].ValType {
	case wasm.ValueTypeI32:
		inst = amd64.MOVL
		vt = runtimeValueTypeI32
	case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
		inst = amd64.MOVQ
		vt = runtimeValueTypeI64
	case wasm.ValueTypeF32:
		inst = amd64.MOVL
		vt = runtimeValueTypeF32
		valueReg, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
	case wasm.ValueTypeF64:
		inst = amd64.MOVQ
		vt = runtimeValueTypeF64
		valueReg, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
	case wasm.ValueTypeV128:
		inst = amd64.MOVDQU
		vt = runtimeValueTypeV128Lo
		valueReg, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
	default:
		panic("BUG: unknown runtime value type")
	}

	// Using the register holding the pointer to the target instance, move its value into a register.
	c.assembler.CompileMemoryToRegister(inst, intReg, globalInstanceValueOffset, valueReg)

	// Record that the retrieved global value on the top of the stack is now in a register.
	if vt == runtimeValueTypeV128Lo {
		c.pushVectorRuntimeValueLocationOnRegister(valueReg)
	} else {
		c.pushRuntimeValueLocationOnRegister(valueReg, vt)
	}
	return nil
}

// compileGlobalSet implements compiler.compileGlobalSet for the amd64 architecture.
func (c *amd64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error {
	index := o.U1

	wasmValueType := c.ir.Globals[index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128

	// First, move the value to set into a temporary register.
	val := c.locationStack.pop()
	if isV128 {
		// The previously popped val is the higher 64-bits, and we have to use the lower 64-bits'
		// runtimeValueLocation for allocation, etc.
		val = c.locationStack.pop()
	}
	if err := c.compileEnsureOnRegister(val); err != nil {
		return err
	}

	// Allocate a register to hold the memory location of the target global instance.
	intReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, move the pointer to the global slice into the allocated register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, intReg)

	// Now, move the location of the global instance into the register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, intReg, 8*int64(index), intReg)

	// Now we are ready to write the value to the global instance location.
	var inst asm.Instruction
	if isV128 {
		inst = amd64.MOVDQU
	} else if wasmValueType == wasm.ValueTypeI32 || wasmValueType == wasm.ValueTypeF32 {
		inst = amd64.MOVL
	} else {
		inst = amd64.MOVQ
	}
	c.assembler.CompileRegisterToMemory(inst, val.register, intReg, globalInstanceValueOffset)

	// Since the value is now written to memory, release the value register.
	c.locationStack.releaseRegister(val)
	return nil
}

// compileBr implements compiler.compileBr for the amd64 architecture.
func (c *amd64Compiler) compileBr(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	return c.branchInto(wazeroir.Label(o.U1))
}

// branchInto adds the instructions necessary to jump into the given branch target.
func (c *amd64Compiler) branchInto(target wazeroir.Label) error {
	if target.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		if c.ir.LabelCallers[target] > 1 {
			// We can only reuse the register state when there's a single call-site.
			// Release the existing values on registers to the stack if there are multiple call-sites
			// so that we have a consistent value location state at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		targetLabel := c.label(target)
		if !targetLabel.stackInitialized {
			targetLabel.initialStack.cloneFrom(*c.locationStack)
			targetLabel.stackInitialized = true
		}
		jmp := c.assembler.CompileJump(amd64.JMP)
		c.assignJumpTarget(target, jmp)
	}
	return nil
}
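// globalInstanceLookupModel is an illustrative, hedged sketch (not used by the compiler, and not
// part of the original source) of the addressing emitted by compileGlobalGet and compileGlobalSet
// above: callEngine caches a pointer to the first element of the module's globals slice, each
// element is an 8-byte pointer to a wasm.GlobalInstance, and the value itself is then read from or
// written to globalInstanceValueOffset bytes into that instance. Names are hypothetical.
func globalInstanceLookupModel(globals []*wasm.GlobalInstance, index uint64) *wasm.GlobalInstance { //nolint:unused // illustrative example only
	// First MOVQ: load &globals[0]; second MOVQ: load the pointer stored at &globals[0] + 8*index.
	return globals[index]
}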
// compileBrIf implements compiler.compileBrIf for the amd64 architecture.
func (c *amd64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
	cond := c.locationStack.pop()
	var jmpWithCond asm.Node
	if cond.onConditionalRegister() {
		var inst asm.Instruction
		switch cond.conditionalRegister {
		case amd64.ConditionalRegisterStateE:
			inst = amd64.JEQ
		case amd64.ConditionalRegisterStateNE:
			inst = amd64.JNE
		case amd64.ConditionalRegisterStateS:
			inst = amd64.JMI
		case amd64.ConditionalRegisterStateNS:
			inst = amd64.JPL
		case amd64.ConditionalRegisterStateG:
			inst = amd64.JGT
		case amd64.ConditionalRegisterStateGE:
			inst = amd64.JGE
		case amd64.ConditionalRegisterStateL:
			inst = amd64.JLT
		case amd64.ConditionalRegisterStateLE:
			inst = amd64.JLE
		case amd64.ConditionalRegisterStateA:
			inst = amd64.JHI
		case amd64.ConditionalRegisterStateAE:
			inst = amd64.JCC
		case amd64.ConditionalRegisterStateB:
			inst = amd64.JCS
		case amd64.ConditionalRegisterStateBE:
			inst = amd64.JLS
		}
		jmpWithCond = c.assembler.CompileJump(inst)
	} else {
		// Usually the comparison operand for br_if is on the conditional register,
		// but in some cases, it is on the stack or a register.
		// For example, the following code
		//	i64.const 1
		//	local.get 1
		//	i64.add
		//	br_if ....
		// will try to use the result of i64.add, which resides on the (virtual) stack,
		// as the operand for the br_if instruction.
		if err := c.compileEnsureOnRegister(cond); err != nil {
			return err
		}
		// Check if the value does not equal zero.
		c.assembler.CompileRegisterToRegister(amd64.TESTQ, cond.register, cond.register)

		// Emit the jump instruction which jumps when the value does not equal zero.
		jmpWithCond = c.assembler.CompileJump(amd64.JNE)
		c.locationStack.markRegisterUnused(cond.register)
	}

	// Make sure that the next coming label is the else jump target.
	thenTarget := wazeroir.Label(o.U1)
	elseTarget := wazeroir.Label(o.U2)
	thenToDrop := o.U3

	// Here's the diagram of how we organize the instructions necessary for the br_if operation.
	//
	// jmp_with_cond -> jmp (.Else) -> Then operations...
	//    |---------(satisfied)------------^^^
	//
	// Note that the .Else branch doesn't have ToDrop as .Else in reality
	// corresponds to either If's Else block or Br_if's else block in Wasm.

	// Emit the else branch.
	if elseTarget.IsReturnTarget() {
		if err := c.compileReturnFunction(); err != nil {
			return err
		}
	} else {
		labelInfo := c.label(elseTarget)
		if !labelInfo.stackInitialized {
			labelInfo.initialStack.cloneFrom(*c.locationStack)
			labelInfo.stackInitialized = true
		}

		elseJmp := c.assembler.CompileJump(amd64.JMP)
		c.assignJumpTarget(elseTarget, elseJmp)
	}

	// Handle the then branch.
	c.assembler.SetJumpTargetOnNext(jmpWithCond)
	if err := compileDropRange(c, thenToDrop); err != nil {
		return err
	}
	if thenTarget.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		thenLabel := thenTarget
		if c.ir.LabelCallers[thenLabel] > 1 {
			// We can only reuse the register state when there's a single call-site.
			// Release the existing values on registers to the stack if there are multiple call-sites
			// so that we have a consistent value location state at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		labelInfo := c.label(thenLabel)
		if !labelInfo.stackInitialized {
			labelInfo.initialStack.cloneFrom(*c.locationStack)
			labelInfo.stackInitialized = true
		}
		thenJmp := c.assembler.CompileJump(amd64.JMP)
		c.assignJumpTarget(thenLabel, thenJmp)
		return nil
	}
}

// compileBrTable implements compiler.compileBrTable for the amd64 architecture.
func (c *amd64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
	index := c.locationStack.pop()

	// If the operation only consists of the default target, we branch into it and return early.
	if len(o.Us) == 2 {
		c.locationStack.releaseRegister(index)
		if err := compileDropRange(c, o.Us[1]); err != nil {
			return err
		}
		return c.branchInto(wazeroir.Label(o.Us[0]))
	}

	// Otherwise, we jump into the selected branch.
	if err := c.compileEnsureOnRegister(index); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, we move the length of the target list into the tmp register.
	c.assembler.CompileConstToRegister(amd64.MOVQ, int64(len(o.Us)/2-1), tmp)

	// Then, we compare the value with the length of targets.
	c.assembler.CompileRegisterToRegister(amd64.CMPL, tmp, index.register)

	// If the value is larger than the length, we round the index to the length,
	// as the spec states that if the index is larger than or equal to the length
	// of the list, we branch into the default branch.
	c.assembler.CompileRegisterToRegister(amd64.CMOVQCS, tmp, index.register)

	// We prepare the static data which holds the offset of
	// each target's first instruction (incl. default)
	// relative to the beginning of the label tables.
	//
	// For example, if we have targets=[L0, L1] and default=L_DEFAULT,
	// we emit the code like this at [Emit the code for each target and the default branch] below.
	//
	// L0:
	//  0x123001: XXXX, ...
	//  .....
	// L1:
	//  0x123005: YYY, ...
	//  .....
	// L_DEFAULT:
	//  0x123009: ZZZ, ...
	//
	// then offsetData becomes like [0x0, 0x5, 0x8].
	// By using this offset list, we could jump into the label for the index by
	// "jmp offsetData[index]+0x123001" and "0x123001" can be acquired by the "LEA"
	// instruction.
	//
	// Note: we store each offset as a 32-bit unsigned integer, i.e. 4 consecutive bytes. So more precisely,
	// the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
	//
	// Note: this is similar to how GCC implements switch statements in C.
	offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2)))

	// Load the offsetData's address into tmp.
	if err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, offsetData, tmp); err != nil {
		return err
	}

	// Now we have the address of the first byte of offsetData in the tmp register.
	// So the target offset's first byte is at tmp+index*4 as we store
	// the offset as 4 bytes for a 32-bit integer.
	// Here, we store the offset into the index.register.
	c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVL, tmp, 0, index.register, 4, index.register)

	// Now we read the address of the beginning of the jump table.
	// In the above example, this corresponds to reading the address of 0x123001.
	c.assembler.CompileReadInstructionAddress(tmp, amd64.JMP)

	// Now we have the address of L0 in the tmp register, and the offset to the target label in the index.register.
	// So we can achieve the br_table jump by adding them and jumping into the resulting address.
	c.assembler.CompileRegisterToRegister(amd64.ADDQ, index.register, tmp)

	c.assembler.CompileJumpToRegister(amd64.JMP, tmp)

	// We no longer need the index's register, so mark it unused.
	c.locationStack.markRegisterUnused(index.register)

	// [Emit the code for each target and the default branch]
	labelInitialInstructions := make([]asm.Node, len(o.Us)/2)

	// Since we might end up with different stack states in each branch,
	// we need to save the initial stack state here, and use the same initial state
	// for each iteration.
	initialLocationStack := c.getSavedTemporaryLocationStack()

	for i := range labelInitialInstructions {
		// Emit the initial instruction of each target.
		// We use NOP as we don't yet know the next instruction in each label.
		// The assembler will optimize out this NOP during code generation, so this is harmless.
		labelInitialInstructions[i] = c.assembler.CompileStandAlone(amd64.NOP)

		targetLabel := wazeroir.Label(o.Us[i*2])
		targetToDrop := o.Us[i*2+1]
		if err = compileDropRange(c, targetToDrop); err != nil {
			return err
		}
		if err = c.branchInto(targetLabel); err != nil {
			return err
		}
		// After the iteration, reset the stack's state with initialLocationStack.
		c.locationStack.cloneFrom(initialLocationStack)
	}

	c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
	return nil
}

func (c *amd64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
	initialLocationStack := *c.locationStack // Take a copy!
	// Use c.brTableTmp for the underlying stack so that we can reduce the allocations.
	if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
		c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
	}
	copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
	initialLocationStack.stack = c.brTableTmp
	return initialLocationStack
}

func (c *amd64Compiler) assignJumpTarget(label wazeroir.Label, jmpInstruction asm.Node) {
	jmpTargetLabel := c.label(label)
	targetInst := jmpTargetLabel.initialInstruction
	if targetInst == nil {
		// If the label isn't compiled yet, allocate the NOP node, and set it as the initial instruction.
		targetInst = c.assembler.AllocateNOP()
		jmpTargetLabel.initialInstruction = targetInst
	}
	jmpInstruction.AssignJumpTarget(targetInst)
}
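// brTableClampModel is an illustrative, hedged sketch (not used by the compiler, and not part of
// the original source) of the index selection emitted above with CMPL/CMOVQCS in compileBrTable:
// any index greater than or equal to the number of explicit targets is redirected to the last
// offsetData entry, which holds the default target, matching Wasm br_table semantics.
// The name and parameters are hypothetical, introduced only for this example.
func brTableClampModel(index, numExplicitTargets uint32) uint32 { //nolint:unused // illustrative example only
	if index >= numExplicitTargets {
		// Out-of-range indices select the last offsetData entry, which is the default target.
		return numExplicitTargets
	}
	return index
}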
// compileLabel implements compiler.compileLabel for the amd64 architecture.
func (c *amd64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipLabel bool) {
	label := wazeroir.Label(o.U1)
	labelInfo := c.label(label)

	// If initialStack is not set, that means this label has never been reached.
	if !labelInfo.stackInitialized {
		skipLabel = true
		return
	}

	// We use NOP as the beginning of the instructions in a label.
	if labelBegin := labelInfo.initialInstruction; labelBegin == nil {
		// This NOP should eventually be optimized out by the assembler.
		labelInfo.initialInstruction = c.assembler.CompileStandAlone(amd64.NOP)
	} else {
		c.assembler.Add(labelBegin)
	}

	// Set the initial stack.
	c.setLocationStack(&labelInfo.initialStack)
	return
}

// compileCall implements compiler.compileCall for the amd64 architecture.
func (c *amd64Compiler) compileCall(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	functionIndex := o.U1

	target := c.ir.Functions[functionIndex]
	targetType := &c.ir.Types[target]

	targetAddressRegister, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, place the byte offset of the target function within callEngine.functions
	// (= functionIndex * functionSize) into the target register.
	c.assembler.CompileConstToRegister(amd64.MOVQ, int64(functionIndex)*functionSize, targetAddressRegister)

	// Next, we add the address of the first item of the callEngine.functions slice (= &callEngine.functions[0])
	// to the target register.
	c.assembler.CompileMemoryToRegister(amd64.ADDQ, amd64ReservedRegisterForCallEngine,
		callEngineModuleContextFunctionsElement0AddressOffset, targetAddressRegister)

	if err := c.compileCallFunctionImpl(targetAddressRegister, targetType); err != nil {
		return err
	}
	return nil
}

// compileCallIndirect implements compiler.compileCallIndirect for the amd64 architecture.
func (c *amd64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) error {
	offset := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(offset); err != nil {
		return err
	}
	typeIndex := o.U1
	tableIndex := o.U2

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.locationStack.markRegisterUsed(tmp)

	tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.locationStack.markRegisterUsed(tmp2)

	// Load the address of the target table: tmp = &module.Tables[0].
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp)
	// tmp = &module.Tables[0] + Index*8 = &module.Tables[0] + sizeOf(*TableInstance)*index = module.Tables[o.TableIndex].
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex*8), tmp)

	// Then, we need to trap if the offset exceeds the length of the table.
	c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, offset.register)
	c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusCodeInvalidTableAccess)

	// Next, we check if the target's type matches the operation's.
	// In order to get the target's address, we have to multiply the offset
	// by 8 as the table is a Go slice of uintptr ([]uintptr),
	// and the size of uintptr equals 8 bytes (2^3).
	c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, offset.register)

	// Add the address of wasm.Table[0], stored as callEngine.tableElement0Address, to the offset.
	c.assembler.CompileMemoryToRegister(amd64.ADDQ,
		tmp, tableInstanceTableOffset, offset.register)

	// "offset = (*offset) (== table[offset] == *code type)"
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, offset.register, 0, offset.register)

	// At this point offset.register holds the address of *code (as uintptr) at wasm.Table[offset].
	//
	// Check if the value of table[offset] equals zero, meaning that the target is uninitialized.
	c.assembler.CompileRegisterToRegister(amd64.TESTQ, offset.register, offset.register)

	// Skipped if the target is initialized.
	c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusCodeInvalidTableAccess)

	// Next, we need to check that the type matches, i.e. table[offset].source.TypeID == targetFunctionType's typeID.
	//
	// "tmp2 = [&moduleInstance.TypeIDs[0] + index * 4] (== moduleInstance.TypeIDs[index])"
	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
		amd64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
		tmp2)
	c.assembler.CompileMemoryToRegister(amd64.MOVL, tmp2, int64(typeIndex)*4, tmp2)

	// Skipped if the type matches.
	c.assembler.CompileMemoryToRegister(amd64.CMPL, offset.register, functionTypeIDOffset, tmp2)
	c.compileMaybeExitFromNativeCode(amd64.JEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall)
	targetFunctionType := &c.ir.Types[typeIndex]
	if err = c.compileCallFunctionImpl(offset.register, targetFunctionType); err != nil {
		return err
	}

	// The offset register should be marked as unused as it is consumed in the function call.
	c.locationStack.markRegisterUnused(offset.register, tmp, tmp2)
	return nil
}

// compileDrop implements compiler.compileDrop for the amd64 architecture.
func (c *amd64Compiler) compileDrop(o *wazeroir.UnionOperation) error {
	return compileDropRange(c, o.U1)
}

// compileSelectV128Impl implements compileSelect for vector values.
func (c *amd64Compiler) compileSelectV128Impl(selectorReg asm.Register) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// Compare the conditional value with zero.
	c.assembler.CompileRegisterToRegister(amd64.TESTQ, selectorReg, selectorReg)

	// Set the jump if the top value is not zero.
	jmpIfNotZero := c.assembler.CompileJump(amd64.JNE)

	// In this branch, we select the value of x2, so we move the value into x1.register so that
	// we can have the result in x1.register regardless of the selection.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQU, x2.register, x1.register)

	// Else, we don't need to adjust the value, we just need to jump to the next instruction.
	c.assembler.SetJumpTargetOnNext(jmpIfNotZero)

	// As noted, the result exists in x1.register regardless of the selector.
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	// Plus, x2.register is no longer used.
	c.locationStack.markRegisterUnused(x2.register)
	c.locationStack.markRegisterUnused(selectorReg)
	return nil
}

// compileSelect implements compiler.compileSelect for the amd64 architecture.
//
// The emitted native code depends on whether the values are on
// physical registers, the memory stack, or maybe a conditional register.
func (c *amd64Compiler) compileSelect(o *wazeroir.UnionOperation) error {
	cv := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(cv); err != nil {
		return err
	}

	isTargetVector := o.B3
	if isTargetVector {
		return c.compileSelectV128Impl(cv.register)
	}

	x2 := c.locationStack.pop()
	// We do not consume x1 here, but modify the value according to
	// the conditional value "cv" above.
	peekedX1 := c.locationStack.peek()

	// Compare the conditional value with zero.
	c.assembler.CompileRegisterToRegister(amd64.TESTQ, cv.register, cv.register)

	// Now we can use cv.register as a temporary location.
	// We alias it here for readability.
	tmpRegister := cv.register

	// Set the jump if the top value is not zero.
	jmpIfNotZero := c.assembler.CompileJump(amd64.JNE)

	// If the value is zero, we must place the value of x2 onto the stack position of x1.

	// First we copy the value of x2 to the temporary register if x2 is not currently on a register.
	if x2.onStack() {
		x2.register = tmpRegister
		c.compileLoadValueOnStackToRegister(x2)
	}

	//
	// At this point x2's value is always on a register.
	//

	// Then release the value in x2's register to x1's stack position.
	if peekedX1.onRegister() {
		c.assembler.CompileRegisterToRegister(amd64.MOVQ, x2.register, peekedX1.register)
	} else {
		peekedX1.register = x2.register
		c.compileReleaseRegisterToStack(peekedX1) // Note: inside, we mark the register unused!
	}

	// Else, we don't need to adjust the value, we just need to jump to the next instruction.
	c.assembler.SetJumpTargetOnNext(jmpIfNotZero)

	// In any case, we don't need x2 and cv anymore!
	c.locationStack.releaseRegister(x2)
	c.locationStack.releaseRegister(cv)
	return nil
}
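// selectModel is an illustrative, hedged reference model (not used by the compiler, and not part
// of the original source) of the Wasm select semantics that compileSelect above lowers to TESTQ
// plus a conditional jump: a zero condition picks the second operand, a non-zero condition keeps
// the first. The name and parameters are hypothetical, introduced only for this example.
func selectModel(x1, x2 uint64, cond uint32) uint64 { //nolint:unused // illustrative example only
	if cond == 0 {
		return x2
	}
	return x1
}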
// compilePick implements compiler.compilePick for the amd64 architecture.
func (c *amd64Compiler) compilePick(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	depth := o.U1
	isTargetVector := o.B3

	// TODO: if we track the type of values on the stack,
	// we could optimize the instruction according to the bit size of the value.
	// For now, we just move the entire register, i.e. as a quad word (8 bytes).
	pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)]
	reg, err := c.allocateRegister(pickTarget.getRegisterType())
	if err != nil {
		return err
	}

	if pickTarget.onRegister() {
		var inst asm.Instruction
		if isTargetVector {
			inst = amd64.MOVDQU
		} else if pickTarget.valueType == runtimeValueTypeI32 { // amd64 cannot copy single-precisions between registers.
			inst = amd64.MOVL
		} else {
			inst = amd64.MOVQ
		}
		c.assembler.CompileRegisterToRegister(inst, pickTarget.register, reg)
	} else if pickTarget.onStack() {
		// Copy the value from the stack.
		var inst asm.Instruction
		if isTargetVector {
			inst = amd64.MOVDQU
		} else if pickTarget.valueType == runtimeValueTypeI32 || pickTarget.valueType == runtimeValueTypeF32 {
			inst = amd64.MOVL
		} else {
			inst = amd64.MOVQ
		}
		// Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds the 32-bit range.
		c.assembler.CompileMemoryToRegister(inst, amd64ReservedRegisterForStackBasePointerAddress,
			int64(pickTarget.stackPointer)*8, reg)
	}
	// Now that we have placed the picked value on the register,
	// push the location onto the stack.
	if isTargetVector {
		c.pushVectorRuntimeValueLocationOnRegister(reg)
	} else {
		c.pushRuntimeValueLocationOnRegister(reg, pickTarget.valueType)
	}
	return nil
}

// compileAdd implements compiler.compileAdd for the amd64 architecture.
func (c *amd64Compiler) compileAdd(o *wazeroir.UnionOperation) error {
	// TODO: if the previous instruction is const, then
	// this can be optimized. Same goes for other arithmetic instructions.

	var instruction asm.Instruction

	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		instruction = amd64.ADDL
	case wazeroir.UnsignedTypeI64:
		instruction = amd64.ADDQ
	case wazeroir.UnsignedTypeF32:
		instruction = amd64.ADDSS
	case wazeroir.UnsignedTypeF64:
		instruction = amd64.ADDSD
	}

	x2 := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.peek() // Note this is peek!
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 += x2.
	c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register)

	// We no longer need the x2 register after the ADD operation here,
	// so we release it.
	c.locationStack.releaseRegister(x2)
	return nil
}

// compileSub implements compiler.compileSub for the amd64 architecture.
func (c *amd64Compiler) compileSub(o *wazeroir.UnionOperation) error {
	// TODO: if the previous instruction is const, then
	// this can be optimized. Same goes for other arithmetic instructions.

	var instruction asm.Instruction
	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		instruction = amd64.SUBL
	case wazeroir.UnsignedTypeI64:
		instruction = amd64.SUBQ
	case wazeroir.UnsignedTypeF32:
		instruction = amd64.SUBSS
	case wazeroir.UnsignedTypeF64:
		instruction = amd64.SUBSD
	}

	x2 := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.peek() // Note this is peek!
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 -= x2.
	c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register)

	// We no longer need the x2 register after the SUB operation here,
	// so we release it.
	c.locationStack.releaseRegister(x2)
	return nil
}

// compileMul implements compiler.compileMul for the amd64 architecture.
func (c *amd64Compiler) compileMul(o *wazeroir.UnionOperation) (err error) {
	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		err = c.compileMulForInts(true, amd64.MULL)
	case wazeroir.UnsignedTypeI64:
		err = c.compileMulForInts(false, amd64.MULQ)
	case wazeroir.UnsignedTypeF32:
		err = c.compileMulForFloats(amd64.MULSS)
	case wazeroir.UnsignedTypeF64:
		err = c.compileMulForFloats(amd64.MULSD)
	}
	return
}

// compileMulForInts emits instructions to perform integer multiplication on the
// top two values on the stack. If unfamiliar with the convention for integer
// multiplication on x86, see https://www.felixcloutier.com/x86/mul.
//
// In summary, one of the values must be in the AX register,
// and the mul instruction stores the overflow info in the DX register, which we don't use.
// By "the overflow info" we mean the 65th bit or higher part of the result in the 64-bit case.
//
// So, we have to ensure that
//  1. any value previously located on DX is saved to the memory stack, because
//     the existing value will be overwritten by the mul execution.
//  2. one of the operands (x1 or x2) is in the AX register.
//
// See https://www.felixcloutier.com/x86/mul#description for detailed semantics.
func (c *amd64Compiler) compileMulForInts(is32Bit bool, mulInstruction asm.Instruction) error {
	const (
		resultRegister = amd64.RegAX
		reservedRegister = amd64.RegDX
	)

	x2 := c.locationStack.pop()
	x1 := c.locationStack.pop()

	var valueOnAX *runtimeValueLocation
	if x1.register == resultRegister {
		valueOnAX = x1
	} else if x2.register == resultRegister {
		valueOnAX = x2
	} else {
		valueOnAX = x2
		// In this case we move x2 to the AX register.
		c.onValueReleaseRegisterToStack(resultRegister)
		if x2.onConditionalRegister() {
			c.compileMoveConditionalToGeneralPurposeRegister(x2, resultRegister)
		} else if x2.onStack() {
			x2.setRegister(resultRegister)
			c.compileLoadValueOnStackToRegister(x2)
			c.locationStack.markRegisterUsed(resultRegister)
		} else {
			var inst asm.Instruction
			if is32Bit {
				inst = amd64.MOVL
			} else {
				inst = amd64.MOVQ
			}
			c.assembler.CompileRegisterToRegister(inst, x2.register, resultRegister)

			// We no longer use the previous register of x2.
			c.locationStack.releaseRegister(x2)
			x2.setRegister(resultRegister)
			c.locationStack.markRegisterUsed(resultRegister)
		}
	}

	// We have to make sure that at this point the operands are on registers.
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// We have to save the existing value on DX.
	// If the DX register is used by either x1 or x2, we don't need to
	// save the value because it is consumed by mul anyway.
	if x1.register != reservedRegister && x2.register != reservedRegister {
		c.onValueReleaseRegisterToStack(reservedRegister)
	}

	// Now we are ready to emit the mul instruction.
	if x1 == valueOnAX {
		c.assembler.CompileRegisterToNone(mulInstruction, x2.register)
	} else {
		c.assembler.CompileRegisterToNone(mulInstruction, x1.register)
	}

	c.locationStack.markRegisterUnused(x2.register)
	c.locationStack.markRegisterUnused(x1.register)

	// Now we have the result in the AX register,
	// so we record it.
	c.pushRuntimeValueLocationOnRegister(resultRegister, x1.valueType)
	return nil
}
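// mulLowerHalfModel is an illustrative, hedged reference model (not used by the compiler, and not
// part of the original source) of the one-operand MUL convention compileMulForInts relies on
// above: one operand must be in AX, the full product is written to the DX:AX register pair, and
// only the lower half (AX) is pushed as the Wasm result while the upper half in DX is discarded.
// The name is hypothetical, for this example only.
func mulLowerHalfModel(x1, x2 uint64) uint64 { //nolint:unused // illustrative example only
	// Go's uint64 multiplication keeps exactly the lower 64 bits of the product, i.e. what MUL leaves in AX.
	return x1 * x2
}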
func (c *amd64Compiler) compileMulForFloats(instruction asm.Instruction) error {
	x2 := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.peek() // Note this is peek!
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 *= x2.
	c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register)

	// We no longer need the x2 register after the MUL operation here,
	// so we release it.
	c.locationStack.releaseRegister(x2)
	return nil
}

// compileClz implements compiler.compileClz for the amd64 architecture.
func (c *amd64Compiler) compileClz(o *wazeroir.UnionOperation) error {
	target := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(target); err != nil {
		return err
	}

	unsignedInt := wazeroir.UnsignedInt(o.B1)
	if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.LZCNTQ, target.register, target.register)
		}
	} else {
		// On processors that do not support LZCNT, we combine BSR (calculating
		// the most significant set bit) with XOR. This logic is described in the
		// "Replace Raw Assembly Code with Builtin Intrinsics" section of:
		// https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code.

		// First, we have to check if the target is non-zero as BSR is undefined
		// on zero. See https://www.felixcloutier.com/x86/bsr.
		c.assembler.CompileRegisterToRegister(amd64.TESTQ, target.register, target.register)
		jmpIfNonZero := c.assembler.CompileJump(amd64.JNE)

		// If the value is zero, we just push the const value.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(32), target.register)
		} else {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(64), target.register)
		}

		// Emit the jmp instruction to jump to the position right after
		// the non-zero case.
		jmpAtEndOfZero := c.assembler.CompileJump(amd64.JMP)

		// Start emitting the non-zero case.
		c.assembler.SetJumpTargetOnNext(jmpIfNonZero)
		// First, we calculate the most significant set bit.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.BSRL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.BSRQ, target.register, target.register)
		}

		// Now we XOR the value with the bit length minus one.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileConstToRegister(amd64.XORL, 31, target.register)
		} else {
			c.assembler.CompileConstToRegister(amd64.XORQ, 63, target.register)
		}

		// Finally the end jump instruction of the zero case must target
		// the next instruction.
		c.assembler.SetJumpTargetOnNext(jmpAtEndOfZero)
	}

	// We reused the same register of target for the result.
	c.locationStack.markRegisterUnused(target.register)
	c.pushRuntimeValueLocationOnRegister(target.register, target.valueType)
	return nil
}

// compileCtz implements compiler.compileCtz for the amd64 architecture.
func (c *amd64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
	target := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(target); err != nil {
		return err
	}

	unsignedInt := wazeroir.UnsignedInt(o.B1)
	if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTQ, target.register, target.register)
		}
	} else {
		// On processors that do not support TZCNT, the BSF instruction is
		// executed instead. The key difference between the TZCNT and BSF
		// instructions is that if the source operand is zero, the content of
		// the destination operand is undefined.
		// https://www.felixcloutier.com/x86/tzcnt.html

		// First we compare the target with zero.
		c.assembler.CompileRegisterToRegister(amd64.TESTQ, target.register, target.register)
		jmpIfNonZero := c.assembler.CompileJump(amd64.JNE)

		// If the value is zero, we just push the const value.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(32), target.register)
		} else {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(64), target.register)
		}

		// Emit the jmp instruction to jump to the position right after
		// the non-zero case.
		jmpAtEndOfZero := c.assembler.CompileJump(amd64.JMP)

		// Otherwise, emit the TZCNT.
		c.assembler.SetJumpTargetOnNext(jmpIfNonZero)
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTQ, target.register, target.register)
		}

		// Finally the end jump instruction of the zero case must target
		// the next instruction.
		c.assembler.SetJumpTargetOnNext(jmpAtEndOfZero)
	}

	// We reused the same register of target for the result.
	c.locationStack.markRegisterUnused(target.register)
	c.pushRuntimeValueLocationOnRegister(target.register, target.valueType)
	return nil
}
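// clz32FallbackModel is an illustrative, hedged sketch (not used by the compiler, and not part of
// the original source) of the BSR+XOR fallback emitted by compileClz above when LZCNT is
// unavailable: BSR yields the index of the most significant set bit, and XOR-ing that index with
// 31 turns it into the leading-zero count for the 32-bit case (63 is used for the 64-bit case).
// The name is hypothetical, for this example only.
func clz32FallbackModel(v uint32) uint32 { //nolint:unused // illustrative example only
	if v == 0 {
		return 32 // The zero case is handled by a separate branch, just like the generated code.
	}
	msbIndex := uint32(0)
	for x := v; x > 1; x >>= 1 { // Software stand-in for BSR: index of the most significant set bit.
		msbIndex++
	}
	return msbIndex ^ 31 // Equivalent to 31 - msbIndex for any index in [0, 31].
}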
1382 func (c *amd64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error { 1383 target := c.locationStack.pop() 1384 if err := c.compileEnsureOnRegister(target); err != nil { 1385 return err 1386 } 1387 1388 unsignedInt := wazeroir.UnsignedInt(o.B1) 1389 if unsignedInt == wazeroir.UnsignedInt32 { 1390 c.assembler.CompileRegisterToRegister(amd64.POPCNTL, target.register, target.register) 1391 } else { 1392 c.assembler.CompileRegisterToRegister(amd64.POPCNTQ, target.register, target.register) 1393 } 1394 1395 // We reused the same register of target for the result. 1396 c.locationStack.markRegisterUnused(target.register) 1397 c.pushRuntimeValueLocationOnRegister(target.register, target.valueType) 1398 return nil 1399 } 1400 1401 // compileDiv implements compiler.compileDiv for the amd64 architecture. 1402 func (c *amd64Compiler) compileDiv(o *wazeroir.UnionOperation) (err error) { 1403 signedType := wazeroir.SignedType(o.B1) 1404 switch signedType { 1405 case wazeroir.SignedTypeUint32: 1406 err = c.compileDivForInts(true, false) 1407 case wazeroir.SignedTypeUint64: 1408 err = c.compileDivForInts(false, false) 1409 case wazeroir.SignedTypeInt32: 1410 err = c.compileDivForInts(true, true) 1411 case wazeroir.SignedTypeInt64: 1412 err = c.compileDivForInts(false, true) 1413 case wazeroir.SignedTypeFloat32: 1414 err = c.compileDivForFloats(true) 1415 case wazeroir.SignedTypeFloat64: 1416 err = c.compileDivForFloats(false) 1417 } 1418 return 1419 } 1420 1421 // compileDivForInts emits the instructions to perform division on the top 1422 // two values of integer type on the stack and puts the quotient of the result 1423 // onto the stack. For example, stack [..., 10, 3] results in [..., 3] where 1424 // the remainder is discarded. 1425 func (c *amd64Compiler) compileDivForInts(is32Bit bool, signed bool) error { 1426 if err := c.performDivisionOnInts(false, is32Bit, signed); err != nil { 1427 return err 1428 } 1429 // Now we have the quotient of the division result in the AX register, 1430 // so we record it. 1431 if is32Bit { 1432 c.pushRuntimeValueLocationOnRegister(amd64.RegAX, runtimeValueTypeI32) 1433 } else { 1434 c.pushRuntimeValueLocationOnRegister(amd64.RegAX, runtimeValueTypeI64) 1435 } 1436 return nil 1437 } 1438 1439 // compileRem implements compiler.compileRem for the amd64 architecture. 1440 func (c *amd64Compiler) compileRem(o *wazeroir.UnionOperation) (err error) { 1441 var vt runtimeValueType 1442 signedInt := wazeroir.SignedInt(o.B1) 1443 switch signedInt { 1444 case wazeroir.SignedInt32: 1445 err = c.performDivisionOnInts(true, true, true) 1446 vt = runtimeValueTypeI32 1447 case wazeroir.SignedInt64: 1448 err = c.performDivisionOnInts(true, false, true) 1449 vt = runtimeValueTypeI64 1450 case wazeroir.SignedUint32: 1451 err = c.performDivisionOnInts(true, true, false) 1452 vt = runtimeValueTypeI32 1453 case wazeroir.SignedUint64: 1454 err = c.performDivisionOnInts(true, false, false) 1455 vt = runtimeValueTypeI64 1456 } 1457 if err != nil { 1458 return err 1459 } 1460 1461 // Now we have the remainder of the division result in the DX register, 1462 // so we record it. 1463 c.pushRuntimeValueLocationOnRegister(amd64.RegDX, vt) 1464 return 1465 } 1466 1467 // performDivisionOnInts emits the instructions to do divisions on top two integers on the stack 1468 // via DIV (unsigned div) and IDIV (signed div) instructions. 
1469 // See the following explanation of these instructions' semantics from https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf 1470 // 1471 // >> Division requires special arrangements: idiv (signed) and div (unsigned) operate on a 2n-byte dividend and 1472 // >> an n-byte divisor to produce an n-byte quotient and n-byte remainder. The dividend always lives in a fixed pair of 1473 // >> registers (%edx and %eax for the 32-bit case; %rdx and %rax for the 64-bit case); the divisor is specified as the 1474 // >> source operand in the instruction. The quotient goes in %eax (resp. %rax); the remainder in %edx (resp. %rdx). For 1475 // >> signed division, the cltd (resp. ctqo) instruction is used to prepare %edx (resp. %rdx) with the sign extension of 1476 // >> %eax (resp. %rax). For example, if a,b, c are memory locations holding quad words, then we could set c = a/b 1477 // >> using the sequence: movq a(%rip), %rax; ctqo; idivq b(%rip); movq %rax, c(%rip). 1478 // 1479 // tl;dr is that the division result is placed in AX and DX registers after instructions emitted by this function 1480 // where AX holds the quotient while DX the remainder of the division result. 1481 func (c *amd64Compiler) performDivisionOnInts(isRem, is32Bit, signed bool) error { 1482 const ( 1483 quotientRegister = amd64.RegAX 1484 remainderRegister = amd64.RegDX 1485 ) 1486 1487 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 1488 return err 1489 } 1490 1491 // Ensures that previous values on these registers are saved to memory. 1492 c.onValueReleaseRegisterToStack(quotientRegister) 1493 c.onValueReleaseRegisterToStack(remainderRegister) 1494 1495 // In order to ensure x2 is placed on a temporary register for x2 value other than AX and DX, 1496 // we mark them as used here. 1497 c.locationStack.markRegisterUsed(quotientRegister) 1498 c.locationStack.markRegisterUsed(remainderRegister) 1499 1500 // Ensure that x2 is placed on a register which is not either AX or DX. 1501 x2 := c.locationStack.pop() 1502 if err := c.compileEnsureOnRegister(x2); err != nil { 1503 return err 1504 } 1505 1506 // Now we successfully place x2 on a temp register, so we no longer need to 1507 // mark these registers used. 1508 c.locationStack.markRegisterUnused(quotientRegister) 1509 c.locationStack.markRegisterUnused(remainderRegister) 1510 1511 // Check if the x2 equals zero. 1512 if is32Bit { 1513 c.assembler.CompileRegisterToRegister(amd64.TESTL, x2.register, x2.register) 1514 } else { 1515 c.assembler.CompileRegisterToRegister(amd64.TESTQ, x2.register, x2.register) 1516 } 1517 1518 // Skipped if the divisor is nonzero. 1519 c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusIntegerDivisionByZero) 1520 1521 // next, we ensure that x1 is placed on AX. 1522 x1 := c.locationStack.pop() 1523 if x1.onRegister() && x1.register != quotientRegister { 1524 // Move x1 to quotientRegister. 1525 if is32Bit { 1526 c.assembler.CompileRegisterToRegister(amd64.MOVL, x1.register, quotientRegister) 1527 } else { 1528 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x1.register, quotientRegister) 1529 } 1530 c.locationStack.markRegisterUnused(x1.register) 1531 x1.setRegister(quotientRegister) 1532 } else if x1.onStack() { 1533 x1.setRegister(quotientRegister) 1534 c.compileLoadValueOnStackToRegister(x1) 1535 } 1536 1537 // Note: at this point, x1 is placed on AX, x2 is on a register which is not AX or DX. 
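	// Illustrative note (added for exposition; not part of the original wazero source): the
	// special-case branches emitted below mirror the following Wasm-level semantics, sketched
	// here in plain Go for the 32-bit signed case (wasmI32DivS/wasmI32RemS are hypothetical names):
	//
	//	func wasmI32DivS(x1, x2 int32) (result int32, trap bool) {
	//		if x2 == 0 {
	//			return 0, true // integer division by zero
	//		}
	//		if x1 == math.MinInt32 && x2 == -1 {
	//			return 0, true // integer overflow: 2^31 is not representable
	//		}
	//		return x1 / x2, false
	//	}
	//
	//	func wasmI32RemS(x1, x2 int32) (result int32, trap bool) {
	//		if x2 == 0 {
	//			return 0, true // integer division by zero
	//		}
	//		if x2 == -1 {
	//			return 0, false // defined as zero, no trap even for math.MinInt32 % -1
	//		}
	//		return x1 % x2, false
	//	}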
1538 
1539 	isSignedRem := isRem && signed
1540 	isSignedDiv := !isRem && signed
1541 	var signedRemMinusOneDivisorJmp asm.Node
1542 	if isSignedRem {
1543 		// If this is for getting the remainder of signed division,
1544 		// we have to treat the special case where the divisor equals -1.
1545 		// For example, in the 32-bit case, the result of (-2^31) / -1 equals (quotient=2^31, remainder=0)
1546 		// where the quotient doesn't fit in the 32-bit range whose maximum is 2^31-1.
1547 		// x86 raises a floating point exception in this case, but according to the Wasm spec the remainder
1548 		// must be zero (not undefined!) when the divisor equals -1, so we branch to return zero here;
1549 		// the signed division case (-2^31) / -1, which must trap instead, is handled in the branch below.
1550 		// For detail, please refer to https://stackoverflow.com/questions/56303282/why-idiv-with-1-causes-floating-point-exception
1551 
1552 		// First we store zero into the remainder result register (DX) and compare the divisor with -1.
1553 		if is32Bit {
1554 			c.assembler.CompileRegisterToRegister(amd64.XORL, remainderRegister, remainderRegister)
1555 			c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, -1)
1556 		} else {
1557 			c.assembler.CompileRegisterToRegister(amd64.XORQ, remainderRegister, remainderRegister)
1558 			c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, -1)
1559 		}
1560 
1561 		// If it equals minus one, we skip the normal case.
1562 		signedRemMinusOneDivisorJmp = c.assembler.CompileJump(amd64.JEQ)
1563 	} else if isSignedDiv {
1564 		// For signed division, we need branches for the "math.MinInt{32,64} / -1"
1565 		// case, which results in a floating point exception (division error) because
1566 		// the resulting value exceeds the maximum of the signed int.
1567 
1568 		// First we compare the divisor with -1.
1569 		if is32Bit {
1570 			c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, -1)
1571 		} else {
1572 			c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, -1)
1573 		}
1574 
1575 		// If it doesn't equal minus one, we jump to the normal case.
1576 		nonMinusOneDivisorJmp := c.assembler.CompileJump(amd64.JNE)
1577 
1578 		// Next we check if the dividend (x1) is the most negative value for the signed integer,
1579 		// i.e. whether we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1) respectively.
1580 		if is32Bit {
1581 			if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPL, x1.register, c.minimum32BitSignedInt); err != nil {
1582 				return err
1583 			}
1584 		} else {
1585 			if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPQ, x1.register, c.minimum64BitSignedInt); err != nil {
1586 				return err
1587 			}
1588 		}
1589 
1590 		// Trap if we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1),
1591 		// as that overflows the division: the result would be 2^31 (or 2^63), which is larger than
1592 		// the maximum of the signed 32-bit (or 64-bit) int.
1593 		c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusIntegerOverflow)
1594 		// Set the normal case's jump target.
1595 		c.assembler.SetJumpTargetOnNext(nonMinusOneDivisorJmp)
1596 	}
1597 
1598 	// Now ready to emit the div instruction.
1599 	// Since the div instructions take a 2n-byte dividend placed in the DX:AX registers:
1600 	// * signed case - we need to sign-extend the dividend into the DX register via CDQ (32 bit) or CQO (64 bit).
1601 	// * unsigned case - we need to zero the DX register via "XOR DX DX".
1602 	if is32Bit && signed {
1603 		// Emit sign-extension to have a 64-bit dividend over the DX and AX registers.
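		// Note (added for exposition; not part of the original wazero source): CDQ sign-extends
		// EAX into EDX so that EDX:EAX holds the sign-extended 64-bit dividend; CQO in the
		// 64-bit branch below does the same for RAX into RDX, giving the 128-bit dividend RDX:RAX.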
1604 c.assembler.CompileStandAlone(amd64.CDQ) 1605 c.assembler.CompileRegisterToNone(amd64.IDIVL, x2.register) 1606 } else if is32Bit && !signed { 1607 // Zeros DX register to have 64 bit dividend over DX and AX registers. 1608 c.assembler.CompileRegisterToRegister(amd64.XORQ, amd64.RegDX, amd64.RegDX) 1609 c.assembler.CompileRegisterToNone(amd64.DIVL, x2.register) 1610 } else if !is32Bit && signed { 1611 // Emits sign-extension to have 128 bit dividend over DX and AX registers. 1612 c.assembler.CompileStandAlone(amd64.CQO) 1613 c.assembler.CompileRegisterToNone(amd64.IDIVQ, x2.register) 1614 } else if !is32Bit && !signed { 1615 // Zeros DX register to have 128 bit dividend over DX and AX registers. 1616 c.assembler.CompileRegisterToRegister(amd64.XORQ, amd64.RegDX, amd64.RegDX) 1617 c.assembler.CompileRegisterToNone(amd64.DIVQ, x2.register) 1618 } 1619 1620 // If this is signed rem instruction, we must set the jump target of 1621 // the exit jump from division -1 case towards the next instruction. 1622 if signedRemMinusOneDivisorJmp != nil { 1623 c.assembler.SetJumpTargetOnNext(signedRemMinusOneDivisorJmp) 1624 } 1625 1626 // We mark them as unused so that we can push one of them onto the location stack at call sites. 1627 c.locationStack.markRegisterUnused(remainderRegister) 1628 c.locationStack.markRegisterUnused(quotientRegister) 1629 c.locationStack.markRegisterUnused(x2.register) 1630 return nil 1631 } 1632 1633 // compileDivForFloats emits the instructions to perform division 1634 // on the top two values of float type on the stack, placing the result back onto the stack. 1635 // For example, stack [..., 1.0, 4.0] results in [..., 0.25]. 1636 func (c *amd64Compiler) compileDivForFloats(is32Bit bool) error { 1637 if is32Bit { 1638 return c.compileSimpleBinaryOp(amd64.DIVSS) 1639 } else { 1640 return c.compileSimpleBinaryOp(amd64.DIVSD) 1641 } 1642 } 1643 1644 // compileAnd implements compiler.compileAnd for the amd64 architecture. 1645 func (c *amd64Compiler) compileAnd(o *wazeroir.UnionOperation) (err error) { 1646 unsignedInt := wazeroir.UnsignedInt(o.B1) 1647 switch unsignedInt { 1648 case wazeroir.UnsignedInt32: 1649 err = c.compileSimpleBinaryOp(amd64.ANDL) 1650 case wazeroir.UnsignedInt64: 1651 err = c.compileSimpleBinaryOp(amd64.ANDQ) 1652 } 1653 return 1654 } 1655 1656 // compileOr implements compiler.compileOr for the amd64 architecture. 1657 func (c *amd64Compiler) compileOr(o *wazeroir.UnionOperation) (err error) { 1658 unsignedInt := wazeroir.UnsignedInt(o.B1) 1659 switch unsignedInt { 1660 case wazeroir.UnsignedInt32: 1661 err = c.compileSimpleBinaryOp(amd64.ORL) 1662 case wazeroir.UnsignedInt64: 1663 err = c.compileSimpleBinaryOp(amd64.ORQ) 1664 } 1665 return 1666 } 1667 1668 // compileXor implements compiler.compileXor for the amd64 architecture. 1669 func (c *amd64Compiler) compileXor(o *wazeroir.UnionOperation) (err error) { 1670 unsignedInt := wazeroir.UnsignedInt(o.B1) 1671 switch unsignedInt { 1672 case wazeroir.UnsignedInt32: 1673 err = c.compileSimpleBinaryOp(amd64.XORL) 1674 case wazeroir.UnsignedInt64: 1675 err = c.compileSimpleBinaryOp(amd64.XORQ) 1676 } 1677 return 1678 } 1679 1680 // compileSimpleBinaryOp emits instructions to pop two values from the stack 1681 // and perform the given instruction on these two values and push the result 1682 // onto the stack. 
1683 func (c *amd64Compiler) compileSimpleBinaryOp(instruction asm.Instruction) error { 1684 x2 := c.locationStack.pop() 1685 if err := c.compileEnsureOnRegister(x2); err != nil { 1686 return err 1687 } 1688 1689 x1 := c.locationStack.pop() 1690 if err := c.compileEnsureOnRegister(x1); err != nil { 1691 return err 1692 } 1693 1694 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 1695 1696 // We consumed x2 register after the operation here, 1697 // so we release it. 1698 c.locationStack.releaseRegister(x2) 1699 1700 // We already stored the result in the register used by x1 1701 // so we record it. 1702 c.locationStack.markRegisterUnused(x1.register) 1703 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1704 return nil 1705 } 1706 1707 // compileShl implements compiler.compileShl for the amd64 architecture. 1708 func (c *amd64Compiler) compileShl(o *wazeroir.UnionOperation) (err error) { 1709 unsignedInt := wazeroir.UnsignedInt(o.B1) 1710 switch unsignedInt { 1711 case wazeroir.UnsignedInt32: 1712 err = c.compileShiftOp(amd64.SHLL, false) 1713 case wazeroir.UnsignedInt64: 1714 err = c.compileShiftOp(amd64.SHLQ, true) 1715 } 1716 return 1717 } 1718 1719 // compileShr implements compiler.compileShr for the amd64 architecture. 1720 func (c *amd64Compiler) compileShr(o *wazeroir.UnionOperation) (err error) { 1721 signedInt := wazeroir.SignedInt(o.B1) 1722 switch signedInt { 1723 case wazeroir.SignedInt32: 1724 err = c.compileShiftOp(amd64.SARL, true) 1725 case wazeroir.SignedInt64: 1726 err = c.compileShiftOp(amd64.SARQ, false) 1727 case wazeroir.SignedUint32: 1728 err = c.compileShiftOp(amd64.SHRL, true) 1729 case wazeroir.SignedUint64: 1730 err = c.compileShiftOp(amd64.SHRQ, false) 1731 } 1732 return 1733 } 1734 1735 // compileRotl implements compiler.compileRotl for the amd64 architecture. 1736 func (c *amd64Compiler) compileRotl(o *wazeroir.UnionOperation) (err error) { 1737 unsignedInt := wazeroir.UnsignedInt(o.B1) 1738 switch unsignedInt { 1739 case wazeroir.UnsignedInt32: 1740 err = c.compileShiftOp(amd64.ROLL, true) 1741 case wazeroir.UnsignedInt64: 1742 err = c.compileShiftOp(amd64.ROLQ, false) 1743 } 1744 return 1745 } 1746 1747 // compileRotr implements compiler.compileRotr for the amd64 architecture. 1748 func (c *amd64Compiler) compileRotr(o *wazeroir.UnionOperation) (err error) { 1749 unsignedInt := wazeroir.UnsignedInt(o.B1) 1750 switch unsignedInt { 1751 case wazeroir.UnsignedInt32: 1752 err = c.compileShiftOp(amd64.RORL, true) 1753 case wazeroir.UnsignedInt64: 1754 err = c.compileShiftOp(amd64.RORQ, false) 1755 } 1756 return 1757 } 1758 1759 // compileShiftOp adds instructions for shift operations (SHR, SHL, ROTR, ROTL) 1760 // where we have to place the second value (shift counts) on the CX register. 1761 func (c *amd64Compiler) compileShiftOp(instruction asm.Instruction, is32Bit bool) error { 1762 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 1763 return err 1764 } 1765 1766 x2 := c.locationStack.pop() 1767 1768 // Ensures that x2 (holding shift counts) is placed on the CX register. 1769 const shiftCountRegister = amd64.RegCX 1770 if (x2.onRegister() && x2.register != shiftCountRegister) || x2.onStack() { 1771 // If another value lives on the CX register, we release it to the stack. 1772 c.onValueReleaseRegisterToStack(shiftCountRegister) 1773 1774 if x2.onRegister() { 1775 x2r := x2.register 1776 // If x2 lives on a register, we move the value to CX. 
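			// Note (added for exposition; not part of the original wazero source): x86 variable-count
			// shifts and rotates (SHL/SHR/SAR/ROL/ROR with a register operand as the count) only read
			// the count from CL, the low byte of CX/RCX, which is why x2 must be staged in CX here.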
1777 if is32Bit { 1778 c.assembler.CompileRegisterToRegister(amd64.MOVL, x2r, shiftCountRegister) 1779 } else { 1780 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x2r, shiftCountRegister) 1781 } 1782 // We no longer place any value on the original register, so we record it. 1783 c.locationStack.markRegisterUnused(x2r) 1784 } else { 1785 // If it is on stack, we just move the memory allocated value to the CX register. 1786 x2.setRegister(shiftCountRegister) 1787 c.compileLoadValueOnStackToRegister(x2) 1788 } 1789 c.locationStack.markRegisterUsed(shiftCountRegister) 1790 } 1791 1792 x1 := c.locationStack.peek() // Note this is peek! 1793 x1r := x1.register 1794 1795 if x1.onRegister() { 1796 c.assembler.CompileRegisterToRegister(instruction, shiftCountRegister, x1r) 1797 } else { 1798 // Shift target can be placed on a memory location. 1799 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 1800 c.assembler.CompileRegisterToMemory(instruction, shiftCountRegister, amd64ReservedRegisterForStackBasePointerAddress, int64(x1.stackPointer)*8) 1801 } 1802 1803 // We consumed x2 register after the operation here, 1804 // so we release it. 1805 c.locationStack.markRegisterUnused(shiftCountRegister) 1806 return nil 1807 } 1808 1809 // compileAbs implements compiler.compileAbs for the amd64 architecture. 1810 // 1811 // See the following discussions for how we could take the abs of floats on x86 assembly. 1812 // https://stackoverflow.com/questions/32408665/fastest-way-to-compute-absolute-value-using-sse/32422471#32422471 1813 // https://stackoverflow.com/questions/44630015/how-would-fabsdouble-be-implemented-on-x86-is-it-an-expensive-operation 1814 func (c *amd64Compiler) compileAbs(o *wazeroir.UnionOperation) (err error) { 1815 target := c.locationStack.peek() // Note this is peek! 1816 if err = c.compileEnsureOnRegister(target); err != nil { 1817 return err 1818 } 1819 1820 // First shift left by one to clear the sign bit, and then shift right by one. 1821 if wazeroir.Float(o.B1) == wazeroir.Float32 { 1822 c.assembler.CompileConstToRegister(amd64.PSLLD, 1, target.register) 1823 c.assembler.CompileConstToRegister(amd64.PSRLD, 1, target.register) 1824 } else { 1825 c.assembler.CompileConstToRegister(amd64.PSLLQ, 1, target.register) 1826 c.assembler.CompileConstToRegister(amd64.PSRLQ, 1, target.register) 1827 } 1828 return nil 1829 } 1830 1831 // compileNeg implements compiler.compileNeg for the amd64 architecture. 1832 func (c *amd64Compiler) compileNeg(o *wazeroir.UnionOperation) (err error) { 1833 target := c.locationStack.peek() // Note this is peek! 1834 if err := c.compileEnsureOnRegister(target); err != nil { 1835 return err 1836 } 1837 1838 tmpReg, err := c.allocateRegister(registerTypeVector) 1839 if err != nil { 1840 return err 1841 } 1842 1843 // First we move the sign-bit mask (placed in memory) to the tmp register, 1844 // since we cannot take XOR directly with float reg and const. 1845 // And then negate the value by XOR it with the sign-bit mask. 
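	// Illustrative note (added for exposition; not part of the original wazero source): in IEEE-754,
	// negation is exactly a flip of the sign bit, which is what the XORPS/XORPD below implements.
	// In plain Go, for the 32-bit case:
	//
	//	neg := math.Float32frombits(math.Float32bits(f) ^ 0x8000_0000)
	//
	// This flips only the sign bit, matching Wasm f32.neg semantics for every input,
	// including NaN, ±0 and ±Inf.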
1846 if wazeroir.Float(o.B1) == wazeroir.Float32 { 1847 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.float32SignBitMask, tmpReg) 1848 if err != nil { 1849 return err 1850 } 1851 c.assembler.CompileRegisterToRegister(amd64.XORPS, tmpReg, target.register) 1852 } else { 1853 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.float64SignBitMask, tmpReg) 1854 if err != nil { 1855 return err 1856 } 1857 c.assembler.CompileRegisterToRegister(amd64.XORPD, tmpReg, target.register) 1858 } 1859 return nil 1860 } 1861 1862 // compileCeil implements compiler.compileCeil for the amd64 architecture. 1863 func (c *amd64Compiler) compileCeil(o *wazeroir.UnionOperation) (err error) { 1864 // Internally, ceil can be performed via ROUND instruction with 0x02 mode. 1865 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/ceilf.S for example. 1866 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x02) 1867 } 1868 1869 // compileFloor implements compiler.compileFloor for the amd64 architecture. 1870 func (c *amd64Compiler) compileFloor(o *wazeroir.UnionOperation) (err error) { 1871 // Internally, floor can be performed via ROUND instruction with 0x01 mode. 1872 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/floorf.S for example. 1873 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x01) 1874 } 1875 1876 // compileTrunc implements compiler.compileTrunc for the amd64 architecture. 1877 func (c *amd64Compiler) compileTrunc(o *wazeroir.UnionOperation) error { 1878 // Internally, trunc can be performed via ROUND instruction with 0x03 mode. 1879 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/truncf.S for example. 1880 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x03) 1881 } 1882 1883 // compileNearest implements compiler.compileNearest for the amd64 architecture. 1884 func (c *amd64Compiler) compileNearest(o *wazeroir.UnionOperation) error { 1885 // Nearest can be performed via ROUND instruction with 0x00 mode. 1886 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x00) 1887 } 1888 1889 func (c *amd64Compiler) compileRoundInstruction(is32Bit bool, mode int64) error { 1890 target := c.locationStack.peek() // Note this is peek! 1891 if err := c.compileEnsureOnRegister(target); err != nil { 1892 return err 1893 } 1894 1895 if is32Bit { 1896 c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDSS, target.register, target.register, byte(mode)) 1897 } else { 1898 c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDSD, target.register, target.register, byte(mode)) 1899 } 1900 return nil 1901 } 1902 1903 // compileMin implements compiler.compileMin for the amd64 architecture. 1904 func (c *amd64Compiler) compileMin(o *wazeroir.UnionOperation) error { 1905 is32Bit := wazeroir.Float(o.B1) == wazeroir.Float32 1906 if is32Bit { 1907 return c.compileMinOrMax(is32Bit, true, amd64.MINSS) 1908 } else { 1909 return c.compileMinOrMax(is32Bit, true, amd64.MINSD) 1910 } 1911 } 1912 1913 // compileMax implements compiler.compileMax for the amd64 architecture. 
1914 func (c *amd64Compiler) compileMax(o *wazeroir.UnionOperation) error {
1915 	is32Bit := wazeroir.Float(o.B1) == wazeroir.Float32
1916 	if is32Bit {
1917 		return c.compileMinOrMax(is32Bit, false, amd64.MAXSS)
1918 	} else {
1919 		return c.compileMinOrMax(is32Bit, false, amd64.MAXSD)
1920 	}
1921 }
1922 
1923 // compileMinOrMax adds instructions to pop two values from the stack, and push back either the minimum or
1924 // the maximum of these two values onto the stack according to the minOrMaxInstruction argument.
1925 // minOrMaxInstruction must be one of MAXSS, MAXSD, MINSS or MINSD.
1926 // Note: These native min/max instructions are almost compatible with min/max in the Wasm specification,
1927 // but they differ slightly with respect to NaN handling.
1928 // Native min/max instructions return the non-NaN value if exactly one of the target values
1929 // is NaN. For example, native_{min,max}(5.0, NaN) always returns 5.0, not NaN.
1930 // However, WebAssembly specifies that min/max must always return NaN if one of the values is NaN.
1931 // Therefore, in this function, we have to add conditional jumps to check if one of the values is NaN before
1932 // the native min/max, which is why we cannot simply emit a native min/max instruction here.
1933 //
1934 // For the semantics, see wazeroir.Min and wazeroir.Max for detail.
1935 func (c *amd64Compiler) compileMinOrMax(is32Bit, isMin bool, minOrMaxInstruction asm.Instruction) error {
1936 	x2 := c.locationStack.pop()
1937 	if err := c.compileEnsureOnRegister(x2); err != nil {
1938 		return err
1939 	}
1940 	x1 := c.locationStack.pop()
1941 	if err := c.compileEnsureOnRegister(x1); err != nil {
1942 		return err
1943 	}
1944 
1945 	// Check if this is the (either x1 or x2 is NaN) or (x1 equals x2) case.
1946 	if is32Bit {
1947 		c.assembler.CompileRegisterToRegister(amd64.UCOMISS, x2.register, x1.register)
1948 	} else {
1949 		c.assembler.CompileRegisterToRegister(amd64.UCOMISD, x2.register, x1.register)
1950 	}
1951 
1952 	// At this point, we have the three cases of conditional flags below
1953 	// (See https://www.felixcloutier.com/x86/ucomiss#operation for detail.)
1954 	//
1955 	// 1) Two values are NaN-free and different: All flags are cleared.
1956 	// 2) Two values are NaN-free and equal: Only the ZF flag is set.
1957 	// 3) One of the two values is NaN: ZF, PF and CF flags are set.
1958 
1959 	// Jump instruction to handle case 1) by checking the ZF flag,
1960 	// as ZF is only set for cases 2) and 3).
1961 	nanFreeOrDiffJump := c.assembler.CompileJump(amd64.JNE)
1962 
1963 	// Start handling 2) and 3).
1964 
1965 	// Jump if one of the two values is NaN by checking the parity flag (PF).
1966 	includeNaNJmp := c.assembler.CompileJump(amd64.JPS)
1967 
1968 	// Start handling 2).
1969 
1970 	// Before we exit this case, we have to ensure that positive zero (or negative zero for the min instruction) is
1971 	// returned if the two values are positive and negative zeros.
1972 	var inst asm.Instruction
1973 	switch {
1974 	case is32Bit && isMin:
1975 		inst = amd64.ORPS
1976 	case !is32Bit && isMin:
1977 		inst = amd64.ORPD
1978 	case is32Bit && !isMin:
1979 		inst = amd64.ANDPS
1980 	case !is32Bit && !isMin:
1981 		inst = amd64.ANDPD
1982 	}
1983 	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1984 
1985 	sameExitJmp := c.assembler.CompileJump(amd64.JMP)
1986 
1987 	// Start handling 3).
1988 	c.assembler.SetJumpTargetOnNext(includeNaNJmp)
1989 
1990 	// We emit the ADD instruction to produce the NaN in x1.
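	// Note (added for exposition; not part of the original wazero source): any arithmetic involving a
	// NaN operand yields NaN, so adding x2 into x1 below leaves a NaN in x1, which is the result Wasm
	// min/max requires whenever either operand is NaN. The whole case split of this function can be
	// sketched in plain Go as follows (wasmF32Min is a hypothetical name; f64 and max are analogous):
	//
	//	func wasmF32Min(x1, x2 float32) float32 {
	//		if x1 != x1 || x2 != x2 { // case 3): either operand is NaN
	//			return float32(math.NaN())
	//		}
	//		if x1 == x2 { // case 2): equal, incl. +0 vs -0 where -0 must win for min
	//			return math.Float32frombits(math.Float32bits(x1) | math.Float32bits(x2)) // OR keeps the sign bit (ORPS above)
	//		}
	//		if x1 < x2 { // case 1): NaN-free and different
	//			return x1
	//		}
	//		return x2
	//	}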
1991 if is32Bit { 1992 c.assembler.CompileRegisterToRegister(amd64.ADDSS, x2.register, x1.register) 1993 } else { 1994 c.assembler.CompileRegisterToRegister(amd64.ADDSD, x2.register, x1.register) 1995 } 1996 1997 // Exit from the NaN case branch. 1998 nanExitJmp := c.assembler.CompileJump(amd64.JMP) 1999 2000 // Start handling 1). 2001 c.assembler.SetJumpTargetOnNext(nanFreeOrDiffJump) 2002 2003 // Now handle the NaN-free and different values case. 2004 c.assembler.CompileRegisterToRegister(minOrMaxInstruction, x2.register, x1.register) 2005 2006 // Set the jump target of 1) and 2) cases to the next instruction after 3) case. 2007 c.assembler.SetJumpTargetOnNext(nanExitJmp) 2008 c.assembler.SetJumpTargetOnNext(sameExitJmp) 2009 2010 // Record that we consumed the x2 and placed the minOrMax result in the x1's register. 2011 c.locationStack.markRegisterUnused(x2.register) 2012 c.locationStack.markRegisterUnused(x1.register) 2013 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2014 return nil 2015 } 2016 2017 // compileCopysign implements compiler.compileCopysign for the amd64 architecture. 2018 func (c *amd64Compiler) compileCopysign(o *wazeroir.UnionOperation) error { 2019 is32Bit := wazeroir.Float(o.B1) == wazeroir.Float32 2020 2021 x2 := c.locationStack.pop() 2022 if err := c.compileEnsureOnRegister(x2); err != nil { 2023 return err 2024 } 2025 x1 := c.locationStack.pop() 2026 if err := c.compileEnsureOnRegister(x1); err != nil { 2027 return err 2028 } 2029 tmpReg, err := c.allocateRegister(registerTypeVector) 2030 if err != nil { 2031 return err 2032 } 2033 2034 // Move the rest bit mask to the temp register. 2035 if is32Bit { 2036 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.float32RestBitMask, tmpReg) 2037 } else { 2038 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.float64RestBitMask, tmpReg) 2039 } 2040 if err != nil { 2041 return err 2042 } 2043 2044 // Clear the sign bit of x1 via AND with the mask. 2045 if is32Bit { 2046 c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmpReg, x1.register) 2047 } else { 2048 c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmpReg, x1.register) 2049 } 2050 2051 // Move the sign bit mask to the temp register. 2052 if is32Bit { 2053 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.float32SignBitMask, tmpReg) 2054 } else { 2055 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.float64SignBitMask, tmpReg) 2056 } 2057 if err != nil { 2058 return err 2059 } 2060 2061 // Clear the non-sign bits of x2 via AND with the mask. 2062 if is32Bit { 2063 c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmpReg, x2.register) 2064 } else { 2065 c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmpReg, x2.register) 2066 } 2067 2068 // Finally, copy the sign bit of x2 to x1. 2069 if is32Bit { 2070 c.assembler.CompileRegisterToRegister(amd64.ORPS, x2.register, x1.register) 2071 } else { 2072 c.assembler.CompileRegisterToRegister(amd64.ORPD, x2.register, x1.register) 2073 } 2074 2075 // Record that we consumed the x2 and placed the copysign result in the x1's register. 2076 c.locationStack.markRegisterUnused(x2.register) 2077 c.locationStack.markRegisterUnused(x1.register) 2078 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2079 return nil 2080 } 2081 2082 // compileSqrt implements compiler.compileSqrt for the amd64 architecture. 2083 func (c *amd64Compiler) compileSqrt(o *wazeroir.UnionOperation) error { 2084 target := c.locationStack.peek() // Note this is peek! 
2085 if err := c.compileEnsureOnRegister(target); err != nil { 2086 return err 2087 } 2088 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2089 c.assembler.CompileRegisterToRegister(amd64.SQRTSS, target.register, target.register) 2090 } else { 2091 c.assembler.CompileRegisterToRegister(amd64.SQRTSD, target.register, target.register) 2092 } 2093 return nil 2094 } 2095 2096 // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the amd64 architecture. 2097 func (c *amd64Compiler) compileI32WrapFromI64() error { 2098 target := c.locationStack.peek() // Note this is peek! 2099 if err := c.compileEnsureOnRegister(target); err != nil { 2100 return err 2101 } 2102 c.assembler.CompileRegisterToRegister(amd64.MOVL, target.register, target.register) 2103 target.valueType = runtimeValueTypeI32 2104 return nil 2105 } 2106 2107 // compileITruncFromF implements compiler.compileITruncFromF for the amd64 architecture. 2108 // 2109 // Note: in the following implementation, we use CVTSS2SI and CVTSD2SI to convert floats to signed integers. 2110 // According to the Intel manual ([1],[2]), if the source float value is either +-Inf or NaN, or it exceeds representative ranges 2111 // of target signed integer, then the instruction returns "masked" response float32SignBitMask (or float64SignBitMask for 64 bit case). 2112 // [1] Chapter 11.5.2, SIMD Floating-Point Exception Conditions in "Vol 1, Intel® 64 and IA-32 Architectures Manual" 2113 // 2114 // https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-1-manual.html 2115 // 2116 // [2] https://xem.github.io/minix86/manual/intel-x86-and-64-manual-vol1/o_7281d5ea06a5b67a-268.html 2117 func (c *amd64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) (err error) { 2118 inputType := wazeroir.Float(o.B1) 2119 outputType := wazeroir.SignedInt(o.B2) 2120 nonTrapping := o.B3 2121 if inputType == wazeroir.Float32 && outputType == wazeroir.SignedInt32 { 2122 err = c.emitSignedI32TruncFromFloat(true, nonTrapping) 2123 } else if inputType == wazeroir.Float32 && outputType == wazeroir.SignedInt64 { 2124 err = c.emitSignedI64TruncFromFloat(true, nonTrapping) 2125 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedInt32 { 2126 err = c.emitSignedI32TruncFromFloat(false, nonTrapping) 2127 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedInt64 { 2128 err = c.emitSignedI64TruncFromFloat(false, nonTrapping) 2129 } else if inputType == wazeroir.Float32 && outputType == wazeroir.SignedUint32 { 2130 err = c.emitUnsignedI32TruncFromFloat(true, nonTrapping) 2131 } else if inputType == wazeroir.Float32 && outputType == wazeroir.SignedUint64 { 2132 err = c.emitUnsignedI64TruncFromFloat(true, nonTrapping) 2133 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedUint32 { 2134 err = c.emitUnsignedI32TruncFromFloat(false, nonTrapping) 2135 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedUint64 { 2136 err = c.emitUnsignedI64TruncFromFloat(false, nonTrapping) 2137 } 2138 return 2139 } 2140 2141 // emitUnsignedI32TruncFromFloat implements compileITruncFromF when the destination type is a 32-bit unsigned integer. 
2142 func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2143 	source := c.locationStack.pop()
2144 	if err := c.compileEnsureOnRegister(source); err != nil {
2145 		return err
2146 	}
2147 
2148 	result, err := c.allocateRegister(registerTypeGeneralPurpose)
2149 	if err != nil {
2150 		return err
2151 	}
2152 
2153 	// First, we check whether the source float value is above or equal to math.MaxInt32+1.
2154 	if isFloat32Bit {
2155 		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMaximumSigned32bitIntPlusOne, source.register)
2156 	} else {
2157 		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMaximumSigned32bitIntPlusOne, source.register)
2158 	}
2159 	if err != nil {
2160 		return err
2161 	}
2162 
2163 	// Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2164 	var nonTrappingNaNJump asm.Node
2165 	if nonTrapping {
2166 		jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2167 		// In the non-trapping case, NaN is converted to zero.
2168 		// Zero out the result register by XORing it with itself.
2169 		c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2170 		nonTrappingNaNJump = c.assembler.CompileJump(amd64.JMP)
2171 		c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2172 	} else {
2173 		c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion)
2174 	}
2175 
2176 	// Jump if the source float value is above or equal to math.MaxInt32+1.
2177 	jmpAboveOrEqualMaxInt32PlusOne := c.assembler.CompileJump(amd64.JCC)
2178 
2179 	// Next, we convert the value as a signed integer.
2180 	if isFloat32Bit {
2181 		c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2182 	} else {
2183 		c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2184 	}
2185 
2186 	// Then, if the result is negative, the conversion was invalid: the source was a negative float (incl. -Inf).
2187 	c.assembler.CompileRegisterToRegister(amd64.TESTL, result, result)
2188 
2189 	var nonTrappingMinusJump asm.Node
2190 	if nonTrapping {
2191 		jmpIfNotMinusOrMinusInf := c.assembler.CompileJump(amd64.JPL)
2192 		// In the non-trapping case, the negative value is converted to zero.
2193 		// Zero out the result register by XORing it with itself.
2194 		c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2195 		nonTrappingMinusJump = c.assembler.CompileJump(amd64.JMP)
2196 		c.assembler.SetJumpTargetOnNext(jmpIfNotMinusOrMinusInf)
2197 	} else {
2198 		c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2199 	}
2200 
2201 	// Otherwise, the value is valid.
2202 	okJmpForLessThanMaxInt32PlusOne := c.assembler.CompileJump(amd64.JMP)
2203 
2204 	// Now, start handling the case where the original float value is above or equal to math.MaxInt32+1.
2205 	//
2206 	// First, we subtract math.MaxInt32+1 from the original value so it can fit in a signed 32-bit integer.
2207 	c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxInt32PlusOne)
2208 	if isFloat32Bit {
2209 		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS, c.float32ForMaximumSigned32bitIntPlusOne, source.register)
2210 	} else {
2211 		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD, c.float64ForMaximumSigned32bitIntPlusOne, source.register)
2212 	}
2213 	if err != nil {
2214 		return err
2215 	}
2216 
2217 	// Then, convert the subtracted value as a signed 32-bit integer.
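	// Illustrative note (added for exposition; not part of the original wazero source): ignoring the
	// trap/saturation paths, the overall strategy of this function can be summarized in plain Go as
	// (truncU32 is a hypothetical name; the f32 source works the same way):
	//
	//	func truncU32(f float64) uint32 {
	//		const twoTo31 = 2147483648.0 // math.MaxInt32 + 1
	//		if f < twoTo31 {
	//			return uint32(int32(f)) // small enough for a plain signed conversion
	//		}
	//		return uint32(int32(f-twoTo31)) + 0x8000_0000 // convert the shifted value, then add 2^31 back
	//	}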
2218 	if isFloat32Bit {
2219 		c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2220 	} else {
2221 		c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2222 	}
2223 
2224 	// Next, we have to check if the value came from NaN or +Inf.
2225 	// NaN or +Inf results in 0x8000_0000 according to the conversion's semantics,
2226 	// so we check whether the resulting int value is negative or not.
2227 	c.assembler.CompileRegisterToRegister(amd64.TESTL, result, result)
2228 
2229 	// If the result is negative, the conversion is invalid (from NaN or +Inf).
2230 	var nonTrappingAboveOrEqualMaxInt32PlusOne asm.Node
2231 	if nonTrapping {
2232 		jmpIfNotPlusInf := c.assembler.CompileJump(amd64.JPL)
2233 		err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.maximum32BitUnsignedInt, result)
2234 		if err != nil {
2235 			return err
2236 		}
2237 		nonTrappingAboveOrEqualMaxInt32PlusOne = c.assembler.CompileJump(amd64.JMP)
2238 		c.assembler.SetJumpTargetOnNext(jmpIfNotPlusInf)
2239 	} else {
2240 		c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2241 	}
2242 
2243 	// Otherwise, we successfully converted the source float minus (math.MaxInt32+1) to int.
2244 	// So, we recover the original (unsigned) value by adding math.MaxInt32+1 (whose bit pattern equals the sign bit mask) back to the integer result.
2245 	if err = c.assembler.CompileStaticConstToRegister(amd64.ADDL, c.float32SignBitMask, result); err != nil {
2246 		return err
2247 	}
2248 
2249 	// We jump to the next instructions for valid cases.
2250 	c.assembler.SetJumpTargetOnNext(okJmpForLessThanMaxInt32PlusOne)
2251 	if nonTrapping {
2252 		c.assembler.SetJumpTargetOnNext(nonTrappingAboveOrEqualMaxInt32PlusOne)
2253 		c.assembler.SetJumpTargetOnNext(nonTrappingMinusJump)
2254 		c.assembler.SetJumpTargetOnNext(nonTrappingNaNJump)
2255 	}
2256 
2257 	// We consumed the source's register and placed the conversion result
2258 	// in the result register.
2259 	c.locationStack.markRegisterUnused(source.register)
2260 	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
2261 	return nil
2262 }
2263 
2264 // emitUnsignedI64TruncFromFloat implements compileITruncFromF when the destination type is a 64-bit unsigned integer.
2265 func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2266 	source := c.locationStack.pop()
2267 	if err := c.compileEnsureOnRegister(source); err != nil {
2268 		return err
2269 	}
2270 
2271 	result, err := c.allocateRegister(registerTypeGeneralPurpose)
2272 	if err != nil {
2273 		return err
2274 	}
2275 
2276 	// First, we check whether the source float value is above or equal to math.MaxInt64+1.
2277 	if isFloat32Bit {
2278 		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMaximumSigned64bitIntPlusOne, source.register)
2279 	} else {
2280 		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMaximumSigned64bitIntPlusOne, source.register)
2281 	}
2282 	if err != nil {
2283 		return err
2284 	}
2285 
2286 	// Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2287 	var nonTrappingNaNJump asm.Node
2288 	if nonTrapping {
2289 		jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2290 		// In the non-trapping case, NaN is converted to zero.
2291 		// Zero out the result register by XORing it with itself.
2292 		c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2293 		nonTrappingNaNJump = c.assembler.CompileJump(amd64.JMP)
2294 		c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2295 	} else {
2296 		c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion)
2297 	}
2298 
2299 	// Jump if the source float value is above or equal to math.MaxInt64+1.
2300 	jmpAboveOrEqualMaxInt64PlusOne := c.assembler.CompileJump(amd64.JCC)
2301 
2302 	// Next, we convert the value as a signed integer.
2303 	if isFloat32Bit {
2304 		c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2305 	} else {
2306 		c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2307 	}
2308 
2309 	// Then, if the result is negative, the conversion was invalid: the source was a negative float (incl. -Inf).
2310 	c.assembler.CompileRegisterToRegister(amd64.TESTQ, result, result)
2311 
2312 	var nonTrappingMinusJump asm.Node
2313 	if nonTrapping {
2314 		jmpIfNotMinusOrMinusInf := c.assembler.CompileJump(amd64.JPL)
2315 		// In the non-trapping case, the negative value is converted to zero.
2316 		// Zero out the result register by XORing it with itself.
2317 		c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2318 		nonTrappingMinusJump = c.assembler.CompileJump(amd64.JMP)
2319 		c.assembler.SetJumpTargetOnNext(jmpIfNotMinusOrMinusInf)
2320 	} else {
2321 		c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2322 	}
2323 
2324 	// Otherwise, the value is valid.
2325 	okJmpForLessThanMaxInt64PlusOne := c.assembler.CompileJump(amd64.JMP)
2326 
2327 	// Now, start handling the case where the original float value is above or equal to math.MaxInt64+1.
2328 	//
2329 	// First, we subtract math.MaxInt64+1 from the original value so it can fit in a signed 64-bit integer.
2330 	c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxInt64PlusOne)
2331 	if isFloat32Bit {
2332 		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS, c.float32ForMaximumSigned64bitIntPlusOne, source.register)
2333 	} else {
2334 		err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD, c.float64ForMaximumSigned64bitIntPlusOne, source.register)
2335 	}
2336 	if err != nil {
2337 		return err
2338 	}
2339 
2340 	// Then, convert the subtracted value as a signed 64-bit integer.
2341 	if isFloat32Bit {
2342 		c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2343 	} else {
2344 		c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2345 	}
2346 
2347 	// Next, we have to check if the value came from NaN or +Inf.
2348 	// NaN or +Inf results in 0x8000_0000_0000_0000 according to the conversion's semantics,
2349 	// so we check whether the resulting int value is negative or not.
2350 	c.assembler.CompileRegisterToRegister(amd64.TESTQ, result, result)
2351 
2352 	// If the result is negative, the conversion is invalid (from NaN or +Inf).
2353 	var nonTrappingAboveOrEqualMaxInt64PlusOne asm.Node
2354 	if nonTrapping {
2355 		jmpIfNotPlusInf := c.assembler.CompileJump(amd64.JPL)
2356 		err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.maximum64BitUnsignedInt, result)
2357 		if err != nil {
2358 			return err
2359 		}
2360 		nonTrappingAboveOrEqualMaxInt64PlusOne = c.assembler.CompileJump(amd64.JMP)
2361 		c.assembler.SetJumpTargetOnNext(jmpIfNotPlusInf)
2362 	} else {
2363 		c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2364 	}
2365 
2366 	// Otherwise, we successfully converted the source float minus (math.MaxInt64+1) to int.
2367 	// So, we recover the original (unsigned) value by adding math.MaxInt64+1 (whose bit pattern equals the sign bit mask) back to the integer result.
2368 	if err = c.assembler.CompileStaticConstToRegister(amd64.ADDQ, c.float64SignBitMask, result); err != nil {
2369 		return err
2370 	}
2371 
2372 	// We jump to the next instructions for valid cases.
2373 	c.assembler.SetJumpTargetOnNext(okJmpForLessThanMaxInt64PlusOne)
2374 	if nonTrapping {
2375 		c.assembler.SetJumpTargetOnNext(nonTrappingAboveOrEqualMaxInt64PlusOne)
2376 		c.assembler.SetJumpTargetOnNext(nonTrappingMinusJump)
2377 		c.assembler.SetJumpTargetOnNext(nonTrappingNaNJump)
2378 	}
2379 
2380 	// We consumed the source's register and placed the conversion result
2381 	// in the result register.
2382 	c.locationStack.markRegisterUnused(source.register)
2383 	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
2384 	return nil
2385 }
2386 
2387 // emitSignedI32TruncFromFloat implements compileITruncFromF when the destination type is a 32-bit signed integer.
2388 func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2389 	source := c.locationStack.pop()
2390 	if err := c.compileEnsureOnRegister(source); err != nil {
2391 		return err
2392 	}
2393 
2394 	result, err := c.allocateRegister(registerTypeGeneralPurpose)
2395 	if err != nil {
2396 		return err
2397 	}
2398 
2399 	// First, we unconditionally convert the source to an integer via CVTTSS2SI (CVTTSD2SI for a 64-bit float).
2400 	if isFloat32Bit {
2401 		c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2402 	} else {
2403 		c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2404 	}
2405 
2406 	// We compare the conversion result with the sign bit mask to check whether either
2407 	// 1) the source float value is +-Inf or NaN, or it exceeds the representable range of a 32-bit signed integer, or
2408 	// 2) the source equals the minimum signed 32-bit integer (=-2147483648.000000), whose bit pattern is float32ForMinimumSigned32bitInteger for a 32-bit float
2409 	// or float64ForMinimumSigned32bitInteger for a 64-bit float.
2410 	err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, c.float32SignBitMask, result)
2411 	if err != nil {
2412 		return err
2413 	}
2414 
2415 	// Otherwise, jump to exit as the result is valid.
2416 	okJmp := c.assembler.CompileJump(amd64.JNE)
2417 
2418 	// Start handling the case of 1) and 2).
2419 	// First, check if the value is NaN.
2420 	if isFloat32Bit {
2421 		c.assembler.CompileRegisterToRegister(amd64.UCOMISS, source.register, source.register)
2422 	} else {
2423 		c.assembler.CompileRegisterToRegister(amd64.UCOMISD, source.register, source.register)
2424 	}
2425 
2426 	// Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2427 	var nontrappingNanJump asm.Node
2428 	if nonTrapping {
2429 		jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2430 		// In the non-trapping case, NaN is converted to zero.
2431 		// Zero out the result register by XORing it with itself.
2432 		c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2433 		nontrappingNanJump = c.assembler.CompileJump(amd64.JMP)
2434 		c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2435 	} else {
2436 		// If the value is NaN, we exit the function with nativeCallStatusCodeInvalidFloatToIntConversion.
2437 c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion) 2438 } 2439 2440 // Check if the value is larger than or equal the minimum 32-bit integer value, 2441 // meaning that the value exceeds the lower bound of 32-bit signed integer range. 2442 if isFloat32Bit { 2443 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMinimumSigned32bitInteger, source.register) 2444 } else { 2445 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMinimumSigned32bitInteger, source.register) 2446 } 2447 if err != nil { 2448 return err 2449 } 2450 2451 if !nonTrapping { 2452 // Trap if the value does not exceed the lower bound. 2453 if isFloat32Bit { 2454 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusIntegerOverflow) 2455 } else { 2456 c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusIntegerOverflow) 2457 } 2458 2459 // At this point, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum. 2460 // So, check if the value equals the minimum signed 32-bit int. 2461 if isFloat32Bit { 2462 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2463 } else { 2464 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2465 } 2466 if err != nil { 2467 return err 2468 } 2469 2470 // Trap if the value is not minus (= the minimum signed 32-bit int). 2471 c.compileMaybeExitFromNativeCode(amd64.JCS, nativeCallStatusIntegerOverflow) 2472 2473 // We jump to the next instructions for valid cases. 2474 c.assembler.SetJumpTargetOnNext(okJmp) 2475 } else { 2476 // Jump if the value does not exceed the lower bound. 2477 var jmpIfNotExceedsLowerBound asm.Node 2478 if isFloat32Bit { 2479 jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JCC) 2480 } else { 2481 jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JHI) 2482 } 2483 2484 // If the value exceeds the lower bound, we "saturate" it to the minimum. 2485 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.minimum32BitSignedInt, result); err != nil { 2486 return err 2487 } 2488 nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) 2489 2490 // Otherwise, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum. 2491 c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) 2492 if isFloat32Bit { 2493 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2494 } else { 2495 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2496 } 2497 if err != nil { 2498 return err 2499 } 2500 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 32-bit int). 2501 2502 // If the value exceeds signed 32-bit maximum, we saturate it to the maximum. 2503 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.maximum32BitSignedInt, result); err != nil { 2504 return err 2505 } 2506 2507 c.assembler.SetJumpTargetOnNext(okJmp) 2508 c.assembler.SetJumpTargetOnNext(nontrappingNanJump) 2509 c.assembler.SetJumpTargetOnNext(nonTrappingSaturatedMinimumJump) 2510 c.assembler.SetJumpTargetOnNext(jmpIfMinimumSignedInt) 2511 } 2512 2513 // We consumed the source's register and placed the conversion result 2514 // in the result register. 
2515 	c.locationStack.markRegisterUnused(source.register)
2516 	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
2517 	return nil
2518 }
2519 
2520 // emitSignedI64TruncFromFloat implements compileITruncFromF when the destination type is a 64-bit signed integer.
2521 func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2522 	source := c.locationStack.pop()
2523 	if err := c.compileEnsureOnRegister(source); err != nil {
2524 		return err
2525 	}
2526 
2527 	result, err := c.allocateRegister(registerTypeGeneralPurpose)
2528 	if err != nil {
2529 		return err
2530 	}
2531 
2532 	// First, we unconditionally convert the source to an integer via CVTTSS2SI (CVTTSD2SI for a 64-bit float).
2533 	if isFloat32Bit {
2534 		c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2535 	} else {
2536 		c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2537 	}
2538 
2539 	// We compare the conversion result with the sign bit mask to check whether either
2540 	// 1) the source float value is +-Inf or NaN, or it exceeds the representable range of a 64-bit signed integer, or
2541 	// 2) the source equals the minimum signed 64-bit integer (=-9223372036854775808.0), whose bit pattern is float32ForMinimumSigned64bitInteger for a 32-bit float
2542 	// or float64ForMinimumSigned64bitInteger for a 64-bit float.
2543 	err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, c.float64SignBitMask, result)
2544 	if err != nil {
2545 		return err
2546 	}
2547 
2548 	// Otherwise, we simply jump to exit as the result is valid.
2549 	okJmp := c.assembler.CompileJump(amd64.JNE)
2550 
2551 	// Start handling the case of 1) and 2).
2552 	// First, check if the value is NaN.
2553 	if isFloat32Bit {
2554 		c.assembler.CompileRegisterToRegister(amd64.UCOMISS, source.register, source.register)
2555 	} else {
2556 		c.assembler.CompileRegisterToRegister(amd64.UCOMISD, source.register, source.register)
2557 	}
2558 
2559 	// Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2560 	var nontrappingNanJump asm.Node
2561 	if nonTrapping {
2562 		jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2563 		// In the non-trapping case, NaN is converted to zero.
2564 		// Zero out the result register by XORing it with itself.
2565 		c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2566 		nontrappingNanJump = c.assembler.CompileJump(amd64.JMP)
2567 		c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2568 	} else {
2569 		c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion)
2570 	}
2571 
2572 	// Check whether the value is smaller than the minimum 64-bit integer value,
2573 	// i.e. whether it exceeds the lower bound of the 64-bit signed integer range.
2574 	if isFloat32Bit {
2575 		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMinimumSigned64bitInteger, source.register)
2576 	} else {
2577 		err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMinimumSigned64bitInteger, source.register)
2578 	}
2579 	if err != nil {
2580 		return err
2581 	}
2582 
2583 	if !nonTrapping {
2584 		// Trap if the value exceeds the lower bound (e.g. it is -Inf).
2585 		c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusIntegerOverflow)
2586 
2587 		// At this point, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than the 64-bit maximum.
2588 		// So, check if the value equals the minimum signed 64-bit int.
2589 if isFloat32Bit { 2590 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2591 } else { 2592 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2593 } 2594 if err != nil { 2595 return err 2596 } 2597 2598 // Trap if the value is not minus (= the minimum signed 64-bit int). 2599 c.compileMaybeExitFromNativeCode(amd64.JCS, nativeCallStatusIntegerOverflow) 2600 2601 // We jump to the next instructions for valid cases. 2602 c.assembler.SetJumpTargetOnNext(okJmp) 2603 } else { 2604 // Jump if the value is not -Inf. 2605 jmpIfNotExceedsLowerBound := c.assembler.CompileJump(amd64.JCC) 2606 2607 // If the value exceeds the lower bound, we "saturate" it to the minimum. 2608 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.minimum64BitSignedInt, result) 2609 if err != nil { 2610 return err 2611 } 2612 2613 nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) 2614 2615 // Otherwise, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum. 2616 // So, check if the value equals the minimum signed 64-bit int. 2617 c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) 2618 if isFloat32Bit { 2619 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2620 } else { 2621 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2622 } 2623 if err != nil { 2624 return err 2625 } 2626 2627 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 64-bit int). 2628 2629 // If the value exceeds signed 64-bit maximum, we saturate it to the maximum. 2630 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.maximum64BitSignedInt, result); err != nil { 2631 return err 2632 } 2633 2634 c.assembler.SetJumpTargetOnNext(okJmp) 2635 c.assembler.SetJumpTargetOnNext(jmpIfMinimumSignedInt) 2636 c.assembler.SetJumpTargetOnNext(nonTrappingSaturatedMinimumJump) 2637 c.assembler.SetJumpTargetOnNext(nontrappingNanJump) 2638 } 2639 2640 // We consumed the source's register and placed the conversion result 2641 // in the result register. 2642 c.locationStack.markRegisterUnused(source.register) 2643 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64) 2644 return nil 2645 } 2646 2647 // compileFConvertFromI implements compiler.compileFConvertFromI for the amd64 architecture. 
2648 func (c *amd64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) (err error) { 2649 inputType := wazeroir.SignedInt(o.B1) 2650 outputType := wazeroir.Float(o.B2) 2651 if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 { 2652 err = c.compileSimpleConversion(amd64.CVTSL2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 32bit int 2653 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 { 2654 err = c.compileSimpleConversion(amd64.CVTSQ2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 64bit int 2655 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 { 2656 err = c.compileSimpleConversion(amd64.CVTSL2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 32bit int 2657 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 { 2658 err = c.compileSimpleConversion(amd64.CVTSQ2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 64bit int 2659 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 { 2660 // See the following link for why we use 64bit conversion for unsigned 32bit integer sources: 2661 // https://stackoverflow.com/questions/41495498/fpu-operations-generated-by-gcc-during-casting-integer-to-float. 2662 // 2663 // Here's the summary: 2664 // >> CVTSI2SS is indeed designed for converting a signed integer to a scalar single-precision float, 2665 // >> not an unsigned integer like you have here. So what gives? Well, a 64-bit processor has 64-bit wide 2666 // >> registers available, so the unsigned 32-bit input values can be stored as signed 64-bit intermediate values, 2667 // >> which allows CVTSI2SS to be used after all. 2668 err = c.compileSimpleConversion(amd64.CVTSQ2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 64bit int. 2669 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 { 2670 // For the same reason above, we use 64bit conversion for unsigned 32bit. 2671 err = c.compileSimpleConversion(amd64.CVTSQ2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 64bit int. 2672 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 { 2673 err = c.emitUnsignedInt64ToFloatConversion(true) 2674 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 { 2675 err = c.emitUnsignedInt64ToFloatConversion(false) 2676 } 2677 return 2678 } 2679 2680 // emitUnsignedInt64ToFloatConversion is handling the case of unsigned 64-bit integer 2681 // in compileFConvertFromI. 2682 func (c *amd64Compiler) emitUnsignedInt64ToFloatConversion(isFloat32bit bool) error { 2683 // The logic here is exactly the same as GCC emits for the following code: 2684 // 2685 // float convert(int num) { 2686 // float foo; 2687 // uint64_t ptr1 = 100; 2688 // foo = (float)(ptr1); 2689 // return foo; 2690 // } 2691 // 2692 // which is compiled by GCC as 2693 // 2694 // convert: 2695 // push rbp 2696 // mov rbp, rsp 2697 // mov DWORD PTR [rbp-20], edi 2698 // mov DWORD PTR [rbp-4], 100 2699 // mov eax, DWORD PTR [rbp-4] 2700 // test rax, rax 2701 // js .handle_sign_bit_case 2702 // cvtsi2ss xmm0, rax 2703 // jmp .exit 2704 // .handle_sign_bit_case: 2705 // mov rdx, rax 2706 // shr rdx 2707 // and eax, 1 2708 // or rdx, rax 2709 // cvtsi2ss xmm0, rdx 2710 // addsd xmm0, xmm0 2711 // .exit: ... 2712 // 2713 // tl;dr is that we have a branch depending on whether or not sign bit is set. 
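	// Illustrative note (added for exposition; not part of the original wazero source): the same branch
	// can be written in plain Go as (u64ToFloat64 is a hypothetical name; the float32 case is analogous):
	//
	//	func u64ToFloat64(v uint64) float64 {
	//		if int64(v) >= 0 { // sign bit clear: a plain signed conversion suffices
	//			return float64(int64(v))
	//		}
	//		// Sign bit set: halve the value while keeping the low bit (so rounding is unaffected),
	//		// convert as signed, then double the result.
	//		return float64(int64(v>>1|v&1)) * 2
	//	}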
2714 2715 origin := c.locationStack.pop() 2716 if err := c.compileEnsureOnRegister(origin); err != nil { 2717 return err 2718 } 2719 2720 dest, err := c.allocateRegister(registerTypeVector) 2721 if err != nil { 2722 return err 2723 } 2724 2725 c.locationStack.markRegisterUsed(dest) 2726 2727 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2728 if err != nil { 2729 return err 2730 } 2731 2732 // Check if the most significant bit (sign bit) is set. 2733 c.assembler.CompileRegisterToRegister(amd64.TESTQ, origin.register, origin.register) 2734 2735 // Jump if the sign bit is set. 2736 jmpIfSignbitSet := c.assembler.CompileJump(amd64.JMI) 2737 2738 // Otherwise, we could fit the unsigned int into float32. 2739 // So, we convert it to float32 and emit jump instruction to exit from this branch. 2740 if isFloat32bit { 2741 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SS, origin.register, dest) 2742 } else { 2743 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SD, origin.register, dest) 2744 } 2745 exitFromSignbitUnSet := c.assembler.CompileJump(amd64.JMP) 2746 2747 // Now handling the case where sign-bit is set. 2748 // We emit the following sequences: 2749 // mov tmpReg, origin 2750 // shr tmpReg, 1 2751 // and origin, 1 2752 // or tmpReg, origin 2753 // cvtsi2ss xmm0, tmpReg 2754 // addsd xmm0, xmm0 2755 2756 c.assembler.SetJumpTargetOnNext(jmpIfSignbitSet) 2757 c.assembler.CompileRegisterToRegister(amd64.MOVQ, origin.register, tmpReg) 2758 c.assembler.CompileConstToRegister(amd64.SHRQ, 1, tmpReg) 2759 c.assembler.CompileConstToRegister(amd64.ANDQ, 1, origin.register) 2760 c.assembler.CompileRegisterToRegister(amd64.ORQ, origin.register, tmpReg) 2761 if isFloat32bit { 2762 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SS, tmpReg, dest) 2763 } else { 2764 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SD, tmpReg, dest) 2765 } 2766 if isFloat32bit { 2767 c.assembler.CompileRegisterToRegister(amd64.ADDSS, dest, dest) 2768 } else { 2769 c.assembler.CompileRegisterToRegister(amd64.ADDSD, dest, dest) 2770 } 2771 2772 // Now, we finished the sign-bit set branch. 2773 // We have to make the exit jump target of sign-bit unset branch 2774 // towards the next instruction. 2775 c.assembler.SetJumpTargetOnNext(exitFromSignbitUnSet) 2776 2777 // We consumed the origin's register and placed the conversion result 2778 // in the dest register. 2779 c.locationStack.markRegisterUnused(origin.register) 2780 if isFloat32bit { 2781 c.pushRuntimeValueLocationOnRegister(dest, runtimeValueTypeF32) 2782 } else { 2783 c.pushRuntimeValueLocationOnRegister(dest, runtimeValueTypeF64) 2784 } 2785 return nil 2786 } 2787 2788 // compileSimpleConversion pops a value type from the stack, and applies the 2789 // given instruction on it, and push the result onto a register of the given type. 
2790 func (c *amd64Compiler) compileSimpleConversion(convInstruction asm.Instruction, 2791 destinationRegisterType registerType, destinationValueType runtimeValueType, 2792 ) error { 2793 origin := c.locationStack.pop() 2794 if err := c.compileEnsureOnRegister(origin); err != nil { 2795 return err 2796 } 2797 2798 dest, err := c.allocateRegister(destinationRegisterType) 2799 if err != nil { 2800 return err 2801 } 2802 2803 c.assembler.CompileRegisterToRegister(convInstruction, origin.register, dest) 2804 2805 c.locationStack.markRegisterUnused(origin.register) 2806 c.pushRuntimeValueLocationOnRegister(dest, destinationValueType) 2807 return nil 2808 } 2809 2810 // compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the amd64 architecture. 2811 func (c *amd64Compiler) compileF32DemoteFromF64() error { 2812 target := c.locationStack.peek() // Note this is peek! 2813 if err := c.compileEnsureOnRegister(target); err != nil { 2814 return err 2815 } 2816 2817 c.assembler.CompileRegisterToRegister(amd64.CVTSD2SS, target.register, target.register) 2818 target.valueType = runtimeValueTypeF32 2819 return nil 2820 } 2821 2822 // compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the amd64 architecture. 2823 func (c *amd64Compiler) compileF64PromoteFromF32() error { 2824 target := c.locationStack.peek() // Note this is peek! 2825 if err := c.compileEnsureOnRegister(target); err != nil { 2826 return err 2827 } 2828 2829 c.assembler.CompileRegisterToRegister(amd64.CVTSS2SD, target.register, target.register) 2830 target.valueType = runtimeValueTypeF64 2831 return nil 2832 } 2833 2834 // compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the amd64 architecture. 2835 func (c *amd64Compiler) compileI32ReinterpretFromF32() error { 2836 if peek := c.locationStack.peek(); peek.onStack() { 2837 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2838 peek.valueType = runtimeValueTypeI32 2839 return nil 2840 } 2841 return c.compileSimpleConversion(amd64.MOVL, registerTypeGeneralPurpose, runtimeValueTypeI32) 2842 } 2843 2844 // compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the amd64 architecture. 2845 func (c *amd64Compiler) compileI64ReinterpretFromF64() error { 2846 if peek := c.locationStack.peek(); peek.onStack() { 2847 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2848 peek.valueType = runtimeValueTypeI64 2849 return nil 2850 } 2851 return c.compileSimpleConversion(amd64.MOVQ, registerTypeGeneralPurpose, runtimeValueTypeI64) 2852 } 2853 2854 // compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the amd64 architecture. 2855 func (c *amd64Compiler) compileF32ReinterpretFromI32() error { 2856 if peek := c.locationStack.peek(); peek.onStack() { 2857 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2858 peek.valueType = runtimeValueTypeF32 2859 return nil 2860 } 2861 return c.compileSimpleConversion(amd64.MOVL, registerTypeVector, runtimeValueTypeF32) 2862 } 2863 2864 // compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the amd64 architecture. 2865 func (c *amd64Compiler) compileF64ReinterpretFromI64() error { 2866 if peek := c.locationStack.peek(); peek.onStack() { 2867 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 
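// The reinterpret operations in this group never change any bits, only the register class
// (general purpose vs. XMM) and the tracked value type. An illustrative pure-Go equivalent
// (not part of this file, assuming "math" is imported):
//
//	func i64ReinterpretF64(f float64) uint64 { return math.Float64bits(f) }
//	func f64ReinterpretI64(i uint64) float64 { return math.Float64frombits(i) }
//
// That is why a value that already lives on the memory stack can stay untouched: only its
// recorded runtimeValueType needs to change, as the next line does.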
2868 peek.valueType = runtimeValueTypeF64 2869 return nil 2870 } 2871 return c.compileSimpleConversion(amd64.MOVQ, registerTypeVector, runtimeValueTypeF64) 2872 } 2873 2874 // compileExtend implements compiler.compileExtend for the amd64 architecture. 2875 func (c *amd64Compiler) compileExtend(o *wazeroir.UnionOperation) error { 2876 var inst asm.Instruction 2877 signed := o.B1 != 0 2878 if signed { 2879 inst = amd64.MOVLQSX // = MOVSXD https://www.felixcloutier.com/x86/movsx:movsxd 2880 } else { 2881 inst = amd64.MOVL 2882 } 2883 return c.compileExtendImpl(inst, runtimeValueTypeI64) 2884 } 2885 2886 // compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the amd64 architecture. 2887 func (c *amd64Compiler) compileSignExtend32From8() error { 2888 return c.compileExtendImpl(amd64.MOVBLSX, runtimeValueTypeI32) 2889 } 2890 2891 // compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the amd64 architecture. 2892 func (c *amd64Compiler) compileSignExtend32From16() error { 2893 return c.compileExtendImpl(amd64.MOVWLSX, runtimeValueTypeI32) 2894 } 2895 2896 // compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the amd64 architecture. 2897 func (c *amd64Compiler) compileSignExtend64From8() error { 2898 return c.compileExtendImpl(amd64.MOVBQSX, runtimeValueTypeI64) 2899 } 2900 2901 // compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the amd64 architecture. 2902 func (c *amd64Compiler) compileSignExtend64From16() error { 2903 return c.compileExtendImpl(amd64.MOVWQSX, runtimeValueTypeI64) 2904 } 2905 2906 // compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the amd64 architecture. 2907 func (c *amd64Compiler) compileSignExtend64From32() error { 2908 return c.compileExtendImpl(amd64.MOVLQSX, runtimeValueTypeI64) 2909 } 2910 2911 func (c *amd64Compiler) compileExtendImpl(inst asm.Instruction, destinationType runtimeValueType) error { 2912 target := c.locationStack.peek() // Note this is peek! 2913 if err := c.compileEnsureOnRegister(target); err != nil { 2914 return err 2915 } 2916 2917 c.assembler.CompileRegisterToRegister(inst, target.register, target.register) 2918 target.valueType = destinationType 2919 return nil 2920 } 2921 2922 // compileEq implements compiler.compileEq for the amd64 architecture. 2923 func (c *amd64Compiler) compileEq(o *wazeroir.UnionOperation) error { 2924 return c.compileEqOrNe(wazeroir.UnsignedType(o.B1), true) 2925 } 2926 2927 // compileNe implements compiler.compileNe for the amd64 architecture. 2928 func (c *amd64Compiler) compileNe(o *wazeroir.UnionOperation) error { 2929 return c.compileEqOrNe(wazeroir.UnsignedType(o.B1), false) 2930 } 2931 2932 func (c *amd64Compiler) compileEqOrNe(t wazeroir.UnsignedType, shouldEqual bool) (err error) { 2933 x2 := c.locationStack.pop() 2934 if err := c.compileEnsureOnRegister(x2); err != nil { 2935 return err 2936 } 2937 2938 x1 := c.locationStack.pop() 2939 if err := c.compileEnsureOnRegister(x1); err != nil { 2940 return err 2941 } 2942 2943 x1r, x2r := x1.register, x2.register 2944 2945 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 
2946 c.locationStack.releaseRegister(x1) 2947 c.locationStack.releaseRegister(x2) 2948 2949 switch t { 2950 case wazeroir.UnsignedTypeI32: 2951 err = c.compileEqOrNeForInts(x1r, x2r, amd64.CMPL, shouldEqual) 2952 case wazeroir.UnsignedTypeI64: 2953 err = c.compileEqOrNeForInts(x1r, x2r, amd64.CMPQ, shouldEqual) 2954 case wazeroir.UnsignedTypeF32: 2955 err = c.compileEqOrNeForFloats(x1r, x2r, amd64.UCOMISS, shouldEqual) 2956 case wazeroir.UnsignedTypeF64: 2957 err = c.compileEqOrNeForFloats(x1r, x2r, amd64.UCOMISD, shouldEqual) 2958 } 2959 if err != nil { 2960 return 2961 } 2962 return 2963 } 2964 2965 func (c *amd64Compiler) compileEqOrNeForInts(x1Reg, x2Reg asm.Register, cmpInstruction asm.Instruction, 2966 shouldEqual bool, 2967 ) error { 2968 c.assembler.CompileRegisterToRegister(cmpInstruction, x2Reg, x1Reg) 2969 2970 // Record that the result is on the conditional register. 2971 var condReg asm.ConditionalRegisterState 2972 if shouldEqual { 2973 condReg = amd64.ConditionalRegisterStateE 2974 } else { 2975 condReg = amd64.ConditionalRegisterStateNE 2976 } 2977 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(condReg) 2978 loc.valueType = runtimeValueTypeI32 2979 return nil 2980 } 2981 2982 // For float EQ and NE, we have to take NaN values into account. 2983 // Notably, the Wasm specification states that if one of the operands is NaN, 2984 // the result must be zero for EQ and one for NE. 2985 func (c *amd64Compiler) compileEqOrNeForFloats(x1Reg, x2Reg asm.Register, cmpInstruction asm.Instruction, shouldEqual bool) error { 2986 // Before we allocate the result, we have to reserve two int registers. 2987 nanFragReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2988 if err != nil { 2989 return err 2990 } 2991 c.locationStack.markRegisterUsed(nanFragReg) 2992 cmpResultReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2993 if err != nil { 2994 return err 2995 } 2996 2997 // Then, execute the comparison. 2998 c.assembler.CompileRegisterToRegister(cmpInstruction, x2Reg, x1Reg) 2999 3000 // First, we get the parity flag which indicates whether one of the values was NaN. 3001 if shouldEqual { 3002 // Set 1 if neither of the values is NaN. 3003 c.assembler.CompileNoneToRegister(amd64.SETPC, nanFragReg) 3004 } else { 3005 // Set 1 if one of the values is NaN. 3006 c.assembler.CompileNoneToRegister(amd64.SETPS, nanFragReg) 3007 } 3008 3009 // Next, we get the usual comparison flag. 3010 if shouldEqual { 3011 // Set 1 if equal. 3012 c.assembler.CompileNoneToRegister(amd64.SETEQ, cmpResultReg) 3013 } else { 3014 // Set 1 if not equal. 3015 c.assembler.CompileNoneToRegister(amd64.SETNE, cmpResultReg) 3016 } 3017 3018 // Do "and" or "or" operations on these two flags to get the actual result. 3019 if shouldEqual { 3020 c.assembler.CompileRegisterToRegister(amd64.ANDL, nanFragReg, cmpResultReg) 3021 } else { 3022 c.assembler.CompileRegisterToRegister(amd64.ORL, nanFragReg, cmpResultReg) 3023 } 3024 3025 // Clear the unnecessary bits by zero-extending the first byte. 3026 // This is necessary because SET* only writes the lowest byte of the register, leaving the upper bits (8 and above) with whatever value was there before. 3027 c.assembler.CompileRegisterToRegister(amd64.MOVBLZX, cmpResultReg, cmpResultReg) 3028 3029 // Now we have the result in the cmpResultReg register, so we record it. 3030 c.pushRuntimeValueLocationOnRegister(cmpResultReg, runtimeValueTypeI32) 3031 // Also, we no longer need nanFragReg.
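// An illustrative pure-Go sketch of the flag combination above (not part of this file; the
// helper name is hypothetical). UCOMISS/UCOMISD set the parity flag when either operand is
// NaN, and in that unordered case the zero flag is also set, so SETEQ alone would wrongly
// report equality; AND-ing in the "neither is NaN" bit fixes that:
//
//	func floatEq(x, y float64) int32 {
//		neitherNaN := !math.IsNaN(x) && !math.IsNaN(y) // SETPC on the parity flag.
//		if neitherNaN && x == y {                      // ANDL of the two SET* results.
//			return 1
//		}
//		return 0
//	}
//
// For NE the two predicates are inverted and OR-ed instead.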
3032 c.locationStack.markRegisterUnused(nanFragReg) 3033 return nil 3034 } 3035 3036 // compileEqz implements compiler.compileEqz for the amd64 architecture. 3037 func (c *amd64Compiler) compileEqz(o *wazeroir.UnionOperation) (err error) { 3038 v := c.locationStack.pop() 3039 if err = c.compileEnsureOnRegister(v); err != nil { 3040 return err 3041 } 3042 3043 unsignedInt := wazeroir.UnsignedInt(o.B1) 3044 switch unsignedInt { 3045 case wazeroir.UnsignedInt32: 3046 err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, c.fourZeros, v.register) 3047 case wazeroir.UnsignedInt64: 3048 err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, c.eightZeros, v.register) 3049 } 3050 if err != nil { 3051 return err 3052 } 3053 3054 // v is consumed by the cmp operation so release it. 3055 c.locationStack.releaseRegister(v) 3056 3057 // Finally, record that the result is on the conditional register. 3058 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(amd64.ConditionalRegisterStateE) 3059 loc.valueType = runtimeValueTypeI32 3060 return nil 3061 } 3062 3063 // compileLt implements compiler.compileLt for the amd64 architecture. 3064 func (c *amd64Compiler) compileLt(o *wazeroir.UnionOperation) error { 3065 x2 := c.locationStack.pop() 3066 if err := c.compileEnsureOnRegister(x2); err != nil { 3067 return err 3068 } 3069 3070 x1 := c.locationStack.pop() 3071 if err := c.compileEnsureOnRegister(x1); err != nil { 3072 return err 3073 } 3074 3075 // Emit the compare instruction. 3076 var resultConditionState asm.ConditionalRegisterState 3077 var inst asm.Instruction 3078 signedType := wazeroir.SignedType(o.B1) 3079 switch signedType { 3080 case wazeroir.SignedTypeInt32: 3081 resultConditionState = amd64.ConditionalRegisterStateL 3082 inst = amd64.CMPL 3083 case wazeroir.SignedTypeUint32: 3084 resultConditionState = amd64.ConditionalRegisterStateB 3085 inst = amd64.CMPL 3086 case wazeroir.SignedTypeInt64: 3087 inst = amd64.CMPQ 3088 resultConditionState = amd64.ConditionalRegisterStateL 3089 case wazeroir.SignedTypeUint64: 3090 resultConditionState = amd64.ConditionalRegisterStateB 3091 inst = amd64.CMPQ 3092 case wazeroir.SignedTypeFloat32: 3093 resultConditionState = amd64.ConditionalRegisterStateA 3094 inst = amd64.COMISS 3095 case wazeroir.SignedTypeFloat64: 3096 resultConditionState = amd64.ConditionalRegisterStateA 3097 inst = amd64.COMISD 3098 } 3099 c.assembler.CompileRegisterToRegister(inst, x1.register, x2.register) 3100 3101 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3102 c.locationStack.releaseRegister(x1) 3103 c.locationStack.releaseRegister(x2) 3104 3105 // Finally, record that the result is on the conditional register. 3106 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3107 loc.valueType = runtimeValueTypeI32 3108 return nil 3109 } 3110 3111 // compileGt implements compiler.compileGt for the amd64 architecture. 3112 func (c *amd64Compiler) compileGt(o *wazeroir.UnionOperation) error { 3113 x2 := c.locationStack.pop() 3114 if err := c.compileEnsureOnRegister(x2); err != nil { 3115 return err 3116 } 3117 3118 x1 := c.locationStack.pop() 3119 if err := c.compileEnsureOnRegister(x1); err != nil { 3120 return err 3121 } 3122 3123 // Emit the compare instruction. 
3124 var resultConditionState asm.ConditionalRegisterState 3125 signedType := wazeroir.SignedType(o.B1) 3126 switch signedType { 3127 case wazeroir.SignedTypeInt32: 3128 resultConditionState = amd64.ConditionalRegisterStateG 3129 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3130 case wazeroir.SignedTypeUint32: 3131 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3132 resultConditionState = amd64.ConditionalRegisterStateA 3133 case wazeroir.SignedTypeInt64: 3134 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3135 resultConditionState = amd64.ConditionalRegisterStateG 3136 case wazeroir.SignedTypeUint64: 3137 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3138 resultConditionState = amd64.ConditionalRegisterStateA 3139 case wazeroir.SignedTypeFloat32: 3140 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, x2.register, x1.register) 3141 resultConditionState = amd64.ConditionalRegisterStateA 3142 case wazeroir.SignedTypeFloat64: 3143 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, x2.register, x1.register) 3144 resultConditionState = amd64.ConditionalRegisterStateA 3145 } 3146 3147 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3148 c.locationStack.releaseRegister(x1) 3149 c.locationStack.releaseRegister(x2) 3150 3151 // Finally, record that the result is on the conditional register. 3152 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3153 loc.valueType = runtimeValueTypeI32 3154 return nil 3155 } 3156 3157 // compileLe implements compiler.compileLe for the amd64 architecture. 3158 func (c *amd64Compiler) compileLe(o *wazeroir.UnionOperation) error { 3159 x2 := c.locationStack.pop() 3160 if err := c.compileEnsureOnRegister(x2); err != nil { 3161 return err 3162 } 3163 3164 x1 := c.locationStack.pop() 3165 if err := c.compileEnsureOnRegister(x1); err != nil { 3166 return err 3167 } 3168 3169 // Emit the compare instruction. 3170 var inst asm.Instruction 3171 var resultConditionState asm.ConditionalRegisterState 3172 signedType := wazeroir.SignedType(o.B1) 3173 switch signedType { 3174 case wazeroir.SignedTypeInt32: 3175 resultConditionState = amd64.ConditionalRegisterStateLE 3176 inst = amd64.CMPL 3177 case wazeroir.SignedTypeUint32: 3178 resultConditionState = amd64.ConditionalRegisterStateBE 3179 inst = amd64.CMPL 3180 case wazeroir.SignedTypeInt64: 3181 resultConditionState = amd64.ConditionalRegisterStateLE 3182 inst = amd64.CMPQ 3183 case wazeroir.SignedTypeUint64: 3184 resultConditionState = amd64.ConditionalRegisterStateBE 3185 inst = amd64.CMPQ 3186 case wazeroir.SignedTypeFloat32: 3187 resultConditionState = amd64.ConditionalRegisterStateAE 3188 inst = amd64.UCOMISS 3189 case wazeroir.SignedTypeFloat64: 3190 resultConditionState = amd64.ConditionalRegisterStateAE 3191 inst = amd64.UCOMISD 3192 } 3193 c.assembler.CompileRegisterToRegister(inst, x1.register, x2.register) 3194 3195 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3196 c.locationStack.releaseRegister(x1) 3197 c.locationStack.releaseRegister(x2) 3198 3199 // Finally, record that the result is on the conditional register. 3200 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3201 loc.valueType = runtimeValueTypeI32 3202 return nil 3203 } 3204 3205 // compileGe implements compiler.compileGe for the amd64 architecture. 
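// For every comparison in this group the i32 result is not materialized right away: the
// location stack records which condition flag holds it, and it is turned into an actual 0/1
// register value only when a later operation needs it (see
// maybeCompileMoveTopConditionalToGeneralPurposeRegister used elsewhere in this file).
// Conceptually, the deferred materialization amounts to (illustrative sketch, not part of
// this file; the helper name is hypothetical):
//
//	func b2i32(cond bool) int32 {
//		if cond {
//			return 1
//		}
//		return 0
//	}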
3206 func (c *amd64Compiler) compileGe(o *wazeroir.UnionOperation) error { 3207 x2 := c.locationStack.pop() 3208 if err := c.compileEnsureOnRegister(x2); err != nil { 3209 return err 3210 } 3211 3212 x1 := c.locationStack.pop() 3213 if err := c.compileEnsureOnRegister(x1); err != nil { 3214 return err 3215 } 3216 3217 // Emit the compare instruction. 3218 var resultConditionState asm.ConditionalRegisterState 3219 signedType := wazeroir.SignedType(o.B1) 3220 switch signedType { 3221 case wazeroir.SignedTypeInt32: 3222 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3223 resultConditionState = amd64.ConditionalRegisterStateGE 3224 case wazeroir.SignedTypeUint32: 3225 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3226 resultConditionState = amd64.ConditionalRegisterStateAE 3227 case wazeroir.SignedTypeInt64: 3228 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3229 resultConditionState = amd64.ConditionalRegisterStateGE 3230 case wazeroir.SignedTypeUint64: 3231 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3232 resultConditionState = amd64.ConditionalRegisterStateAE 3233 case wazeroir.SignedTypeFloat32: 3234 c.assembler.CompileRegisterToRegister(amd64.COMISS, x2.register, x1.register) 3235 resultConditionState = amd64.ConditionalRegisterStateAE 3236 case wazeroir.SignedTypeFloat64: 3237 c.assembler.CompileRegisterToRegister(amd64.COMISD, x2.register, x1.register) 3238 resultConditionState = amd64.ConditionalRegisterStateAE 3239 } 3240 3241 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3242 c.locationStack.releaseRegister(x1) 3243 c.locationStack.releaseRegister(x2) 3244 3245 // Finally, record that the result is on the conditional register. 3246 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3247 loc.valueType = runtimeValueTypeI32 3248 return nil 3249 } 3250 3251 // compileLoad implements compiler.compileLoad for the amd64 architecture. 3252 func (c *amd64Compiler) compileLoad(o *wazeroir.UnionOperation) error { 3253 var ( 3254 isIntType bool 3255 movInst asm.Instruction 3256 targetSizeInBytes int64 3257 vt runtimeValueType 3258 ) 3259 3260 unsignedType := wazeroir.UnsignedType(o.B1) 3261 offset := uint32(o.U2) 3262 3263 switch unsignedType { 3264 case wazeroir.UnsignedTypeI32: 3265 isIntType = true 3266 movInst = amd64.MOVL 3267 targetSizeInBytes = 32 / 8 3268 vt = runtimeValueTypeI32 3269 case wazeroir.UnsignedTypeI64: 3270 isIntType = true 3271 movInst = amd64.MOVQ 3272 targetSizeInBytes = 64 / 8 3273 vt = runtimeValueTypeI64 3274 case wazeroir.UnsignedTypeF32: 3275 isIntType = false 3276 movInst = amd64.MOVL 3277 targetSizeInBytes = 32 / 8 3278 vt = runtimeValueTypeF32 3279 case wazeroir.UnsignedTypeF64: 3280 isIntType = false 3281 movInst = amd64.MOVQ 3282 targetSizeInBytes = 64 / 8 3283 vt = runtimeValueTypeF64 3284 } 3285 3286 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3287 if err != nil { 3288 return err 3289 } 3290 3291 if isIntType { 3292 // For integer types, read the corresponding bytes from the offset to the memory 3293 // and store the value to the int register. 3294 c.assembler.CompileMemoryWithIndexToRegister(movInst, 3295 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 
3296 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3297 reg) 3298 c.pushRuntimeValueLocationOnRegister(reg, vt) 3299 } else { 3300 // For float types, we read the value to the float register. 3301 floatReg, err := c.allocateRegister(registerTypeVector) 3302 if err != nil { 3303 return err 3304 } 3305 c.assembler.CompileMemoryWithIndexToRegister(movInst, 3306 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3307 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3308 floatReg) 3309 c.pushRuntimeValueLocationOnRegister(floatReg, vt) 3310 // We no longer need the int register so mark it unused. 3311 c.locationStack.markRegisterUnused(reg) 3312 } 3313 return nil 3314 } 3315 3316 // compileLoad8 implements compiler.compileLoad8 for the amd64 architecture. 3317 func (c *amd64Compiler) compileLoad8(o *wazeroir.UnionOperation) error { 3318 const targetSizeInBytes = 1 3319 offset := uint32(o.U2) 3320 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3321 if err != nil { 3322 return err 3323 } 3324 3325 // Then move a byte at the offset to the register. 3326 // Note that Load8 is only for integer types. 3327 var inst asm.Instruction 3328 var vt runtimeValueType 3329 signedInt := wazeroir.SignedInt(o.B1) 3330 switch signedInt { 3331 case wazeroir.SignedInt32: 3332 inst = amd64.MOVBLSX 3333 vt = runtimeValueTypeI32 3334 case wazeroir.SignedUint32: 3335 inst = amd64.MOVBLZX 3336 vt = runtimeValueTypeI32 3337 case wazeroir.SignedInt64: 3338 inst = amd64.MOVBQSX 3339 vt = runtimeValueTypeI64 3340 case wazeroir.SignedUint64: 3341 inst = amd64.MOVBQZX 3342 vt = runtimeValueTypeI64 3343 } 3344 3345 c.assembler.CompileMemoryWithIndexToRegister(inst, 3346 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3347 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3348 reg) 3349 3350 c.pushRuntimeValueLocationOnRegister(reg, vt) 3351 return nil 3352 } 3353 3354 // compileLoad16 implements compiler.compileLoad16 for the amd64 architecture. 3355 func (c *amd64Compiler) compileLoad16(o *wazeroir.UnionOperation) error { 3356 const targetSizeInBytes = 16 / 8 3357 offset := uint32(o.U2) 3358 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3359 if err != nil { 3360 return err 3361 } 3362 3363 // Then move 2 bytes at the offset to the register. 3364 // Note that Load16 is only for integer types. 3365 var inst asm.Instruction 3366 var vt runtimeValueType 3367 signedInt := wazeroir.SignedInt(o.B1) 3368 switch signedInt { 3369 case wazeroir.SignedInt32: 3370 inst = amd64.MOVWLSX 3371 vt = runtimeValueTypeI32 3372 case wazeroir.SignedInt64: 3373 inst = amd64.MOVWQSX 3374 vt = runtimeValueTypeI64 3375 case wazeroir.SignedUint32: 3376 inst = amd64.MOVWLZX 3377 vt = runtimeValueTypeI32 3378 case wazeroir.SignedUint64: 3379 inst = amd64.MOVWQZX 3380 vt = runtimeValueTypeI64 3381 } 3382 3383 c.assembler.CompileMemoryWithIndexToRegister(inst, 3384 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3385 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3386 reg) 3387 3388 c.pushRuntimeValueLocationOnRegister(reg, vt) 3389 return nil 3390 } 3391 3392 // compileLoad32 implements compiler.compileLoad32 for the amd64 architecture. 
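// The narrow loads (compileLoad8/compileLoad16 above, compileLoad32 below) choose the mov
// variant by the IR's signedness: the *SX forms sign-extend and the *ZX forms zero-extend.
// In Go terms, i32.load8_s versus i32.load8_u is (illustrative sketch, not part of this
// file; the helper names are hypothetical):
//
//	func load8s(mem []byte, addr uint32) int32  { return int32(int8(mem[addr])) } // MOVBLSX
//	func load8u(mem []byte, addr uint32) uint32 { return uint32(mem[addr]) }      // MOVBLZX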
3393 func (c *amd64Compiler) compileLoad32(o *wazeroir.UnionOperation) error { 3394 const targetSizeInBytes = 32 / 8 3395 offset := uint32(o.U2) 3396 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3397 if err != nil { 3398 return err 3399 } 3400 3401 // Then move 4 bytes at the offset to the register. 3402 var inst asm.Instruction 3403 signed := o.B1 == 1 3404 if signed { 3405 inst = amd64.MOVLQSX 3406 } else { 3407 inst = amd64.MOVLQZX 3408 } 3409 c.assembler.CompileMemoryWithIndexToRegister(inst, 3410 // We access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3411 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3412 reg) 3413 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64) 3414 return nil 3415 } 3416 3417 // compileMemoryAccessCeilSetup pops the top value from the stack (called "base"), stores "base + offsetArg + targetSizeInBytes" 3418 // into a register, and returns the stored register. We call the result "ceil" because we access the memory 3419 // as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3420 // 3421 // Note: this also emits the instructions to check for out-of-bounds memory access. 3422 // In other words, if the ceil exceeds the memory size, the code exits with nativeCallStatusCodeMemoryOutOfBounds status. 3423 func (c *amd64Compiler) compileMemoryAccessCeilSetup(offsetArg uint32, targetSizeInBytes int64) (asm.Register, error) { 3424 base := c.locationStack.pop() 3425 if err := c.compileEnsureOnRegister(base); err != nil { 3426 return asm.NilRegister, err 3427 } 3428 3429 result := base.register 3430 if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxInt32 { 3431 c.assembler.CompileConstToRegister(amd64.ADDQ, offsetConst, result) 3432 } else if offsetConst <= math.MaxUint32 { 3433 // Note: in practice, this branch rarely happens as in this case the wasm binary knows that 3434 // the memory has more than 1 GiB, or at least tries to access a region above 1 GiB. 3435 // 3436 // In this case, we cannot directly add the offset to a register with the ADDQ(const) instruction. 3437 // That is because the imm32 const is sign-extended to 64-bit in ADDQ(const), which would 3438 // turn offsetConst into a negative number, which is wrong. 3439 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3440 if err != nil { 3441 return asm.NilRegister, err 3442 } 3443 c.assembler.CompileConstToRegister(amd64.MOVL, int64(uint32(offsetConst)), tmp) 3444 c.assembler.CompileRegisterToRegister(amd64.ADDQ, tmp, result) 3445 } else { 3446 // If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds. 3447 c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds) 3448 return result, nil 3449 } 3450 3451 // Now we compare the value with the memory length which is held by callEngine. 3452 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3453 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, result) 3454 3455 // Trap if the value exceeds the memory length. 3456 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds) 3457 3458 c.locationStack.markRegisterUnused(result) 3459 return result, nil 3460 } 3461 3462 // compileStore implements compiler.compileStore for the amd64 architecture.
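// Before the store implementations below, here is a pure-Go sketch of the "ceil" bounds
// check emitted by compileMemoryAccessCeilSetup above (illustrative only, not part of this
// file; the helper name is hypothetical and assumes "encoding/binary" and "errors" are
// imported). For an access of targetSizeInBytes bytes at base+offset:
//
//	func load32(mem []byte, base, offset uint32) (uint32, error) {
//		ceil := uint64(base) + uint64(offset) + 4 // 4 = targetSizeInBytes.
//		if ceil > uint64(len(mem)) {
//			return 0, errors.New("out of bounds memory access") // nativeCallStatusCodeMemoryOutOfBounds.
//		}
//		return binary.LittleEndian.Uint32(mem[ceil-4 : ceil]), nil
//	}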
3463 func (c *amd64Compiler) compileStore(o *wazeroir.UnionOperation) error { 3464 var movInst asm.Instruction 3465 var targetSizeInByte int64 3466 unsignedType := wazeroir.UnsignedType(o.B1) 3467 offset := uint32(o.U2) 3468 switch unsignedType { 3469 case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32: 3470 movInst = amd64.MOVL 3471 targetSizeInByte = 32 / 8 3472 case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64: 3473 movInst = amd64.MOVQ 3474 targetSizeInByte = 64 / 8 3475 } 3476 return c.compileStoreImpl(offset, movInst, targetSizeInByte) 3477 } 3478 3479 // compileStore8 implements compiler.compileStore8 for the amd64 architecture. 3480 func (c *amd64Compiler) compileStore8(o *wazeroir.UnionOperation) error { 3481 return c.compileStoreImpl(uint32(o.U2), amd64.MOVB, 1) 3482 } 3483 3484 // compileStore16 implements compiler.compileStore16 for the amd64 architecture. 3485 func (c *amd64Compiler) compileStore16(o *wazeroir.UnionOperation) error { 3486 return c.compileStoreImpl(uint32(o.U2), amd64.MOVW, 16/8) 3487 } 3488 3489 // compileStore32 implements compiler.compileStore32 for the amd64 architecture. 3490 func (c *amd64Compiler) compileStore32(o *wazeroir.UnionOperation) error { 3491 return c.compileStoreImpl(uint32(o.U2), amd64.MOVL, 32/8) 3492 } 3493 3494 func (c *amd64Compiler) compileStoreImpl(offsetConst uint32, inst asm.Instruction, targetSizeInBytes int64) error { 3495 val := c.locationStack.pop() 3496 if err := c.compileEnsureOnRegister(val); err != nil { 3497 return err 3498 } 3499 3500 reg, err := c.compileMemoryAccessCeilSetup(offsetConst, targetSizeInBytes) 3501 if err != nil { 3502 return err 3503 } 3504 3505 c.assembler.CompileRegisterToMemoryWithIndex( 3506 inst, val.register, 3507 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3508 ) 3509 3510 // We no longer need either the value or the base register. 3511 c.locationStack.releaseRegister(val) 3512 c.locationStack.markRegisterUnused(reg) 3513 return nil 3514 } 3515 3516 // compileMemoryGrow implements compiler.compileMemoryGrow for the amd64 architecture. 3517 func (c *amd64Compiler) compileMemoryGrow() error { 3518 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 3519 return err 3520 } 3521 3522 if err := c.compileCallBuiltinFunction(builtinFunctionIndexMemoryGrow); err != nil { 3523 return err 3524 } 3525 3526 // After the function call, we have to initialize the stack base pointer and memory reserved registers. 3527 c.compileReservedStackBasePointerInitialization() 3528 c.compileReservedMemoryPointerInitialization() 3529 return nil 3530 } 3531 3532 // compileMemorySize implements compiler.compileMemorySize for the amd64 architecture. 3533 func (c *amd64Compiler) compileMemorySize() error { 3534 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 3535 return err 3536 } 3537 3538 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 3539 if err != nil { 3540 return err 3541 } 3542 loc := c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32) 3543 3544 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, loc.register) 3545 3546 // WebAssembly's memory.size returns the current memory size in pages, where one page is 65536 bytes. 3547 // That is equivalent to dividing the length of the memory slice by 65536, 3548 // which can be calculated as SHR by 16 bits since 65536 = 2^16.
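// In other words (an illustrative sketch, not part of this file), the single SHRQ below
// computes the page count from the byte length loaded above:
//
//	pages := uint32(uint64(len(memoryBuffer)) >> 16) // one page is 65536 = 1<<16 bytes.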
3549 c.assembler.CompileConstToRegister(amd64.SHRQ, wasm.MemoryPageSizeInBits, loc.register) 3550 return nil 3551 } 3552 3553 // compileMemoryInit implements compiler.compileMemoryInit for the amd64 architecture. 3554 func (c *amd64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error { 3555 dataIndex := uint32(o.U1) 3556 return c.compileInitImpl(false, dataIndex, 0) 3557 } 3558 3559 // compileInitImpl implements compileTableInit and compileMemoryInit. 3560 // 3561 // TODO: the compiled code in this function should be reused and compile at once as 3562 // the code is independent of any module. 3563 func (c *amd64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error { 3564 outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds 3565 if isTable { 3566 outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess 3567 } 3568 3569 copySize := c.locationStack.pop() 3570 if err := c.compileEnsureOnRegister(copySize); err != nil { 3571 return err 3572 } 3573 3574 sourceOffset := c.locationStack.pop() 3575 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 3576 return err 3577 } 3578 3579 destinationOffset := c.locationStack.pop() 3580 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3581 return err 3582 } 3583 3584 instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose) 3585 if err != nil { 3586 return err 3587 } 3588 c.locationStack.markRegisterUsed(instanceAddr) 3589 if isTable { 3590 c.compileLoadElemInstanceAddress(index, instanceAddr) 3591 } else { 3592 c.compileLoadDataInstanceAddress(index, instanceAddr) 3593 } 3594 3595 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3596 if err != nil { 3597 return err 3598 } 3599 c.locationStack.markRegisterUsed(tmp) 3600 3601 // sourceOffset += size. 3602 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 3603 // destinationOffset += size. 3604 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3605 3606 // Check instance bounds and if exceeds the length, exit with out of bounds error. 3607 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3608 instanceAddr, 8, // DataInstance and Element instance holds the length is stored at offset 8. 3609 sourceOffset.register) 3610 c.compileMaybeExitFromNativeCode(amd64.JCC, outOfBoundsErrorStatus) 3611 3612 // Check destination bounds and if exceeds the length, exit with out of bounds error. 3613 if isTable { 3614 // Load the target table's address. 3615 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 3616 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex*8), tmp) 3617 // Compare length. 3618 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, destinationOffset.register) 3619 } else { 3620 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3621 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, 3622 destinationOffset.register) 3623 } 3624 3625 c.compileMaybeExitFromNativeCode(amd64.JCC, outOfBoundsErrorStatus) 3626 3627 // Otherwise, ready to copy the value from source to destination. 3628 // 3629 // If the copy size equal zero, we skip the entire instructions below. 
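// A pure-Go sketch of the bounds validation just emitted (illustrative only, not part of
// this file; the helper name is hypothetical). Both ends of the copy are validated before a
// single byte is moved, and the copy itself runs only when size > 0:
//
//	func memoryInitBounds(data, mem []byte, srcOff, dstOff, size uint32) error {
//		if uint64(srcOff)+uint64(size) > uint64(len(data)) ||
//			uint64(dstOff)+uint64(size) > uint64(len(mem)) {
//			return errors.New("out of bounds memory access")
//		}
//		return nil
//	}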
3630 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3631 skipJump := c.assembler.CompileJump(amd64.JEQ) 3632 3633 var scale int16 3634 var memToReg, regToMem asm.Instruction 3635 if isTable { 3636 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 3637 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, sourceOffset.register) 3638 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 3639 // destinationOffset += table buffer's absolute address. 3640 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3641 tmp, tableInstanceTableOffset, destinationOffset.register) 3642 // sourceOffset += data buffer's absolute address. 3643 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3644 instanceAddr, 0, sourceOffset.register) 3645 3646 // For tables, we move 8 bytes at once. 3647 memToReg = amd64.MOVQ 3648 regToMem = memToReg 3649 scale = 8 3650 } else { 3651 // destinationOffset += memory buffer's absolute address. 3652 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3653 3654 // sourceOffset += data buffer's absolute address. 3655 c.assembler.CompileMemoryToRegister(amd64.ADDQ, instanceAddr, 0, sourceOffset.register) 3656 3657 // Move one byte at once. 3658 memToReg = amd64.MOVBQZX 3659 regToMem = amd64.MOVB 3660 scale = 1 3661 } 3662 3663 // Negate the counter. 3664 c.assembler.CompileNoneToRegister(amd64.NEGQ, copySize.register) 3665 3666 beginCopyLoop := c.assembler.CompileStandAlone(amd64.NOP) 3667 3668 c.assembler.CompileMemoryWithIndexToRegister(memToReg, 3669 sourceOffset.register, 0, copySize.register, scale, 3670 tmp) 3671 // [destinationOffset + (size.register)] = tmp. 3672 c.assembler.CompileRegisterToMemoryWithIndex(regToMem, 3673 tmp, 3674 destinationOffset.register, 0, copySize.register, scale, 3675 ) 3676 3677 // size += 1 3678 c.assembler.CompileNoneToRegister(amd64.INCQ, copySize.register) 3679 c.assembler.CompileJump(amd64.JMI).AssignJumpTarget(beginCopyLoop) 3680 3681 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 3682 destinationOffset.register, instanceAddr, tmp) 3683 c.assembler.SetJumpTargetOnNext(skipJump) 3684 return nil 3685 } 3686 3687 // compileDataDrop implements compiler.compileDataDrop for the amd64 architecture. 3688 func (c *amd64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error { 3689 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 3690 return err 3691 } 3692 3693 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3694 if err != nil { 3695 return err 3696 } 3697 3698 dataIndex := uint32(o.U1) 3699 c.compileLoadDataInstanceAddress(dataIndex, tmp) 3700 3701 // Clears the content of DataInstance[o.DataIndex] (== []byte type). 3702 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 0) 3703 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 8) 3704 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 16) 3705 return nil 3706 } 3707 3708 func (c *amd64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) { 3709 // dst = dataIndex * dataInstanceStructSize. 
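// The copy loop emitted by compileInitImpl above iterates with a negated counter: copySize
// is negated, used as a negative index relative to one-past-the-end of both regions, and
// incremented until it reaches zero (INCQ + JMI). An illustrative pure-Go sketch (not part
// of this file):
//
//	// srcOff, dstOff and size are the already bounds-checked operands.
//	for i := -int64(size); i < 0; i++ {
//		mem[int64(dstOff)+int64(size)+i] = data[int64(srcOff)+int64(size)+i]
//	}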
3710 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(dataIndex)*dataInstanceStructSize, dst) 3711 3712 // dst = &moduleInstance.DataInstances[0] + dst 3713 // = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize 3714 // = &moduleInstance.DataInstances[dataIndex] 3715 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3716 amd64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset, 3717 dst, 3718 ) 3719 } 3720 3721 // compileCopyLoopImpl implements a REP MOVSQ memory copy for the given range with support for both directions. 3722 func (c *amd64Compiler) compileCopyLoopImpl(destinationOffset, sourceOffset, copySize *runtimeValueLocation, backwards bool, bwOffset uint8) { 3723 // skip if nothing to copy 3724 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3725 emptyEightGroupsJump := c.assembler.CompileJump(amd64.JEQ) 3726 3727 // Prepare registers for swaps. There will never be more than 3 XCHGs in total. 3728 restoreCrossing := c.compilePreventCrossedTargetRegisters( 3729 []*runtimeValueLocation{destinationOffset, sourceOffset, copySize}, 3730 []asm.Register{amd64.RegDI, amd64.RegSI, amd64.RegCX}) 3731 3732 // Prepare registers for REP MOVSQ: copy from rsi to rdi, rcx times. 3733 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3734 c.compileMaybeSwapRegisters(sourceOffset.register, amd64.RegSI) 3735 c.compileMaybeSwapRegisters(copySize.register, amd64.RegCX) 3736 3737 // Point on first byte of first quadword to copy. 3738 if backwards { 3739 c.assembler.CompileConstToRegister(amd64.ADDQ, -int64(bwOffset), amd64.RegDI) 3740 c.assembler.CompileConstToRegister(amd64.ADDQ, -int64(bwOffset), amd64.RegSI) 3741 // Set REP prefix direction backwards. 3742 c.assembler.CompileStandAlone(amd64.STD) 3743 } 3744 3745 c.assembler.CompileStandAlone(amd64.REPMOVSQ) 3746 3747 if backwards { 3748 // Reset direction. 3749 c.assembler.CompileStandAlone(amd64.CLD) 3750 } 3751 3752 // Restore registers. 3753 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3754 c.compileMaybeSwapRegisters(sourceOffset.register, amd64.RegSI) 3755 c.compileMaybeSwapRegisters(copySize.register, amd64.RegCX) 3756 restoreCrossing() 3757 3758 c.assembler.SetJumpTargetOnNext(emptyEightGroupsJump) 3759 c.assembler.CompileStandAlone(amd64.NOP) 3760 } 3761 3762 // compileMemoryCopyLoopImpl is used for directly copying after bounds/direction check. 3763 func (c *amd64Compiler) compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize *runtimeValueLocation, tmp asm.Register, backwards bool) { 3764 // Point on first byte to be copied depending on direction. 3765 if backwards { 3766 c.assembler.CompileNoneToRegister(amd64.DECQ, sourceOffset.register) 3767 c.assembler.CompileNoneToRegister(amd64.DECQ, destinationOffset.register) 3768 } else { 3769 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, sourceOffset.register) 3770 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 3771 } 3772 3773 // destinationOffset += memory buffer's absolute address. 3774 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3775 // sourceOffset += memory buffer's absolute address. 3776 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, sourceOffset.register) 3777 3778 // Copy copySize % 8 bytes in loop to allow copying in 8 byte groups afterward. 
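// An illustrative pure-Go sketch of the forward case of this split (not part of this file).
// The byte loop below handles copySize % 8, and the remaining quadwords are then moved by
// compileCopyLoopImpl via REP MOVSQ:
//
//	tail := size % 8
//	for i := uint64(0); i < tail; i++ { // the MOVB loop below.
//		mem[dstOff+i] = mem[srcOff+i]
//	}
//	copy(mem[dstOff+tail:dstOff+size], mem[srcOff+tail:srcOff+size]) // REP MOVSQ, size/8 groups.
//
// The backwards variant walks down from the end instead and sets the direction flag (STD)
// so that REP MOVSQ also moves downwards.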
3779 beginLoop := c.assembler.CompileStandAlone(amd64.NOP) 3780 3781 // Check copySize % 8 == 0. 3782 c.assembler.CompileConstToRegister(amd64.TESTQ, 7, copySize.register) 3783 breakLoop := c.assembler.CompileJump(amd64.JEQ) 3784 3785 c.assembler.CompileMemoryToRegister(amd64.MOVBQZX, sourceOffset.register, 0, tmp) 3786 c.assembler.CompileRegisterToMemory(amd64.MOVB, tmp, destinationOffset.register, 0) 3787 3788 if backwards { 3789 c.assembler.CompileNoneToRegister(amd64.DECQ, sourceOffset.register) 3790 c.assembler.CompileNoneToRegister(amd64.DECQ, destinationOffset.register) 3791 } else { 3792 c.assembler.CompileNoneToRegister(amd64.INCQ, sourceOffset.register) 3793 c.assembler.CompileNoneToRegister(amd64.INCQ, destinationOffset.register) 3794 } 3795 3796 c.assembler.CompileNoneToRegister(amd64.DECQ, copySize.register) 3797 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(beginLoop) 3798 c.assembler.SetJumpTargetOnNext(breakLoop) 3799 3800 // compileCopyLoopImpl counts in groups of 8 bytes, so we have to divide the copySize by 8. 3801 c.assembler.CompileConstToRegister(amd64.SHRQ, 3, copySize.register) 3802 3803 c.compileCopyLoopImpl(destinationOffset, sourceOffset, copySize, backwards, 7) 3804 } 3805 3806 // compileMemoryCopy implements compiler.compileMemoryCopy for the amd64 architecture. 3807 // 3808 // This uses efficient `REP MOVSQ` instructions to copy in quadword (8 bytes) batches. The remaining bytes 3809 // are copied with a simple `MOV` loop. It uses backward copying for overlapped segments. 3810 func (c *amd64Compiler) compileMemoryCopy() error { 3811 copySize := c.locationStack.pop() 3812 if err := c.compileEnsureOnRegister(copySize); err != nil { 3813 return err 3814 } 3815 3816 sourceOffset := c.locationStack.pop() 3817 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 3818 return err 3819 } 3820 3821 destinationOffset := c.locationStack.pop() 3822 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3823 return err 3824 } 3825 3826 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3827 if err != nil { 3828 return err 3829 } 3830 c.locationStack.markRegisterUsed(tmp) 3831 3832 // sourceOffset += size. 3833 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 3834 // destinationOffset += size. 3835 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3836 // tmp = max(sourceOffset, destinationOffset). 3837 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, destinationOffset.register) 3838 c.assembler.CompileRegisterToRegister(amd64.MOVQ, sourceOffset.register, tmp) 3839 c.assembler.CompileRegisterToRegister(amd64.CMOVQCS, destinationOffset.register, tmp) 3840 3841 // Check maximum bounds and if exceeds the length, exit with out of bounds error. 3842 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3843 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, tmp) 3844 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds) 3845 3846 // Skip zero size. 
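// A sketch of the direction decision emitted below (illustrative, not part of this file;
// copyForwards/copyBackwards stand for the two compileMemoryCopyLoopImpl calls). At this
// point both offset registers already hold offset+size:
//
//	switch {
//	case size == 0:
//		// Nothing to do; the jump below skips the copy entirely.
//	case dstOff <= srcOff || srcOff+size <= dstOff:
//		copyForwards() // no risk of overwriting source bytes that were not read yet.
//	default:
//		copyBackwards() // overlapping with dst above src: copy from the end.
//	}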
3847 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3848 skipJump := c.assembler.CompileJump(amd64.JEQ) 3849 3850 // If dest < source, we can copy forwards 3851 c.assembler.CompileRegisterToRegister(amd64.CMPQ, destinationOffset.register, sourceOffset.register) 3852 destLowerThanSourceJump := c.assembler.CompileJump(amd64.JLS) 3853 3854 // If source + size < dest, we can copy forwards 3855 c.assembler.CompileRegisterToRegister(amd64.MOVQ, destinationOffset.register, tmp) 3856 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, tmp) 3857 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, tmp) 3858 sourceBoundLowerThanDestJump := c.assembler.CompileJump(amd64.JLS) 3859 3860 // Copy backwards. 3861 c.compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize, tmp, true) 3862 endJump := c.assembler.CompileJump(amd64.JMP) 3863 3864 // Copy forwards. 3865 c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump) 3866 c.assembler.SetJumpTargetOnNext(sourceBoundLowerThanDestJump) 3867 c.compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize, tmp, false) 3868 3869 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 3870 destinationOffset.register, tmp) 3871 c.assembler.SetJumpTargetOnNext(skipJump) 3872 c.assembler.SetJumpTargetOnNext(endJump) 3873 3874 return nil 3875 } 3876 3877 // compileFillLoopImpl implements a REP STOSQ fill loop. 3878 func (c *amd64Compiler) compileFillLoopImpl(destinationOffset, value, fillSize *runtimeValueLocation, tmp asm.Register, replicateByte bool) { 3879 // Skip if nothing to fill. 3880 c.assembler.CompileRegisterToRegister(amd64.TESTQ, fillSize.register, fillSize.register) 3881 emptyEightGroupsJump := c.assembler.CompileJump(amd64.JEQ) 3882 3883 if replicateByte { 3884 // Truncate value.register to a single byte 3885 c.assembler.CompileConstToRegister(amd64.ANDQ, 0xff, value.register) 3886 // Replicate single byte onto full 8-byte register. 3887 c.assembler.CompileConstToRegister(amd64.MOVQ, 0x0101010101010101, tmp) 3888 c.assembler.CompileRegisterToRegister(amd64.IMULQ, tmp, value.register) 3889 } 3890 3891 // Prepare registers for swaps. There will never be more than 3 XCHGs in total. 3892 restoreCrossing := c.compilePreventCrossedTargetRegisters( 3893 []*runtimeValueLocation{destinationOffset, value, fillSize}, 3894 []asm.Register{amd64.RegDI, amd64.RegAX, amd64.RegCX}) 3895 3896 // Prepare registers for REP STOSQ: fill at [rdi] with rax, rcx times. 3897 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3898 c.compileMaybeSwapRegisters(value.register, amd64.RegAX) 3899 c.compileMaybeSwapRegisters(fillSize.register, amd64.RegCX) 3900 3901 c.assembler.CompileStandAlone(amd64.REPSTOSQ) 3902 3903 // Restore registers. 3904 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3905 c.compileMaybeSwapRegisters(value.register, amd64.RegAX) 3906 c.compileMaybeSwapRegisters(fillSize.register, amd64.RegCX) 3907 restoreCrossing() 3908 3909 c.assembler.SetJumpTargetOnNext(emptyEightGroupsJump) 3910 } 3911 3912 // compileMemoryFill implements compiler.compileMemoryFill for the amd64 architecture. 3913 // 3914 // This function uses efficient `REP STOSQ` instructions to copy in quadword (8 bytes) batches 3915 // if the size if above 15 bytes. For smaller sizes, a simple MOVB copy loop is the best 3916 // option. 
3917 // 3918 // TODO: the compiled code in this function should be reused and compile at once as 3919 // the code is independent of any module. 3920 func (c *amd64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error { 3921 copySize := c.locationStack.pop() 3922 if err := c.compileEnsureOnRegister(copySize); err != nil { 3923 return err 3924 } 3925 3926 value := c.locationStack.pop() 3927 if err := c.compileEnsureOnRegister(value); err != nil { 3928 return err 3929 } 3930 3931 destinationOffset := c.locationStack.pop() 3932 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3933 return err 3934 } 3935 3936 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3937 if err != nil { 3938 return err 3939 } 3940 c.locationStack.markRegisterUsed(tmp) 3941 3942 // destinationOffset += size. 3943 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3944 3945 // Check destination bounds and if exceeds the length, exit with out of bounds error. 3946 if isTable { 3947 // tmp = &tables[0] 3948 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 3949 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 3950 tmp) 3951 3952 // tmp = [tmp + TableIndex*8] 3953 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 3954 // = [&tables[TableIndex]] = tables[TableIndex]. 3955 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex)*8, tmp) 3956 3957 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3958 tmp, tableInstanceTableLenOffset, 3959 destinationOffset.register) 3960 } else { 3961 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3962 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, 3963 destinationOffset.register) 3964 } 3965 if isTable { 3966 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeInvalidTableAccess) 3967 } else { 3968 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds) 3969 } 3970 3971 // Otherwise, ready to copy the value from source to destination. 3972 // 3973 // If the copy size equal zero, we skip the entire instructions below. 3974 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3975 skipJump := c.assembler.CompileJump(amd64.JEQ) 3976 3977 // destinationOffset -= size. 3978 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 3979 3980 if isTable { 3981 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 3982 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 3983 // destinationOffset += table buffer's absolute address. 3984 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, destinationOffset.register) 3985 3986 } else { 3987 // destinationOffset += memory buffer's absolute address. 3988 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3989 3990 // Copy first % 16 bytes with simple MOVB instruction. 
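// An illustrative pure-Go sketch of the fill strategy here (not part of this file). The
// byte loop below handles size % 16 one byte at a time; the rest is filled in 8-byte groups
// by compileFillLoopImpl, which replicates the low byte across a quadword and uses REP
// STOSQ (assumes "encoding/binary" is imported):
//
//	b := byte(value)
//	tail := size % 16
//	for i := uint64(0); i < tail; i++ {
//		mem[dstOff+i] = b
//	}
//	pattern := uint64(b) * 0x0101010101010101 // b repeated in every byte of the quadword.
//	for i := tail; i < size; i += 8 {
//		binary.LittleEndian.PutUint64(mem[dstOff+i:], pattern) // one STOSQ per group.
//	}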
3991 beginCopyLoop := c.assembler.CompileStandAlone(amd64.NOP) 3992 c.assembler.CompileConstToRegister(amd64.TESTQ, 15, copySize.register) 3993 breakLoop := c.assembler.CompileJump(amd64.JEQ) 3994 3995 c.assembler.CompileRegisterToMemory(amd64.MOVB, value.register, destinationOffset.register, 0) 3996 3997 c.assembler.CompileNoneToRegister(amd64.INCQ, destinationOffset.register) 3998 c.assembler.CompileNoneToRegister(amd64.DECQ, copySize.register) 3999 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(beginCopyLoop) 4000 4001 c.assembler.SetJumpTargetOnNext(breakLoop) 4002 // compileFillLoopImpl counts in groups of 8 bytes, so we have to divide the copySize by 8. 4003 c.assembler.CompileConstToRegister(amd64.SHRQ, 3, copySize.register) 4004 } 4005 4006 c.compileFillLoopImpl(destinationOffset, value, copySize, tmp, !isTable) 4007 4008 c.locationStack.markRegisterUnused(copySize.register, value.register, 4009 destinationOffset.register, tmp) 4010 c.assembler.SetJumpTargetOnNext(skipJump) 4011 return nil 4012 } 4013 4014 // compileMemoryFill implements compiler.compileMemoryFill for the amd64 architecture. 4015 // 4016 // TODO: the compiled code in this function should be reused and compile at once as 4017 // the code is independent of any module. 4018 func (c *amd64Compiler) compileMemoryFill() error { 4019 return c.compileFillImpl(false, 0) 4020 } 4021 4022 // compileTableInit implements compiler.compileTableInit for the amd64 architecture. 4023 func (c *amd64Compiler) compileTableInit(o *wazeroir.UnionOperation) error { 4024 elemIndex := uint32(o.U1) 4025 tableIndex := uint32(o.U2) 4026 return c.compileInitImpl(true, elemIndex, tableIndex) 4027 } 4028 4029 // compileTableCopyLoopImpl is used for directly copying after bounds/direction check. 4030 func (c *amd64Compiler) compileTableCopyLoopImpl(srcTableIndex, dstTableIndex uint32, destinationOffset, sourceOffset, copySize *runtimeValueLocation, tmp asm.Register, backwards bool) { 4031 // Point on first byte to be copied. 4032 if !backwards { 4033 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, sourceOffset.register) 4034 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 4035 } 4036 4037 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 4038 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, sourceOffset.register) 4039 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 4040 // destinationOffset += table buffer's absolute address. 4041 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4042 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(dstTableIndex*8), tmp) 4043 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, destinationOffset.register) 4044 // sourceOffset += table buffer's absolute address. 4045 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4046 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(srcTableIndex*8), tmp) 4047 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, sourceOffset.register) 4048 4049 c.compileCopyLoopImpl(destinationOffset, sourceOffset, copySize, backwards, 8) 4050 } 4051 4052 // compileTableCopy implements compiler.compileTableCopy for the amd64 architecture. 
4053 // 4054 // It uses efficient `REP MOVSB` instructions for optimized copying. It uses backward copying for 4055 // overlapped segments. 4056 func (c *amd64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error { 4057 copySize := c.locationStack.pop() 4058 if err := c.compileEnsureOnRegister(copySize); err != nil { 4059 return err 4060 } 4061 4062 sourceOffset := c.locationStack.pop() 4063 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 4064 return err 4065 } 4066 4067 destinationOffset := c.locationStack.pop() 4068 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 4069 return err 4070 } 4071 4072 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4073 if err != nil { 4074 return err 4075 } 4076 4077 // sourceOffset += size. 4078 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 4079 // destinationOffset += size. 4080 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 4081 4082 srcTableIndex := uint32(o.U1) 4083 dstTableIndex := uint32(o.U2) 4084 4085 // Check source bounds and if exceeds the length, exit with out of bounds error. 4086 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4087 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(srcTableIndex*8), tmp) 4088 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, sourceOffset.register) 4089 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeInvalidTableAccess) 4090 4091 // Check destination bounds and if exceeds the length, exit with out of bounds error. 4092 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4093 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(dstTableIndex*8), tmp) 4094 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, destinationOffset.register) 4095 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeInvalidTableAccess) 4096 4097 // Skip zero size. 4098 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 4099 skipJump := c.assembler.CompileJump(amd64.JEQ) 4100 4101 // If dest < source, we can copy forwards. 4102 c.assembler.CompileRegisterToRegister(amd64.CMPQ, destinationOffset.register, sourceOffset.register) 4103 destLowerThanSourceJump := c.assembler.CompileJump(amd64.JLS) 4104 4105 // If source + size < dest, we can copy forwards. 4106 c.assembler.CompileRegisterToRegister(amd64.MOVQ, destinationOffset.register, tmp) 4107 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, tmp) 4108 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, tmp) 4109 sourceBoundLowerThanDestJump := c.assembler.CompileJump(amd64.JLS) 4110 4111 // Copy backwards. 4112 c.compileTableCopyLoopImpl(srcTableIndex, dstTableIndex, destinationOffset, sourceOffset, copySize, tmp, true) 4113 endJump := c.assembler.CompileJump(amd64.JMP) 4114 4115 // Copy forwards. 
4116 c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump) 4117 c.assembler.SetJumpTargetOnNext(sourceBoundLowerThanDestJump) 4118 c.compileTableCopyLoopImpl(srcTableIndex, dstTableIndex, destinationOffset, sourceOffset, copySize, tmp, false) 4119 4120 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 4121 destinationOffset.register, tmp) 4122 c.assembler.SetJumpTargetOnNext(skipJump) 4123 c.assembler.SetJumpTargetOnNext(endJump) 4124 return nil 4125 } 4126 4127 // compileElemDrop implements compiler.compileElemDrop for the amd64 architecture. 4128 func (c *amd64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error { 4129 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4130 return err 4131 } 4132 4133 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4134 if err != nil { 4135 return err 4136 } 4137 4138 elemIndex := uint32(o.U1) 4139 c.compileLoadElemInstanceAddress(elemIndex, tmp) 4140 4141 // Clears the content of ElementInstances[o.ElemIndex].References (== []uintptr{} type). 4142 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 0) 4143 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 8) 4144 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 16) 4145 return nil 4146 } 4147 4148 func (c *amd64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) { 4149 // dst = elemIndex * elementInstanceStructSize 4150 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(elemIndex)*elementInstanceStructSize, dst) 4151 4152 // dst = &moduleInstance.ElementInstances[0] + dst 4153 // = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize 4154 // = &moduleInstance.ElementInstances[elemIndex] 4155 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 4156 amd64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset, 4157 dst, 4158 ) 4159 } 4160 4161 // compileTableGet implements compiler.compileTableGet for the amd64 architecture. 4162 func (c *amd64Compiler) compileTableGet(o *wazeroir.UnionOperation) error { 4163 ref, err := c.allocateRegister(registerTypeGeneralPurpose) 4164 if err != nil { 4165 return err 4166 } 4167 4168 c.locationStack.markRegisterUsed(ref) 4169 4170 offset := c.locationStack.pop() 4171 if err := c.compileEnsureOnRegister(offset); err != nil { 4172 return err 4173 } 4174 4175 // ref = &tables[0] 4176 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4177 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4178 ref) 4179 4180 // ref = [ref + TableIndex*8] 4181 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4182 // = [&tables[TableIndex]] = tables[TableIndex]. 4183 tableIndex := int64(o.U1) 4184 c.assembler.CompileMemoryToRegister(amd64.MOVQ, ref, tableIndex*8, ref) 4185 4186 // Out of bounds check. 
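	// (Illustrative note.) Together with the load further below, the bounds check implements,
	// in Go terms, roughly
	//
	//	if offset >= uint64(len(table.References)) {
	//		exitWith(nativeCallStatusCodeInvalidTableAccess) // trap
	//	}
	//	ref = table.References[offset]
	//
	// where exitWith is a hypothetical stand-in for the exit path emitted by compileMaybeExitFromNativeCode.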
4187 c.assembler.CompileMemoryToRegister(amd64.CMPQ, ref, tableInstanceTableLenOffset, offset.register) 4188 c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusCodeInvalidTableAccess) 4189 4190 // ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] 4191 c.assembler.CompileMemoryToRegister(amd64.MOVQ, ref, tableInstanceTableOffset, ref) 4192 4193 // ref = [ref + 0 + offset.register * 8] 4194 // = [&tables[TableIndex].References[0] + sizeOf(uintptr) * offset] 4195 // = [&tables[TableIndex].References[offset]] 4196 // = tables[TableIndex].References[offset] 4197 c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVQ, ref, 4198 0, offset.register, 8, ref, 4199 ) 4200 4201 c.locationStack.markRegisterUnused(offset.register) 4202 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit at runtime. 4203 return nil 4204 } 4205 4206 // compileTableSet implements compiler.compileTableSet for the amd64 architecture. 4207 func (c *amd64Compiler) compileTableSet(o *wazeroir.UnionOperation) error { 4208 ref := c.locationStack.pop() 4209 if err := c.compileEnsureOnRegister(ref); err != nil { 4210 return err 4211 } 4212 4213 offset := c.locationStack.pop() 4214 if err := c.compileEnsureOnRegister(offset); err != nil { 4215 return err 4216 } 4217 4218 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4219 if err != nil { 4220 return err 4221 } 4222 4223 // tmp = &tables[0] 4224 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4225 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4226 tmp) 4227 4228 // ref = [ref + TableIndex*8] 4229 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4230 // = [&tables[TableIndex]] = tables[TableIndex]. 4231 tableIndex := int64(o.U1) 4232 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, tableIndex*8, tmp) 4233 4234 // Out of bounds check. 4235 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, offset.register) 4236 c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusCodeInvalidTableAccess) 4237 4238 // tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] 4239 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, tableInstanceTableOffset, tmp) 4240 4241 // [tmp + 0 + offset.register * 8] = ref 4242 // [&tables[TableIndex].References[0] + sizeOf(uintptr) * offset] = ref 4243 // [&tables[TableIndex].References[offset]] = ref 4244 // tables[TableIndex].References[offset] = ref 4245 c.assembler.CompileRegisterToMemoryWithIndex(amd64.MOVQ, 4246 ref.register, 4247 tmp, 0, offset.register, 8) 4248 4249 c.locationStack.markRegisterUnused(offset.register, ref.register) 4250 return nil 4251 } 4252 4253 // compileTableGrow implements compiler.compileTableGrow for the amd64 architecture. 4254 func (c *amd64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error { 4255 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4256 return err 4257 } 4258 4259 // Pushes the table index. 4260 tableIndex := uint32(o.U1) 4261 if err := c.compileConstI32Impl(tableIndex); err != nil { 4262 return err 4263 } 4264 4265 // Table grow cannot be done in assembly just like memory grow as it involves with allocation in Go. 4266 // Therefore, call out to the built function for this purpose. 
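	// (Illustrative sketch only; the names below are hypothetical and not the engine's actual Go code.)
	// The builtin grows the table on the Go side because growth may reallocate the backing slice:
	//
	//	func tableGrow(refs []uintptr, num uint32, initial uintptr) (newRefs []uintptr, prevLen uint32) {
	//		prevLen = uint32(len(refs))
	//		for i := uint32(0); i < num; i++ {
	//			refs = append(refs, initial)
	//		}
	//		return refs, prevLen
	//	}
	//
	// The real builtin also enforces the table's maximum size; per the Wasm spec, table.grow
	// yields -1 (0xffffffff) when the table cannot grow.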
4267 if err := c.compileCallBuiltinFunction(builtinFunctionIndexTableGrow); err != nil { 4268 return err 4269 } 4270 4271 // TableGrow consumes three values (table index, number of items, initial value). 4272 for i := 0; i < 3; i++ { 4273 c.locationStack.pop() 4274 } 4275 4276 // Then, the previous length was pushed as the result. 4277 loc := c.locationStack.pushRuntimeValueLocationOnStack() 4278 loc.valueType = runtimeValueTypeI32 4279 4280 // After return, we re-initialize reserved registers just like preamble of functions. 4281 c.compileReservedStackBasePointerInitialization() 4282 c.compileReservedMemoryPointerInitialization() 4283 return nil 4284 } 4285 4286 // compileTableSize implements compiler.compileTableSize for the amd64 architecture. 4287 func (c *amd64Compiler) compileTableSize(o *wazeroir.UnionOperation) error { 4288 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4289 return err 4290 } 4291 4292 result, err := c.allocateRegister(registerTypeGeneralPurpose) 4293 if err != nil { 4294 return err 4295 } 4296 4297 // result = &tables[0] 4298 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4299 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4300 result) 4301 4302 // result = [result + TableIndex*8] 4303 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4304 // = [&tables[TableIndex]] = tables[TableIndex]. 4305 tableIndex := int64(o.U1) 4306 c.assembler.CompileMemoryToRegister(amd64.MOVQ, result, tableIndex*8, result) 4307 4308 // result = [result + tableInstanceTableLenOffset] 4309 // = [tables[TableIndex] + tableInstanceTableLenOffset] 4310 // = len(tables[TableIndex]) 4311 c.assembler.CompileMemoryToRegister(amd64.MOVQ, result, tableInstanceTableLenOffset, result) 4312 4313 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32) 4314 return nil 4315 } 4316 4317 // compileTableFill implements compiler.compileTableFill for the amd64 architecture. 4318 func (c *amd64Compiler) compileTableFill(o *wazeroir.UnionOperation) error { 4319 tableIndex := uint32(o.U1) 4320 return c.compileFillImpl(true, tableIndex) 4321 } 4322 4323 // compileRefFunc implements compiler.compileRefFunc for the amd64 architecture. 4324 func (c *amd64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error { 4325 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4326 return err 4327 } 4328 4329 ref, err := c.allocateRegister(registerTypeGeneralPurpose) 4330 if err != nil { 4331 return err 4332 } 4333 4334 functionIndex := int64(o.U1) 4335 c.assembler.CompileConstToRegister(amd64.MOVQ, functionIndex*functionSize, ref) 4336 4337 // ref = [amd64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset + int64(o.FunctionIndex)*functionSize] 4338 // = &moduleEngine.functions[index] 4339 c.assembler.CompileMemoryToRegister( 4340 amd64.ADDQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset, 4341 ref, 4342 ) 4343 4344 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) 4345 return nil 4346 } 4347 4348 // compileConstI32 implements compiler.compileConstI32 for the amd64 architecture. 
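// For example (illustration only), `i32.const 7` compiles to roughly a single
//
//	MOVL $7, reg
//
// into a freshly allocated general purpose register, and the location stack records reg as
// holding a runtimeValueTypeI32.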
4349 func (c *amd64Compiler) compileConstI32(o *wazeroir.UnionOperation) error { 4350 return c.compileConstI32Impl(uint32(o.U1)) 4351 } 4352 4353 func (c *amd64Compiler) compileConstI32Impl(v uint32) error { 4354 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4355 return err 4356 } 4357 4358 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 4359 if err != nil { 4360 return err 4361 } 4362 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32) 4363 c.assembler.CompileConstToRegister(amd64.MOVL, int64(v), reg) 4364 return nil 4365 } 4366 4367 // compileConstI64 implements compiler.compileConstI64 for the amd64 architecture. 4368 func (c *amd64Compiler) compileConstI64(o *wazeroir.UnionOperation) error { 4369 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4370 return err 4371 } 4372 4373 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 4374 if err != nil { 4375 return err 4376 } 4377 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64) 4378 4379 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(o.U1), reg) 4380 return nil 4381 } 4382 4383 // compileConstF32 implements compiler.compileConstF32 for the amd64 architecture. 4384 func (c *amd64Compiler) compileConstF32(o *wazeroir.UnionOperation) error { 4385 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4386 return err 4387 } 4388 4389 reg, err := c.allocateRegister(registerTypeVector) 4390 if err != nil { 4391 return err 4392 } 4393 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeF32) 4394 4395 // We cannot directly load the value from memory to float regs, 4396 // so we move it to int reg temporarily. 4397 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 4398 if err != nil { 4399 return err 4400 } 4401 4402 c.assembler.CompileConstToRegister(amd64.MOVL, int64(o.U1) /*math.Float32bits(o.Value)*/, tmpReg) 4403 c.assembler.CompileRegisterToRegister(amd64.MOVL, tmpReg, reg) 4404 return nil 4405 } 4406 4407 // compileConstF64 implements compiler.compileConstF64 for the amd64 architecture. 4408 func (c *amd64Compiler) compileConstF64(o *wazeroir.UnionOperation) error { 4409 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4410 return err 4411 } 4412 4413 reg, err := c.allocateRegister(registerTypeVector) 4414 if err != nil { 4415 return err 4416 } 4417 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeF64) 4418 4419 // We cannot directly load the value from memory to float regs, 4420 // so we move it to int reg temporarily. 4421 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 4422 if err != nil { 4423 return err 4424 } 4425 4426 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(o.U1) /* math.Float64bits(o.Value) */, tmpReg) 4427 c.assembler.CompileRegisterToRegister(amd64.MOVQ, tmpReg, reg) 4428 return nil 4429 } 4430 4431 // compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for amd64. 
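// In effect (illustration only), this performs
//
//	loc.register = stack[loc.stackPointer]
//
// where stack is the Go slice backing the value stack and each slot is 8 bytes. A v128 value
// occupies two consecutive slots and is loaded with a single 128-bit MOVDQU, after which both
// slots are marked as living on the same register.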
4432 func (c *amd64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) {
4433 	var inst asm.Instruction
4434 	switch loc.valueType {
4435 	case runtimeValueTypeV128Lo:
4436 		inst = amd64.MOVDQU
4437 	case runtimeValueTypeV128Hi:
4438 		panic("BUG: V128Hi must be loaded to a register along with V128Lo")
4439 	case runtimeValueTypeI32, runtimeValueTypeF32:
4440 		inst = amd64.MOVL
4441 	case runtimeValueTypeI64, runtimeValueTypeF64:
4442 		inst = amd64.MOVQ
4443 	default:
4444 		panic("BUG: unknown runtime value type")
4445 	}
4446 
4447 	// Copy the value from the stack.
4448 	c.assembler.CompileMemoryToRegister(inst,
4449 		// Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range.
4450 		amd64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
4451 		loc.register)
4452 
4453 	if loc.valueType == runtimeValueTypeV128Lo {
4454 		// The higher 64 bits were loaded by the 128-bit MOVDQU above, so mark them as being on the same register.
4455 		hi := &c.locationStack.stack[loc.stackPointer+1]
4456 		hi.setRegister(loc.register)
4457 	}
4458 }
4459 
4460 // maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
4461 // to a general purpose register if the value is currently located on a conditional register.
4462 //
4463 // This is usually called at the beginning of methods on the compiler interface that might
4464 // compile instructions without saving the conditional register value.
4465 // The compileXXX functions that do not call this function must instead save the conditional
4466 // value to the stack or a register by invoking compileEnsureOnRegister for the top of the stack.
4467 func (c *amd64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
4468 	if c.locationStack.sp > 0 {
4469 		if loc := c.locationStack.peek(); loc.onConditionalRegister() {
4470 			if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
4471 				return err
4472 			}
4473 		}
4474 	}
4475 	return
4476 }
4477 
4478 // compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value
4479 // to a general purpose register.
4480 func (c *amd64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
4481 	reg, err := c.allocateRegister(registerTypeGeneralPurpose)
4482 	if err != nil {
4483 		return err
4484 	}
4485 	c.compileMoveConditionalToGeneralPurposeRegister(loc, reg)
4486 	return nil
4487 }
4488 
4489 func (c *amd64Compiler) compileMoveConditionalToGeneralPurposeRegister(loc *runtimeValueLocation, reg asm.Register) {
4490 	// Set the flag bit to the destination. See
4491 	// - https://c9x.me/x86/html/file_module_x86_id_288.html
4492 	// - https://github.com/golang/go/blob/master/src/cmd/internal/obj/x86/asm6.go#L1453-L1468
4493 	// to translate conditionalRegisterState* to amd64.SET*
4494 	var inst asm.Instruction
4495 	switch loc.conditionalRegister {
4496 	case amd64.ConditionalRegisterStateE:
4497 		inst = amd64.SETEQ
4498 	case amd64.ConditionalRegisterStateNE:
4499 		inst = amd64.SETNE
4500 	case amd64.ConditionalRegisterStateS:
4501 		inst = amd64.SETMI
4502 	case amd64.ConditionalRegisterStateNS:
4503 		inst = amd64.SETPL
4504 	case amd64.ConditionalRegisterStateG:
4505 		inst = amd64.SETGT
4506 	case amd64.ConditionalRegisterStateGE:
4507 		inst = amd64.SETGE
4508 	case amd64.ConditionalRegisterStateL:
4509 		inst = amd64.SETLT
4510 	case amd64.ConditionalRegisterStateLE:
4511 		inst = amd64.SETLE
4512 	case amd64.ConditionalRegisterStateA:
4513 		inst = amd64.SETHI
4514 	case amd64.ConditionalRegisterStateAE:
4515 		inst = amd64.SETCC
4516 	case amd64.ConditionalRegisterStateB:
4517 		inst = amd64.SETCS
4518 	case amd64.ConditionalRegisterStateBE:
4519 		inst = amd64.SETLS
4520 	}
4521 
4522 	c.assembler.CompileNoneToRegister(inst, reg)
4523 
4524 	// Then we clear all bits except the lowest one, since SETcc only writes the low byte.
4525 	c.assembler.CompileConstToRegister(amd64.ANDQ, 0x1, reg)
4526 
4527 	// Mark that the location now uses the register.
4528 	loc.setRegister(reg)
4529 	c.locationStack.markRegisterUsed(reg)
4530 }
4531 
4532 // allocateRegister implements compiler.allocateRegister for amd64.
4533 func (c *amd64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
4534 	var ok bool
4535 	// Try to get the unused register.
4536 	reg, ok = c.locationStack.takeFreeRegister(t)
4537 	if ok {
4538 		return
4539 	}
4540 
4541 	// If not found, we have to steal the register.
4542 	stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
4543 	if !ok {
4544 		err = fmt.Errorf("cannot steal register")
4545 		return
4546 	}
4547 
4548 	// Release the steal target register value onto stack location.
4549 	reg = stealTarget.register
4550 	c.compileReleaseRegisterToStack(stealTarget)
4551 	return
4552 }
4553 
4554 // compileCallFunctionImpl adds instructions to call the function whose address is held in functionAddressRegister.
4555 //
4556 // Note: this is the counterpart of compileReturnFunction, and see the comments there as well
4557 // to understand how the function calls are achieved.
4558 func (c *amd64Compiler) compileCallFunctionImpl(functionAddressRegister asm.Register, functype *wasm.FunctionType) error {
4559 	// Release all the registers, as our calling convention is caller-save.
4560 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
4561 		return err
4562 	}
4563 
4564 	c.locationStack.markRegisterUsed(functionAddressRegister)
4565 
4566 	// Obtain a temporary register to be used in the following.
4567 	tmpRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4568 	if !found {
4569 		// This in theory never happens, as all the registers must be free except functionAddressRegister.
4570 		return fmt.Errorf("could not find enough free registers")
4571 	}
4572 
4573 	// The stack should look like:
4574 	//
4575 	//               reserved slots for results (if len(results) > len(args))
4576 	//                         |        |
4577 	//    ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
4578 	//      |                       |                                                         |
4579 	//      |            callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
4580 	//      |
4581 	//  nextStackBasePointerOffset
4582 	//
4583 	// where callFrame is used to return to this currently executed function.
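	// For example (illustration only), if the current sp is 10 and the callee takes 3 parameters
	// (counted in uint64 slots), then nextStackBasePointerOffset is 10 - 3 = 7: the callee's stack
	// base is placed where its arguments already sit, so arg0 becomes the callee's slot 0.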
4584 
4585 	nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)
4586 
4587 	callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)
4588 
4589 	// Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
4590 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4591 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
4592 		tmpRegister)
4593 	callFrameStackBasePointerInBytesLoc.setRegister(tmpRegister)
4594 	c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)
4595 
4596 	// Compute the stack base pointer (in bytes) for the next function: current base + nextStackBasePointerOffset*8.
4597 	c.assembler.CompileConstToRegister(amd64.ADDQ, nextStackBasePointerOffset<<3, tmpRegister)
4598 
4599 	// Write the calculated value to callEngine.stackContext.stackBasePointer.
4600 	c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
4601 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)
4602 
4603 	// Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
4604 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4605 		amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset,
4606 		tmpRegister)
4607 	callFrameFunctionLoc.setRegister(tmpRegister)
4608 	c.compileReleaseRegisterToStack(callFrameFunctionLoc)
4609 
4610 	// Set callEngine.moduleContext.fn to the next *function.
4611 	c.assembler.CompileRegisterToMemory(amd64.MOVQ, functionAddressRegister,
4612 		amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)
4613 
4614 	// Write the return address into callFrameReturnAddressLoc.
4615 	c.assembler.CompileReadInstructionAddress(tmpRegister, amd64.JMP)
4616 	callFrameReturnAddressLoc.setRegister(tmpRegister)
4617 	c.compileReleaseRegisterToStack(callFrameReturnAddressLoc)
4618 
4619 	if amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister == functionAddressRegister {
4620 		// In this case, we must move the value in functionAddressRegister to another register; otherwise
4621 		// the address (the jump target below) would be modified and result in a segfault.
4622 		// See #526.
4623 		c.assembler.CompileRegisterToRegister(amd64.MOVQ, functionAddressRegister, tmpRegister)
4624 		functionAddressRegister = tmpRegister
4625 	}
4626 
4627 	// Also, we have to put the target function's *wasm.ModuleInstance into amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister.
4628 	c.assembler.CompileMemoryToRegister(amd64.MOVQ, functionAddressRegister, functionModuleInstanceOffset,
4629 		amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4630 
4631 	// And jump into the initial address of the target function.
4632 	c.assembler.CompileJumpToMemory(amd64.JMP, functionAddressRegister, functionCodeInitialAddressOffset)
4633 
4634 	// All the registers used are temporary, so we mark them unused.
4635 	c.locationStack.markRegisterUnused(tmpRegister, functionAddressRegister)
4636 
4637 	// On the function return, we have to re-initialize the state.
4638 	if err := c.compileModuleContextInitialization(); err != nil {
4639 		return err
4640 	}
4641 
4642 	// Due to the change to callEngine.stackContext.stackBasePointer.
4643 	c.compileReservedStackBasePointerInitialization()
4644 
4645 	// Due to the change to callEngine.moduleContext.moduleInstance, as that might result in
4646 	// a different memory instance.
4647 	c.compileReservedMemoryPointerInitialization()
4648 
4649 	// We consumed the function parameters, the call frame stack and reserved slots during the call.
4650 	c.locationStack.sp = uint64(nextStackBasePointerOffset)
4651 
4652 	// Now the function results are pushed by the call.
4653 	for _, t := range functype.Results {
4654 		loc := c.locationStack.pushRuntimeValueLocationOnStack()
4655 		switch t {
4656 		case wasm.ValueTypeI32:
4657 			loc.valueType = runtimeValueTypeI32
4658 		case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref:
4659 			loc.valueType = runtimeValueTypeI64
4660 		case wasm.ValueTypeF32:
4661 			loc.valueType = runtimeValueTypeF32
4662 		case wasm.ValueTypeF64:
4663 			loc.valueType = runtimeValueTypeF64
4664 		case wasm.ValueTypeV128:
4665 			loc.valueType = runtimeValueTypeV128Lo
4666 			hi := c.locationStack.pushRuntimeValueLocationOnStack()
4667 			hi.valueType = runtimeValueTypeV128Hi
4668 		default:
4669 			panic("BUG: invalid type: " + wasm.ValueTypeName(t))
4670 		}
4671 	}
4672 	return nil
4673 }
4674 
4675 // compileReturnFunction adds instructions to return from the current call frame back to the caller's frame.
4676 // If the current call frame is the origin of the execution, we return to callEngine.execWasmFunction
4677 // with the Returned status. Otherwise, we jump to the caller's return address stored in callFrame.returnAddress
4678 // while setting up all the necessary changes on the callEngine's state.
4679 //
4680 // Note: this is the counterpart of compileCallFunctionImpl, and see the comments there as well
4681 // to understand how the function calls are achieved.
4682 func (c *amd64Compiler) compileReturnFunction() error {
4683 	// Release all the registers, as our calling convention is caller-save.
4684 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
4685 		return err
4686 	}
4687 
4688 	if c.withListener {
4689 		if err := c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerAfter); err != nil {
4690 			return err
4691 		}
4692 		// After return, we re-initialize the stack base pointer as that is used to return to the caller below.
4693 		c.compileReservedStackBasePointerInitialization()
4694 	}
4695 
4696 	// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the module instance's address,
4697 	// so we mark it used so that it won't be handed out as a free register.
4698 	c.locationStack.markRegisterUsed(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4699 	defer c.locationStack.markRegisterUnused(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4700 
4701 	// Obtain a temporary register to be used in the following.
4702 	returnAddressRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4703 	if !found {
4704 		panic("BUG: all the registers should be free at this point: " + c.locationStack.String())
4705 	}
4706 
4707 	returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
4708 
4709 	// A zero return address means we return from the entire execution (this is the bottom call frame).
4710 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4711 		amd64ReservedRegisterForStackBasePointerAddress, int64(returnAddress.stackPointer)*8,
4712 		returnAddressRegister,
4713 	)
4714 	c.assembler.CompileRegisterToRegister(amd64.TESTQ, returnAddressRegister, returnAddressRegister)
4715 
4716 	c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusCodeReturned)
4717 
4718 	// Alias for readability.
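	// Note: reusing amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister as a
	// scratch register here is fine because it is reloaded below (from the caller's *function)
	// with the module instance of the function we return to, before we jump back.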
4719 	tmpRegister := amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister
4720 
4721 	// First, restore callEngine.stackContext.stackBasePointerInBytes from callerStackBasePointerInBytes.
4722 	callerStackBasePointerInBytes.setRegister(tmpRegister)
4723 	c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
4724 	c.assembler.CompileRegisterToMemory(amd64.MOVQ,
4725 		tmpRegister, amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)
4726 
4727 	// Next, restore moduleContext.fn from callerFunction.
4728 	callerFunction.setRegister(tmpRegister)
4729 	c.compileLoadValueOnStackToRegister(callerFunction)
4730 	c.assembler.CompileRegisterToMemory(amd64.MOVQ,
4731 		tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)
4732 
4733 	// Also, we have to put the caller function's *wasm.ModuleInstance into amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister.
4734 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4735 		tmpRegister, functionModuleInstanceOffset,
4736 		amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4737 
4738 	// Then, jump into the return address!
4739 	c.assembler.CompileJumpToRegister(amd64.JMP, returnAddressRegister)
4740 	return nil
4741 }
4742 
4743 func (c *amd64Compiler) compileCallGoHostFunction() error {
4744 	return c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction)
4745 }
4746 
4747 func (c *amd64Compiler) compileCallBuiltinFunction(index wasm.Index) error {
4748 	// Set the builtin function index to callEngine.exitContext.builtinFunctionCallIndex.
4749 	c.assembler.CompileConstToMemory(amd64.MOVL, int64(index), amd64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset)
4750 	return c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction)
4751 }
4752 
4753 func (c *amd64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode) error {
4754 	// Release all the registers, as our calling convention is caller-save.
4755 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
4756 		return err
4757 	}
4758 
4759 	c.compileExitFromNativeCode(compilerStatus)
4760 	return nil
4761 }
4762 
4763 // compileReleaseAllRegistersToStack adds the instructions to release all the live values
4764 // in the value location stack at this point onto the stack memory locations.
4765 func (c *amd64Compiler) compileReleaseAllRegistersToStack() (err error) {
4766 	for i := uint64(0); i < c.locationStack.sp; i++ {
4767 		if loc := &c.locationStack.stack[i]; loc.onRegister() {
4768 			c.compileReleaseRegisterToStack(loc)
4769 		} else if loc.onConditionalRegister() {
4770 			if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
4771 				return
4772 			}
4773 			c.compileReleaseRegisterToStack(loc)
4774 		}
4775 	}
4776 	return
4777 }
4778 
4779 func (c *amd64Compiler) onValueReleaseRegisterToStack(reg asm.Register) {
4780 	for i := uint64(0); i < c.locationStack.sp; i++ {
4781 		prevValue := &c.locationStack.stack[i]
4782 		if prevValue.register == reg {
4783 			c.compileReleaseRegisterToStack(prevValue)
4784 			break
4785 		}
4786 	}
4787 }
4788 
4789 // compileReleaseRegisterToStack implements compiler.compileReleaseRegisterToStack for amd64.
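// In effect (illustration only), this performs
//
//	stack[loc.stackPointer] = loc.register
//
// using a 32-bit, 64-bit, or 128-bit store depending on loc.valueType, and then marks the
// register as free in the location stack. A v128 value also frees the adjacent high 64-bit slot.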
4790 func (c *amd64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) { 4791 var inst asm.Instruction 4792 switch loc.valueType { 4793 case runtimeValueTypeV128Lo: 4794 inst = amd64.MOVDQU 4795 case runtimeValueTypeV128Hi: 4796 panic("BUG: V128Hi must be released to the stack along with V128Lo") 4797 case runtimeValueTypeI32, runtimeValueTypeF32: 4798 inst = amd64.MOVL 4799 case runtimeValueTypeI64, runtimeValueTypeF64: 4800 inst = amd64.MOVQ 4801 default: 4802 panic("BUG: unknown runtime value type") 4803 } 4804 4805 c.assembler.CompileRegisterToMemory(inst, loc.register, 4806 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 4807 amd64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) 4808 4809 // Mark the register is free. 4810 c.locationStack.releaseRegister(loc) 4811 4812 if loc.valueType == runtimeValueTypeV128Lo { 4813 // Higher 64-bits are released as well ^^. 4814 hi := &c.locationStack.stack[loc.stackPointer+1] 4815 c.locationStack.releaseRegister(hi) 4816 } 4817 } 4818 4819 func (c *amd64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) { 4820 if target := c.compiledTrapTargets[status]; target != nil { 4821 // We've already compiled this. 4822 // Invert the return condition to jump into the appropriate target. 4823 var returnCondition asm.Instruction 4824 switch skipCondition { 4825 case amd64.JHI: 4826 returnCondition = amd64.JLS 4827 case amd64.JLS: 4828 returnCondition = amd64.JHI 4829 case amd64.JNE: 4830 returnCondition = amd64.JEQ 4831 case amd64.JEQ: 4832 returnCondition = amd64.JNE 4833 case amd64.JCC: 4834 returnCondition = amd64.JCS 4835 case amd64.JCS: 4836 returnCondition = amd64.JCC 4837 case amd64.JPC: 4838 returnCondition = amd64.JPS 4839 case amd64.JPS: 4840 returnCondition = amd64.JPC 4841 case amd64.JPL: 4842 returnCondition = amd64.JMI 4843 case amd64.JMI: 4844 returnCondition = amd64.JPL 4845 default: 4846 panic("BUG: couldn't invert condition") 4847 } 4848 c.assembler.CompileJump(returnCondition).AssignJumpTarget(target) 4849 } else { 4850 skip := c.assembler.CompileJump(skipCondition) 4851 c.compileExitFromNativeCode(status) 4852 c.assembler.SetJumpTargetOnNext(skip) 4853 } 4854 } 4855 4856 func (c *amd64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) { 4857 if target := c.compiledTrapTargets[status]; target != nil { 4858 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(target) 4859 return 4860 } 4861 4862 switch status { 4863 case nativeCallStatusCodeReturned: 4864 // Save the target for reuse. 4865 c.compiledTrapTargets[status] = c.compileNOP() 4866 case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction: 4867 // Read the return address, and write it to callEngine.exitContext.returnAddress. 4868 returnAddressReg, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) 4869 if !ok { 4870 panic("BUG: cannot take free register") 4871 } 4872 c.assembler.CompileReadInstructionAddress(returnAddressReg, amd64.RET) 4873 c.assembler.CompileRegisterToMemory(amd64.MOVQ, 4874 returnAddressReg, amd64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset) 4875 default: 4876 if c.ir.IROperationSourceOffsetsInWasmBinary != nil { 4877 // This case, the execution traps and we want the top frame's source position in the stack trace. 4878 // Take RegR15 and store the instruction address onto callEngine.returnAddress. 
4879 			returnAddressReg := amd64.RegR15
4880 			c.assembler.CompileReadInstructionAddress(returnAddressReg, amd64.MOVQ)
4881 			c.assembler.CompileRegisterToMemory(amd64.MOVQ,
4882 				returnAddressReg, amd64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset)
4883 		} else {
4884 			// We won't use the source position, so just save the target for reuse.
4885 			c.compiledTrapTargets[status] = c.compileNOP()
4886 		}
4887 	}
4888 
4889 	// Write the status to callEngine.exitContext.statusCode.
4890 	c.assembler.CompileConstToMemory(amd64.MOVB, int64(status),
4891 		amd64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
4892 
4893 	// Write back the cached SP to callEngine.stackContext.stackPointer.
4894 	c.assembler.CompileConstToMemory(amd64.MOVQ, int64(c.locationStack.sp),
4895 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackPointerOffset)
4896 
4897 	c.assembler.CompileStandAlone(amd64.RET)
4898 }
4899 
4900 func (c *amd64Compiler) compilePreamble() (err error) {
4901 	// We assume all function parameters are already pushed onto the stack by
4902 	// the caller.
4903 	c.locationStack.init(c.typ)
4904 
4905 	if err := c.compileModuleContextInitialization(); err != nil {
4906 		return err
4907 	}
4908 
4909 	// Check if it's necessary to grow the value stack by using the maximum stack pointer.
4910 	if err = c.compileMaybeGrowStack(); err != nil {
4911 		return err
4912 	}
4913 
4914 	if c.withListener {
4915 		if err = c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerBefore); err != nil {
4916 			return err
4917 		}
4918 	}
4919 
4920 	c.compileReservedStackBasePointerInitialization()
4921 
4922 	// Finally, we initialize the reserved memory register based on the module context.
4923 	c.compileReservedMemoryPointerInitialization()
4924 	return
4925 }
4926 
4927 func (c *amd64Compiler) compileReservedStackBasePointerInitialization() {
4928 	// First, make amd64ReservedRegisterForStackBasePointerAddress point to the beginning of the slice backing array.
4929 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4930 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset,
4931 		amd64ReservedRegisterForStackBasePointerAddress)
4932 
4933 	// Next, add the stack base pointer (callEngine.stackContext.stackBasePointerInBytes) so that the register points to the base of the current call frame.
4934 	c.assembler.CompileMemoryToRegister(amd64.ADDQ,
4935 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
4936 		amd64ReservedRegisterForStackBasePointerAddress,
4937 	)
4938 }
4939 
4940 func (c *amd64Compiler) compileReservedMemoryPointerInitialization() {
4941 	if c.ir.HasMemory || c.ir.UsesMemory {
4942 		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4943 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
4944 			amd64ReservedRegisterForMemory,
4945 		)
4946 	}
4947 }
4948 
4949 // compileMaybeGrowStack adds instructions to check the necessity to grow the value stack,
4950 // and if so, makes a builtin function call to do so. These instructions are emitted in the
4951 // function's preamble.
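// The emitted check is, in effect (illustration only, hypothetical pseudo-Go):
//
//	if stackLenInBytes-stackBasePointerInBytes < stackPointerCeilInBytes {
//		callBuiltinFunction(builtinFunctionIndexGrowStack)
//	}
//
// where the stack pointer ceiling is not known while emitting this code; it is patched into the
// comparison afterwards via assignStackPointerCeilNeeded once the function's maximum stack usage
// has been determined.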
4952 func (c *amd64Compiler) compileMaybeGrowStack() error {
4953 	tmpRegister, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4954 	if !ok {
4955 		panic("BUG: cannot take free register")
4956 	}
4957 
4958 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4959 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset, tmpRegister)
4960 	c.assembler.CompileMemoryToRegister(amd64.SUBQ,
4961 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, tmpRegister)
4962 
4963 	// If stack base pointer + max stack pointer > stackLen, we need to grow the stack. Note: the constant 0 below is a placeholder; the actual stack pointer ceiling is assigned later via assignStackPointerCeilNeeded.
4964 	cmpWithStackPointerCeil := c.assembler.CompileRegisterToConst(amd64.CMPQ, tmpRegister, 0)
4965 	c.assignStackPointerCeilNeeded = cmpWithStackPointerCeil
4966 
4967 	// Jump if we have no need to grow.
4968 	jmpIfNoNeedToGrowStack := c.assembler.CompileJump(amd64.JCC)
4969 
4970 	// Otherwise, we have to make the builtin function call to grow the stack.
4971 	if err := c.compileCallBuiltinFunction(builtinFunctionIndexGrowStack); err != nil {
4972 		return err
4973 	}
4974 
4975 	c.assembler.SetJumpTargetOnNext(jmpIfNoNeedToGrowStack)
4976 	return nil
4977 }
4978 
4979 // compileModuleContextInitialization adds instructions to initialize callEngine.ModuleContext's fields based on
4980 // callEngine.ModuleContext.ModuleInstanceAddress.
4981 // This is called in two cases: in function preamble, and on the return from (non-Go) function calls.
4982 func (c *amd64Compiler) compileModuleContextInitialization() error {
4983 	// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the module instance's address,
4984 	// so we mark it used so that it won't be handed out as a free register until the module context initialization finishes.
4985 	c.locationStack.markRegisterUsed(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4986 	defer c.locationStack.markRegisterUnused(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4987 
4988 	// Obtain the temporary registers to be used in the following.
4989 	tmpRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4990 	if !found {
4991 		// This in theory never happens, as all the registers must be free except amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister.
4992 		return fmt.Errorf("could not find enough free registers")
4993 	}
4994 	c.locationStack.markRegisterUsed(tmpRegister)
4995 	tmpRegister2, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4996 	if !found {
4997 		// This in theory never happens, as all the registers must be free except the ones taken above.
4998 		return fmt.Errorf("could not find enough free registers")
4999 	}
5000 	c.locationStack.markRegisterUsed(tmpRegister2)
5001 
5002 	// If the module instance address stays the same, we can skip the entire code below.
5003 	// The rationale is that, in almost all use cases, users instantiate a single
5004 	// Wasm binary and run its functions, rather than doing import/export across multiple
5005 	// binaries. As a result, the cmp-and-jmp sequence below should be easy for the
5006 	// x64 CPU to branch-predict, since the jump is taken nearly 100% of the time across function calls.
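	// (Illustrative note.) In Go terms, the fast path below is roughly
	//
	//	if callEngine.moduleContext.moduleInstance == nextModuleInstance {
	//		goto alreadyInitialized
	//	}
	//
	// where nextModuleInstance stands for the value currently held in
	// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, and alreadyInitialized
	// is the jump target set at the end of this function.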
5007 	c.assembler.CompileMemoryToRegister(amd64.CMPQ,
5008 		amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
5009 	jmpIfModuleNotChange := c.assembler.CompileJump(amd64.JEQ)
5010 
5011 	// If engine.ModuleContext.ModuleInstance is not equal to the value in amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister,
5012 	// we have to put the new value there.
5013 	c.assembler.CompileRegisterToMemory(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister,
5014 		amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)
5015 
5016 	// Also, we have to update the following fields:
5017 	// * callEngine.moduleContext.globalElement0Address
5018 	// * callEngine.moduleContext.tableElement0Address
5019 	// * callEngine.moduleContext.memoryInstance
5020 	// * callEngine.moduleContext.memoryElement0Address
5021 	// * callEngine.moduleContext.memorySliceLen
5022 	// * callEngine.moduleContext.functionsElement0Address
5023 	// * callEngine.moduleContext.typeIDsElement0Address
5024 	// * callEngine.moduleContext.dataInstancesElement0Address
5025 	// * callEngine.moduleContext.elementInstancesElement0Address
5026 
5027 	// Update globalElement0Address.
5028 	//
5029 	// Note: if there's a global.get or global.set instruction in the function, the existence of the globals
5030 	// is ensured by function validation at the module instantiation phase, and that's why it is ok to
5031 	// skip the initialization if the module's globals slice is empty.
5032 	if len(c.ir.Globals) > 0 {
5033 		// Since ModuleInstance.Globals is a slice of pointers ([]*globalInstance), the address of the first item
5034 		// in the underlying array lies exactly at the globals offset (i.e. the slice's data pointer).
5035 		// See https://go.dev/blog/slices-intro if unfamiliar.
5036 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset, tmpRegister)
5037 
5038 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset)
5039 	}
5040 
5041 	// Update tableElement0Address.
5042 	//
5043 	// Note: if there's a table instruction in the function, the existence of the table
5044 	// is ensured by function validation at the module instantiation phase, and that's
5045 	// why it is ok to skip the initialization if the module's table doesn't exist.
5046 	if c.ir.HasTable {
5047 		// First, read &ModuleInstance.Tables[0] (the data pointer of the Tables slice).
5048 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceTablesOffset, tmpRegister)
5049 
5050 		// At this point, tmpRegister holds &ModuleInstance.Tables[0] (the location of the first table pointer),
5051 		// so we write it to callEngine.moduleContext.tablesElement0Address so that the compiled code
5052 		// can look up tables without going through ModuleInstance.
5053 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
5054 			amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset)
5055 
5056 		// Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
5057 		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
5058 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpRegister)
5059 		c.assembler.CompileRegisterToMemory(amd64.MOVQ,
5060 			tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
5061 	}
5062 
5063 	// Update memoryElement0Address and memorySliceLen.
5064 	//
5065 	// Note: if there's a memory instruction in the function, the memory instance must be non-nil.
5066 	// That is ensured by function validation at the module instantiation phase, and that's
5067 	// why it is ok to skip the initialization if the module's memory instance is nil.
5068 	if c.ir.HasMemory {
5069 		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
5070 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
5071 			tmpRegister)
5072 
5073 		// Set memory instance.
5074 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
5075 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset)
5076 
5077 		// Set length.
5078 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, memoryInstanceBufferLenOffset, tmpRegister2)
5079 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister2,
5080 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset)
5081 
5082 		// Set element zero address.
5083 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, memoryInstanceBufferOffset, tmpRegister2)
5084 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister2,
5085 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset)
5086 	}
5087 
5088 	// Update moduleContext.functionsElement0Address.
5089 	{
5090 		// "tmpRegister = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
5091 		//
5092 		// Go's interface is laid out in memory as two quad words, struct {tab, data uintptr},
5093 		// where tab points to the interface table, and data points to the actual
5094 		// implementation of the interface. In this case, we extract the "data" pointer as the *moduleEngine.
5095 		// See the following references for detail:
5096 		// * https://research.swtch.com/interfaces
5097 		// * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210
5098 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset, tmpRegister)
5099 
5100 		// "tmpRegister = [tmpRegister + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
5101 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, moduleEngineFunctionsOffset, tmpRegister)
5102 
5103 		// "callEngine.moduleContext.functionsElement0Address = tmpRegister".
5104 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, amd64ReservedRegisterForCallEngine,
5105 			callEngineModuleContextFunctionsElement0AddressOffset)
5106 	}
5107 
5108 	// Update dataInstancesElement0Address.
5109 	if c.ir.HasDataInstances {
5110 		// "tmpRegister = &moduleInstance.DataInstances[0]"
5111 		c.assembler.CompileMemoryToRegister(
5112 			amd64.MOVQ,
5113 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
5114 			tmpRegister,
5115 		)
5116 		// "callEngine.moduleContext.dataInstancesElement0Address = tmpRegister".
5117 		c.assembler.CompileRegisterToMemory(
5118 			amd64.MOVQ,
5119 			tmpRegister,
5120 			amd64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
5121 		)
5122 	}
5123 
5124 	// Update callEngine.moduleContext.elementInstancesElement0Address.
5125 	if c.ir.HasElementInstances {
5126 		// "tmpRegister = &moduleInstance.ElementInstances[0]"
5127 		c.assembler.CompileMemoryToRegister(
5128 			amd64.MOVQ,
5129 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
5130 			tmpRegister,
5131 		)
5132 		// "callEngine.moduleContext.elementInstancesElement0Address = tmpRegister".
5133 		c.assembler.CompileRegisterToMemory(
5134 			amd64.MOVQ,
5135 			tmpRegister,
5136 			amd64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
5137 		)
5138 	}
5139 
5140 	c.locationStack.markRegisterUnused(tmpRegister, tmpRegister2)
5141 
5142 	// Set the jump target towards the next instruction for the case where the module instance address hasn't changed.
5143 	c.assembler.SetJumpTargetOnNext(jmpIfModuleNotChange)
5144 	return nil
5145 }
5146 
5147 // compileEnsureOnRegister ensures that the given value is located in a register
5148 // of the appropriate type.
5149 func (c *amd64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
5150 	if loc.onStack() {
5151 		// Allocate the register.
5152 		reg, err := c.allocateRegister(loc.getRegisterType())
5153 		if err != nil {
5154 			return err
5155 		}
5156 
5157 		// Mark that the location now uses the register.
5158 		loc.setRegister(reg)
5159 		c.locationStack.markRegisterUsed(reg)
5160 
5161 		c.compileLoadValueOnStackToRegister(loc)
5162 	} else if loc.onConditionalRegister() {
5163 		err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
5164 	}
5165 	return
5166 }
5167 
5168 // compileMaybeSwapRegisters swaps two registers if they're not equal.
5169 func (c *amd64Compiler) compileMaybeSwapRegisters(reg1, reg2 asm.Register) {
5170 	if reg1 != reg2 {
5171 		c.assembler.CompileRegisterToRegister(amd64.XCHGQ, reg1, reg2)
5172 	}
5173 }
5174 
5175 // compilePreventCrossedTargetRegisters swaps registers so that, for each runtimeValueLocation in locs,
5176 // the register at the same index in targets is not occupied by some other runtimeValueLocation from locs.
5177 // It returns a closure to restore the original register placement.
5178 //
5179 // This function makes it possible to safely exchange one set of registers with another, where a register might be in both sets.
5180 // Each register will correspond either to itself or another register not present in its own set.
5181 //
5182 // For example, if we have locs = [AX, BX, CX], targets = [BX, SI, AX], then it'll do two swaps
5183 // to make locs = [BX, CX, AX].
5184 func (c *amd64Compiler) compilePreventCrossedTargetRegisters(locs []*runtimeValueLocation, targets []asm.Register) (restore func()) {
5185 	type swap struct{ srcIndex, dstIndex int }
5186 	var swaps []swap
5187 	for i := range locs {
5188 		targetLocation := -1 // -1 means not found.
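		// Search locs for the location whose register is targets[i] (i.e. the register we are about to claim).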
5189 for j := range locs { 5190 if locs[j].register == targets[i] { 5191 targetLocation = j 5192 break 5193 } 5194 } 5195 if targetLocation != -1 && targetLocation != i { 5196 c.compileMaybeSwapRegisters(locs[i].register, locs[targetLocation].register) 5197 locs[i].register, locs[targetLocation].register = locs[targetLocation].register, locs[i].register 5198 swaps = append(swaps, swap{i, targetLocation}) 5199 } 5200 } 5201 return func() { 5202 // Restore in reverse order because a register can be moved multiple times. 5203 for i := len(swaps) - 1; i >= 0; i -= 1 { 5204 r1, r2 := swaps[i].srcIndex, swaps[i].dstIndex 5205 c.compileMaybeSwapRegisters(locs[r1].register, locs[r2].register) 5206 locs[r1].register, locs[r2].register = locs[r2].register, locs[r1].register 5207 } 5208 } 5209 }