github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/compiler/impl_amd64.go

package compiler

// This file implements the compiler for amd64/x86_64 target.
// Please refer to https://www.felixcloutier.com/x86/index.html
// if unfamiliar with amd64 instructions used here.

import (
	"fmt"
	"math"

	"github.com/wasilibs/wazerox/internal/asm"
	"github.com/wasilibs/wazerox/internal/asm/amd64"
	"github.com/wasilibs/wazerox/internal/platform"
	"github.com/wasilibs/wazerox/internal/u32"
	"github.com/wasilibs/wazerox/internal/u64"
	"github.com/wasilibs/wazerox/internal/wasm"
	"github.com/wasilibs/wazerox/internal/wazeroir"
)

var (
	_minimum32BitSignedInt int32 = math.MinInt32
	_maximum32BitSignedInt int32 = math.MaxInt32
	_maximum32BitUnsignedInt uint32 = math.MaxUint32
	_minimum64BitSignedInt int64 = math.MinInt64
	_maximum64BitSignedInt int64 = math.MaxInt64
	_maximum64BitUnsignedInt uint64 = math.MaxUint64
	_float32SignBitMask uint32 = 1 << 31
	_float32RestBitMask = ^_float32SignBitMask
	_float64SignBitMask uint64 = 1 << 63
	_float64RestBitMask = ^_float64SignBitMask
	_float32ForMinimumSigned32bitInteger = uint32(0xCF00_0000)
	_float64ForMinimumSigned32bitInteger = uint64(0xC1E0_0000_0020_0000)
	_float32ForMinimumSigned64bitInteger = uint32(0xDF00_0000)
	_float64ForMinimumSigned64bitInteger = uint64(0xC3E0_0000_0000_0000)
	_float32ForMaximumSigned32bitIntPlusOne = uint32(0x4F00_0000)
	_float64ForMaximumSigned32bitIntPlusOne = uint64(0x41E0_0000_0000_0000)
	_float32ForMaximumSigned64bitIntPlusOne = uint32(0x5F00_0000)
	_float64ForMaximumSigned64bitIntPlusOne = uint64(0x43E0_0000_0000_0000)
)

var (
	// amd64ReservedRegisterForCallEngine: pointer to callEngine (i.e. *callEngine as uintptr).
	amd64ReservedRegisterForCallEngine = amd64.RegR13
	// amd64ReservedRegisterForStackBasePointerAddress: stack base pointer's address (callEngine.stackBasePointer) in the current function call.
	amd64ReservedRegisterForStackBasePointerAddress = amd64.RegR14
	// amd64ReservedRegisterForMemory: pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
	amd64ReservedRegisterForMemory = amd64.RegR15
)

var (
	amd64UnreservedVectorRegisters = []asm.Register{ //nolint
		amd64.RegX0, amd64.RegX1, amd64.RegX2, amd64.RegX3,
		amd64.RegX4, amd64.RegX5, amd64.RegX6, amd64.RegX7,
		amd64.RegX8, amd64.RegX9, amd64.RegX10, amd64.RegX11,
		amd64.RegX12, amd64.RegX13, amd64.RegX14, amd64.RegX15,
	}
	// Note that we never invoke the "call" instruction,
	// so we don't need to care about the calling convention.
	// TODO: Maybe it is safe to just save rbp and rsp somewhere
	// in Go-allocated variables, and reuse these registers
	// in compiled functions and write them back before returns.
	amd64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint
		amd64.RegAX, amd64.RegCX, amd64.RegDX, amd64.RegBX,
		amd64.RegSI, amd64.RegDI, amd64.RegR8, amd64.RegR9,
		amd64.RegR10, amd64.RegR11, amd64.RegR12,
	}
)

// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds *wasm.ModuleInstance of the
// next executing function instance. The value is set and used when making function calls
// or function returns in the ModuleContextInitialization. See compileModuleContextInitialization.
var amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister = amd64.RegR12

func (c *amd64Compiler) String() string {
	return c.locationStack.String()
}

// compileNOP implements compiler.compileNOP for the amd64 architecture.
func (c *amd64Compiler) compileNOP() asm.Node {
	return c.assembler.CompileStandAlone(amd64.NOP)
}

type amd64Compiler struct {
	assembler amd64.Assembler
	ir *wazeroir.CompilationResult
	cpuFeatures platform.CpuFeatureFlags
	// locationStack holds the state of the wazeroir virtual stack,
	// and each item is either placed in a register or on the actual memory stack.
	locationStack *runtimeValueLocationStack
	// labels hold per wazeroir label specific information in this function.
	labels [wazeroir.LabelKindNum][]amd64LabelInfo
	// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
	stackPointerCeil uint64
	// assignStackPointerCeilNeeded holds an asm.Node whose AssignDestinationConstant must be called with the determined stack pointer ceiling.
	assignStackPointerCeilNeeded asm.Node
	compiledTrapTargets [nativeCallStatusModuleClosed]asm.Node
	withListener bool
	typ *wasm.FunctionType
	// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
	// we cache it here, and reset and set to .locationStack in the Init method.
	locationStackForEntrypoint runtimeValueLocationStack
	// frameIDMax tracks the maximum value of frame id per function.
	frameIDMax int
	brTableTmp []runtimeValueLocation

	fourZeros,
	eightZeros,
	minimum32BitSignedInt,
	maximum32BitSignedInt,
	maximum32BitUnsignedInt,
	minimum64BitSignedInt,
	maximum64BitSignedInt,
	maximum64BitUnsignedInt,
	float32SignBitMask,
	float32RestBitMask,
	float64SignBitMask,
	float64RestBitMask,
	float32ForMinimumSigned32bitInteger,
	float64ForMinimumSigned32bitInteger,
	float32ForMinimumSigned64bitInteger,
	float64ForMinimumSigned64bitInteger,
	float32ForMaximumSigned32bitIntPlusOne,
	float64ForMaximumSigned32bitIntPlusOne,
	float32ForMaximumSigned64bitIntPlusOne,
	float64ForMaximumSigned64bitIntPlusOne *asm.StaticConst
}

func newAmd64Compiler() compiler {
	c := &amd64Compiler{
		assembler: amd64.NewAssembler(),
		locationStackForEntrypoint: newRuntimeValueLocationStack(),
		cpuFeatures: platform.CpuFeatures,
	}

	c.fourZeros = asm.NewStaticConst([]byte{0, 0, 0, 0})
	c.eightZeros = asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0})
	c.minimum32BitSignedInt = asm.NewStaticConst(u32.LeBytes(uint32(_minimum32BitSignedInt)))
	c.maximum32BitSignedInt = asm.NewStaticConst(u32.LeBytes(uint32(_maximum32BitSignedInt)))
	c.maximum32BitUnsignedInt = asm.NewStaticConst(u32.LeBytes(_maximum32BitUnsignedInt))
	c.minimum64BitSignedInt = asm.NewStaticConst(u64.LeBytes(uint64(_minimum64BitSignedInt)))
	c.maximum64BitSignedInt = asm.NewStaticConst(u64.LeBytes(uint64(_maximum64BitSignedInt)))
	c.maximum64BitUnsignedInt = asm.NewStaticConst(u64.LeBytes(_maximum64BitUnsignedInt))
	c.float32SignBitMask = asm.NewStaticConst(u32.LeBytes(_float32SignBitMask))
	c.float32RestBitMask = asm.NewStaticConst(u32.LeBytes(_float32RestBitMask))
	c.float64SignBitMask = asm.NewStaticConst(u64.LeBytes(_float64SignBitMask))
	c.float64RestBitMask = asm.NewStaticConst(u64.LeBytes(_float64RestBitMask))
	c.float32ForMinimumSigned32bitInteger = asm.NewStaticConst(u32.LeBytes(_float32ForMinimumSigned32bitInteger))
	c.float64ForMinimumSigned32bitInteger = asm.NewStaticConst(u64.LeBytes(_float64ForMinimumSigned32bitInteger))
	c.float32ForMinimumSigned64bitInteger = asm.NewStaticConst(u32.LeBytes(_float32ForMinimumSigned64bitInteger))
	c.float64ForMinimumSigned64bitInteger = asm.NewStaticConst(u64.LeBytes(_float64ForMinimumSigned64bitInteger))
	c.float32ForMaximumSigned32bitIntPlusOne = asm.NewStaticConst(u32.LeBytes(_float32ForMaximumSigned32bitIntPlusOne))
	c.float64ForMaximumSigned32bitIntPlusOne = asm.NewStaticConst(u64.LeBytes(_float64ForMaximumSigned32bitIntPlusOne))
	c.float32ForMaximumSigned64bitIntPlusOne = asm.NewStaticConst(u32.LeBytes(_float32ForMaximumSigned64bitIntPlusOne))
	c.float64ForMaximumSigned64bitIntPlusOne = asm.NewStaticConst(u64.LeBytes(_float64ForMaximumSigned64bitIntPlusOne))
	return c
}

// Init implements compiler.Init.
func (c *amd64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
	c.assembler.Reset()
	c.locationStackForEntrypoint.reset()
	c.resetLabels()
	*c = amd64Compiler{
		ir: ir,
		withListener: withListener,
		typ: typ,
		assembler: c.assembler,
		cpuFeatures: c.cpuFeatures,
		labels: c.labels,
		locationStackForEntrypoint: c.locationStackForEntrypoint,
		brTableTmp: c.brTableTmp,
		fourZeros: c.fourZeros,
		eightZeros: c.eightZeros,
		minimum32BitSignedInt: c.minimum32BitSignedInt,
		maximum32BitSignedInt: c.maximum32BitSignedInt,
		maximum32BitUnsignedInt: c.maximum32BitUnsignedInt,
		minimum64BitSignedInt: c.minimum64BitSignedInt,
		maximum64BitSignedInt: c.maximum64BitSignedInt,
		maximum64BitUnsignedInt: c.maximum64BitUnsignedInt,
		float32SignBitMask: c.float32SignBitMask,
		float32RestBitMask: c.float32RestBitMask,
		float64SignBitMask: c.float64SignBitMask,
		float64RestBitMask: c.float64RestBitMask,
		float32ForMinimumSigned32bitInteger: c.float32ForMinimumSigned32bitInteger,
		float64ForMinimumSigned32bitInteger: c.float64ForMinimumSigned32bitInteger,
		float32ForMinimumSigned64bitInteger: c.float32ForMinimumSigned64bitInteger,
		float64ForMinimumSigned64bitInteger: c.float64ForMinimumSigned64bitInteger,
		float32ForMaximumSigned32bitIntPlusOne: c.float32ForMaximumSigned32bitIntPlusOne,
		float64ForMaximumSigned32bitIntPlusOne: c.float64ForMaximumSigned32bitIntPlusOne,
		float32ForMaximumSigned64bitIntPlusOne: c.float32ForMaximumSigned64bitIntPlusOne,
		float64ForMaximumSigned64bitIntPlusOne: c.float64ForMaximumSigned64bitIntPlusOne,
	}

	// Reuses the initial location stack for the compilation of subsequent functions.
	c.locationStack = &c.locationStackForEntrypoint
}

// resetLabels resets the existing content in amd64Compiler.labels so that
// we can reuse the allocated slices and stacks in the subsequent compilations.
func (c *amd64Compiler) resetLabels() {
	for i := range c.labels {
		for j := range c.labels[i] {
			if j > c.frameIDMax {
				// Only need to reset until the maximum frame id. This makes the compilation faster for large binaries.
				break
			}
			l := &c.labels[i][j]
			l.initialInstruction = nil
			l.stackInitialized = false
			l.initialStack.reset()
		}
	}
}

// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the amd64 architecture.
func (c *amd64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
	return c.locationStack
}

// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *amd64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
	if c.stackPointerCeil < c.locationStack.stackPointerCeil {
		c.stackPointerCeil = c.locationStack.stackPointerCeil
	}
	c.locationStack = newStack
}

// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for amd64.
func (c *amd64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
	ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
	c.locationStack.markRegisterUsed(reg)
	return
}

// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for amd64.
func (c *amd64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
	lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
	c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
	c.locationStack.markRegisterUsed(reg)
	return
}

type amd64LabelInfo struct {
	// initialInstruction is the initial instruction for this label so other blocks can jump into it.
	initialInstruction asm.Node
	// initialStack is the initial value location stack from which we start compiling this label.
	initialStack runtimeValueLocationStack
	stackInitialized bool
}

func (c *amd64Compiler) label(label wazeroir.Label) *amd64LabelInfo {
	kind := label.Kind()
	frames := c.labels[kind]
	frameID := label.FrameID()
	if c.frameIDMax < frameID {
		c.frameIDMax = frameID
	}
	// If the frameID is not allocated yet, expand the slice until the frameID fits,
	// so that we can reduce the allocations in the subsequent compilations.
	if diff := frameID - len(frames) + 1; diff > 0 {
		for i := 0; i < diff; i++ {
			frames = append(frames, amd64LabelInfo{initialStack: newRuntimeValueLocationStack()})
		}
		c.labels[kind] = frames
	}
	return &frames[frameID]
}

// compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the amd64 architecture.
func (c *amd64Compiler) compileBuiltinFunctionCheckExitCode() error {
	if err := c.compileCallBuiltinFunction(builtinFunctionIndexCheckExitCode); err != nil {
		return err
	}

	// After the function call, we have to initialize the stack base pointer and memory reserved registers.
	c.compileReservedStackBasePointerInitialization()
	c.compileReservedMemoryPointerInitialization()
	return nil
}

// compileGoDefinedHostFunction constructs the entire code to enter the host function implementation,
// and return to the caller.
func (c *amd64Compiler) compileGoDefinedHostFunction() error {
	// First we must update the location stack to reflect the number of host function inputs.
	c.locationStack.init(c.typ)

	if c.withListener {
		if err := c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// The host function needs access to the caller's module instance, and the caller's information is stored on the stack
	// (as described in the doc of callEngine.stack). Here, we get the caller's *function from the stack, load its
	// *wasm.ModuleInstance, and save it in callEngine.exitContext.callerModuleInstance so we can pass it to the host function
	// without sacrificing performance.
	c.compileReservedStackBasePointerInitialization()
	// Alias for readability.
	tmp := amd64.RegAX
	// Get the location of the callerFunction (*function) in the stack, which depends on the signature.
	_, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
	// Load the value into the tmp register: tmp = &function{..}
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	// tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...}
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, functionModuleInstanceOffset, tmp)
	// Store it into callEngine.exitContext.callerModuleInstance.
	c.assembler.CompileRegisterToMemory(amd64.MOVQ,
		tmp,
		amd64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset)
	// Reset the state of the callerFunction value location so that we won't mess up the subsequent code generation below.
	c.locationStack.releaseRegister(callerFunction)

	if err := c.compileCallGoHostFunction(); err != nil {
		return err
	}

	// Initializes the reserved stack base pointer which is used to retrieve the call frame stack.
	c.compileReservedStackBasePointerInitialization()

	// A Go function can change the module state in arbitrary ways, so we have to force
	// the callEngine.moduleContext initialization on the function return. To do so,
	// we zero-out callEngine.moduleInstance.
	c.assembler.CompileConstToMemory(amd64.MOVQ,
		0, amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)
	return c.compileReturnFunction()
}

// compile implements compiler.compile for the amd64 architecture.
func (c *amd64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) {
	// c.stackPointerCeil tracks the stack pointer ceiling (max seen) value across all runtimeValueLocationStack(s)
	// used for all labels (via setLocationStack), excluding the current one.
	// Hence, we check here if the final block's maximum exceeds the current c.stackPointerCeil.
	stackPointerCeil = c.stackPointerCeil
	if stackPointerCeil < c.locationStack.stackPointerCeil {
		stackPointerCeil = c.locationStack.stackPointerCeil
	}

	// Now that the max stack pointer is determined, we invoke the callback.
	// Note this MUST be called before Assemble() below.
	c.assignStackPointerCeil(stackPointerCeil)

	err = c.assembler.Assemble(buf)
	return
}

// compileUnreachable implements compiler.compileUnreachable for the amd64 architecture.
func (c *amd64Compiler) compileUnreachable() error {
	c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
	return nil
}

// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the amd64 architecture.
func (c *amd64Compiler) assignStackPointerCeil(ceil uint64) {
	if c.assignStackPointerCeilNeeded != nil {
		c.assignStackPointerCeilNeeded.AssignDestinationConstant(int64(ceil) << 3)
	}
}

// compileSet implements compiler.compileSet for the amd64 architecture.
func (c *amd64Compiler) compileSet(o *wazeroir.UnionOperation) error {
	depth := int(o.U1)
	isTargetVector := o.B3

	setTargetIndex := int(c.locationStack.sp) - 1 - depth

	if isTargetVector {
		_ = c.locationStack.pop() // ignore the higher 64-bits.
	}
	v := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	targetLocation := &c.locationStack.stack[setTargetIndex]
	if targetLocation.onRegister() {
		// We no longer need the register previously used by the target location.
		c.locationStack.markRegisterUnused(targetLocation.register)
	}

	reg := v.register
	targetLocation.setRegister(reg)
	targetLocation.valueType = v.valueType
	if isTargetVector {
		hi := &c.locationStack.stack[setTargetIndex+1]
		hi.setRegister(reg)
	}
	return nil
}

// compileGlobalGet implements compiler.compileGlobalGet for the amd64 architecture.
func (c *amd64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	intReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, move the pointer to the global slice into the allocated register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, intReg)

	index := o.U1

	// Now, move the location of the global instance into the register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, intReg, 8*int64(index), intReg)

	// When an integer, reuse the pointer register for the value. Otherwise, allocate a float register for it.
	valueReg := intReg
	var vt runtimeValueType
	var inst asm.Instruction
	switch c.ir.Globals[index].ValType {
	case wasm.ValueTypeI32:
		inst = amd64.MOVL
		vt = runtimeValueTypeI32
	case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
		inst = amd64.MOVQ
		vt = runtimeValueTypeI64
	case wasm.ValueTypeF32:
		inst = amd64.MOVL
		vt = runtimeValueTypeF32
		valueReg, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
	case wasm.ValueTypeF64:
		inst = amd64.MOVQ
		vt = runtimeValueTypeF64
		valueReg, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
	case wasm.ValueTypeV128:
		inst = amd64.MOVDQU
		vt = runtimeValueTypeV128Lo
		valueReg, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
	default:
		panic("BUG: unknown runtime value type")
	}

	// Using the register holding the pointer to the target instance, move its value into a register.
	c.assembler.CompileMemoryToRegister(inst, intReg, globalInstanceValueOffset, valueReg)

	// Record that the retrieved global value on the top of the stack is now in a register.
	if vt == runtimeValueTypeV128Lo {
		c.pushVectorRuntimeValueLocationOnRegister(valueReg)
	} else {
		c.pushRuntimeValueLocationOnRegister(valueReg, vt)
	}
	return nil
}

// compileGlobalSet implements compiler.compileGlobalSet for the amd64 architecture.
func (c *amd64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error {
	index := o.U1

	wasmValueType := c.ir.Globals[index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128

	// First, move the value to set into a temporary register.
	val := c.locationStack.pop()
	if isV128 {
		// The previously popped val is the higher 64 bits, and we have to use the lower 64 bits'
		// runtimeValueLocation for allocation, etc.
		val = c.locationStack.pop()
	}
	if err := c.compileEnsureOnRegister(val); err != nil {
		return err
	}

	// Allocate a register to hold the memory location of the target global instance.
	intReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, move the pointer to the global slice into the allocated register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, intReg)

	// Now, move the location of the global instance into the register.
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, intReg, 8*int64(index), intReg)

	// Now ready to write the value to the global instance location.
	var inst asm.Instruction
	if isV128 {
		inst = amd64.MOVDQU
	} else if wasmValueType == wasm.ValueTypeI32 || wasmValueType == wasm.ValueTypeF32 {
		inst = amd64.MOVL
	} else {
		inst = amd64.MOVQ
	}
	c.assembler.CompileRegisterToMemory(inst, val.register, intReg, globalInstanceValueOffset)

	// Since the value is now written to memory, release the value register.
	c.locationStack.releaseRegister(val)
	return nil
}

// compileBr implements compiler.compileBr for the amd64 architecture.
func (c *amd64Compiler) compileBr(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	return c.branchInto(wazeroir.Label(o.U1))
}

// branchInto adds the instructions necessary to jump into the given branch target.
func (c *amd64Compiler) branchInto(target wazeroir.Label) error {
	if target.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		if c.ir.LabelCallers[target] > 1 {
			// We can only re-use the register state when there's a single call-site.
			// Release the existing values on registers to the stack if there are multiple call-sites,
			// to have a consistent value location state at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		targetLabel := c.label(target)
		if !targetLabel.stackInitialized {
			targetLabel.initialStack.cloneFrom(*c.locationStack)
			targetLabel.stackInitialized = true
		}
		jmp := c.assembler.CompileJump(amd64.JMP)
		c.assignJumpTarget(target, jmp)
	}
	return nil
}

// compileBrIf implements compiler.compileBrIf for the amd64 architecture.
func (c *amd64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
	cond := c.locationStack.pop()
	var jmpWithCond asm.Node
	if cond.onConditionalRegister() {
		var inst asm.Instruction
		switch cond.conditionalRegister {
		case amd64.ConditionalRegisterStateE:
			inst = amd64.JEQ
		case amd64.ConditionalRegisterStateNE:
			inst = amd64.JNE
		case amd64.ConditionalRegisterStateS:
			inst = amd64.JMI
		case amd64.ConditionalRegisterStateNS:
			inst = amd64.JPL
		case amd64.ConditionalRegisterStateG:
			inst = amd64.JGT
		case amd64.ConditionalRegisterStateGE:
			inst = amd64.JGE
		case amd64.ConditionalRegisterStateL:
			inst = amd64.JLT
		case amd64.ConditionalRegisterStateLE:
			inst = amd64.JLE
		case amd64.ConditionalRegisterStateA:
			inst = amd64.JHI
		case amd64.ConditionalRegisterStateAE:
			inst = amd64.JCC
		case amd64.ConditionalRegisterStateB:
			inst = amd64.JCS
		case amd64.ConditionalRegisterStateBE:
			inst = amd64.JLS
		}
		jmpWithCond = c.assembler.CompileJump(inst)
	} else {
		// Usually the comparison operand for br_if is on the conditional register,
		// but in some cases, it is on the stack or a general-purpose register.
		// For example, the following code
		//   i64.const 1
		//   local.get 1
		//   i64.add
		//   br_if ....
		// will try to use the result of i64.add, which resides on the (virtual) stack,
		// as the operand for the br_if instruction.
		if err := c.compileEnsureOnRegister(cond); err != nil {
			return err
		}
		// Check if the value does not equal zero.
		c.assembler.CompileRegisterToRegister(amd64.TESTQ, cond.register, cond.register)

		// Emit the jump instruction which jumps when the value does not equal zero.
		jmpWithCond = c.assembler.CompileJump(amd64.JNE)
		c.locationStack.markRegisterUnused(cond.register)
	}

	// Make sure that the next coming label is the else jump target.
	thenTarget := wazeroir.Label(o.U1)
	elseTarget := wazeroir.Label(o.U2)
	thenToDrop := o.U3

	// Here's a diagram of how we organize the instructions necessary for the br_if operation.
	//
	// jmp_with_cond -> jmp (.Else) -> Then operations...
	//    |---------(satisfied)------------^^^
	//
	// Note that the .Else branch doesn't have ToDrop as .Else in reality
	// corresponds to either an If's Else block or a br_if's else block in Wasm.

	// Emit the else branch.
	if elseTarget.IsReturnTarget() {
		if err := c.compileReturnFunction(); err != nil {
			return err
		}
	} else {
		labelInfo := c.label(elseTarget)
		if !labelInfo.stackInitialized {
			labelInfo.initialStack.cloneFrom(*c.locationStack)
			labelInfo.stackInitialized = true
		}

		elseJmp := c.assembler.CompileJump(amd64.JMP)
		c.assignJumpTarget(elseTarget, elseJmp)
	}

	// Handle the then branch.
	c.assembler.SetJumpTargetOnNext(jmpWithCond)
	if err := compileDropRange(c, thenToDrop); err != nil {
		return err
	}
	if thenTarget.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		thenLabel := thenTarget
		if c.ir.LabelCallers[thenLabel] > 1 {
			// We can only re-use the register state when there's a single call-site.
			// Release the existing values on registers to the stack if there are multiple call-sites,
			// to have a consistent value location state at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		labelInfo := c.label(thenLabel)
		if !labelInfo.stackInitialized {
			labelInfo.initialStack.cloneFrom(*c.locationStack)
			labelInfo.stackInitialized = true
		}
		thenJmp := c.assembler.CompileJump(amd64.JMP)
		c.assignJumpTarget(thenLabel, thenJmp)
		return nil
	}
}

// compileBrTable implements compiler.compileBrTable for the amd64 architecture.
func (c *amd64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
	index := c.locationStack.pop()

	// If the operation only consists of the default target, we branch into it and return early.
	if len(o.Us) == 2 {
		c.locationStack.releaseRegister(index)
		if err := compileDropRange(c, o.Us[1]); err != nil {
			return err
		}
		return c.branchInto(wazeroir.Label(o.Us[0]))
	}

	// Otherwise, we jump into the selected branch.
	if err := c.compileEnsureOnRegister(index); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, we move the length of the target list into the tmp register.
	c.assembler.CompileConstToRegister(amd64.MOVQ, int64(len(o.Us)/2-1), tmp)

	// Then, we compare the value with the length of targets.
	c.assembler.CompileRegisterToRegister(amd64.CMPL, tmp, index.register)

	// If the value is larger than the length,
	// we round the index to the length, as the spec states that
	// if the index is larger than or equal to the length of the list,
	// we branch into the default branch.
	c.assembler.CompileRegisterToRegister(amd64.CMOVQCS, tmp, index.register)

	// We prepare the static data which holds the offset of
	// each target's first instruction (incl. default)
	// relative to the beginning of label tables.
	//
	// For example, if we have targets=[L0, L1] and default=L_DEFAULT,
	// we emit code like this at [Emit the code for each target and default branch] below.
	//
	// L0:
	//  0x123001: XXXX, ...
	//  .....
	// L1:
	//  0x123005: YYY, ...
	//  .....
	// L_DEFAULT:
	//  0x123009: ZZZ, ...
	//
	// then offsetData becomes like [0x0, 0x4, 0x8].
	// By using this offset list, we could jump into the label for the index by
	// "jmp offsetData[index]+0x123001" and "0x123001" can be acquired by the "LEA"
	// instruction.
	//
	// Note: We store each offset as a 32-bit unsigned integer in 4 consecutive bytes. So more precisely,
	// the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
	//
	// Note: this is similar to how GCC implements switch statements in C.
	offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2)))

	// Load the offsetData's address into tmp.
	if err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, offsetData, tmp); err != nil {
		return err
	}

	// Now we have the address of the first byte of offsetData in the tmp register.
	// So the target offset's first byte is at tmp+index*4, as we store
	// each offset as a 4-byte (32-bit) integer.
	// Here, we store the offset into index.register.
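	// Added commentary (not in the original source): as a concrete illustration of the layout
	// described above, with targets=[L0, L1] and default=L_DEFAULT the offsetData bytes are the
	// little-endian uint32s
	//   [0x0,0x0,0x0,0x0,  0x4,0x0,0x0,0x0,  0x8,0x0,0x0,0x0]
	// so for index=1 the MOVL below loads *(tmp + 1*4) = 0x4, the offset of L1 relative to L0.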
	c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVL, tmp, 0, index.register, 4, index.register)

	// Now we read the address of the beginning of the jump table.
	// In the above example, this corresponds to reading the address of 0x123001.
	c.assembler.CompileReadInstructionAddress(tmp, amd64.JMP)

	// Now we have the address of L0 in the tmp register, and the offset to the target label in index.register.
	// So we could achieve the br_table jump by adding them and jumping into the resulting address.
	c.assembler.CompileRegisterToRegister(amd64.ADDQ, index.register, tmp)

	c.assembler.CompileJumpToRegister(amd64.JMP, tmp)

	// We no longer need the index's register, so mark it unused.
	c.locationStack.markRegisterUnused(index.register)

	// [Emit the code for each target and default branch]
	labelInitialInstructions := make([]asm.Node, len(o.Us)/2)

	// Since we might end up having different stack states in each branch,
	// we need to save the initial stack state here, and use the same initial state
	// for each iteration.
	initialLocationStack := c.getSavedTemporaryLocationStack()

	for i := range labelInitialInstructions {
		// Emit the initial instruction of each target.
		// We use NOP as we don't yet know the next instruction in each label.
		// The assembler would optimize out this NOP during code generation, so this is harmless.
		labelInitialInstructions[i] = c.assembler.CompileStandAlone(amd64.NOP)

		targetLabel := wazeroir.Label(o.Us[i*2])
		targetToDrop := o.Us[i*2+1]
		if err = compileDropRange(c, targetToDrop); err != nil {
			return err
		}
		if err = c.branchInto(targetLabel); err != nil {
			return err
		}
		// After the iteration, reset the stack's state with initialLocationStack.
		c.locationStack.cloneFrom(initialLocationStack)
	}

	c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
	return nil
}

func (c *amd64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
	initialLocationStack := *c.locationStack // Take copy!
	// Use c.brTableTmp for the underlying stack so that we can reduce the allocations.
	if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
		c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
	}
	copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
	initialLocationStack.stack = c.brTableTmp
	return initialLocationStack
}

func (c *amd64Compiler) assignJumpTarget(label wazeroir.Label, jmpInstruction asm.Node) {
	jmpTargetLabel := c.label(label)
	targetInst := jmpTargetLabel.initialInstruction
	if targetInst == nil {
		// If the label isn't compiled yet, allocate the NOP node, and set it as the initial instruction.
		targetInst = c.assembler.AllocateNOP()
		jmpTargetLabel.initialInstruction = targetInst
	}
	jmpInstruction.AssignJumpTarget(targetInst)
}

// compileLabel implements compiler.compileLabel for the amd64 architecture.
func (c *amd64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipLabel bool) {
	label := wazeroir.Label(o.U1)
	labelInfo := c.label(label)

	// If initialStack is not set, that means this label has never been reached.
	if !labelInfo.stackInitialized {
		skipLabel = true
		return
	}

	// We use NOP as the beginning of the instructions in a label.
	if labelBegin := labelInfo.initialInstruction; labelBegin == nil {
		// We use NOP as the beginning of the instructions in a label.
		// This should eventually be optimized out by the assembler.
		labelInfo.initialInstruction = c.assembler.CompileStandAlone(amd64.NOP)
	} else {
		c.assembler.Add(labelBegin)
	}

	// Set the initial stack.
	c.setLocationStack(&labelInfo.initialStack)
	return
}

// compileCall implements compiler.compileCall for the amd64 architecture.
func (c *amd64Compiler) compileCall(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	functionIndex := o.U1

	target := c.ir.Functions[functionIndex]
	targetType := &c.ir.Types[target]

	targetAddressRegister, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// First, move the byte offset of the target function within callEngine.functions into the target register.
	c.assembler.CompileConstToRegister(amd64.MOVQ, int64(functionIndex)*functionSize, targetAddressRegister)

	// Next, we add the address of the first item of the callEngine.functions slice (= &callEngine.functions[0])
	// to the target register.
	c.assembler.CompileMemoryToRegister(amd64.ADDQ, amd64ReservedRegisterForCallEngine,
		callEngineModuleContextFunctionsElement0AddressOffset, targetAddressRegister)

	if err := c.compileCallFunctionImpl(targetAddressRegister, targetType); err != nil {
		return err
	}
	return nil
}

// compileCallIndirect implements compiler.compileCallIndirect for the amd64 architecture.
func (c *amd64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) error {
	offset := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(offset); err != nil {
		return err
	}
	typeIndex := o.U1
	tableIndex := o.U2

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.locationStack.markRegisterUsed(tmp)

	tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.locationStack.markRegisterUsed(tmp2)

	// Load the address of the target table: tmp = &module.Tables[0]
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp)
	// tmp = &module.Tables[0] + tableIndex*8 = &module.Tables[0] + sizeOf(*TableInstance)*tableIndex = module.Tables[tableIndex].
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex*8), tmp)

	// Then, we need to trap if the offset exceeds the length of the table.
	c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, offset.register)
	c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusCodeInvalidTableAccess)

	// Next, we check if the target's type matches the operation's.
	// In order to get the type instance's address, we have to multiply the offset
	// by 8, as the table is a Go []uintptr and the size of uintptr equals 8 bytes (== 2^3).
	c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, offset.register)

	// Adds the address of wasm.Table[0] stored as callEngine.tableElement0Address to the offset.
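	// Added commentary (not in the original source): at this point offset.register holds
	// originalOffset*8, the byte offset within the table's []uintptr, so after the ADDQ below
	// it becomes &table[originalOffset], and the MOVQ that follows dereferences that address.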
	c.assembler.CompileMemoryToRegister(amd64.ADDQ,
		tmp, tableInstanceTableOffset, offset.register)

	// "offset = *offset" (== table[offset], the uintptr of the target *function)
	c.assembler.CompileMemoryToRegister(amd64.MOVQ, offset.register, 0, offset.register)

	// At this point offset.register holds the address of the target *function (as uintptr) at wasm.Table[offset].
	//
	// Check if the value of table[offset] equals zero, meaning that the target is uninitialized.
	c.assembler.CompileRegisterToRegister(amd64.TESTQ, offset.register, offset.register)

	// Skipped if the target is initialized.
	c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusCodeInvalidTableAccess)

	// Next, we need to check that the type matches, i.e. table[offset].source.TypeID == targetFunctionType's typeID.
	//
	// "tmp2 = [&moduleInstance.TypeIDs[0] + index*4] (== moduleInstance.TypeIDs[index])"
	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
		amd64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
		tmp2)
	c.assembler.CompileMemoryToRegister(amd64.MOVL, tmp2, int64(typeIndex)*4, tmp2)

	// Skipped if the type matches.
	c.assembler.CompileMemoryToRegister(amd64.CMPL, offset.register, functionTypeIDOffset, tmp2)
	c.compileMaybeExitFromNativeCode(amd64.JEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall)
	targetFunctionType := &c.ir.Types[typeIndex]
	if err = c.compileCallFunctionImpl(offset.register, targetFunctionType); err != nil {
		return err
	}

	// The offset register should be marked as unused as it is consumed by the function call.
	c.locationStack.markRegisterUnused(offset.register, tmp, tmp2)
	return nil
}

// compileDrop implements compiler.compileDrop for the amd64 architecture.
func (c *amd64Compiler) compileDrop(o *wazeroir.UnionOperation) error {
	return compileDropRange(c, o.U1)
}

// compileSelectV128Impl implements compileSelect for vector values.
func (c *amd64Compiler) compileSelectV128Impl(selectorReg asm.Register) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// Compare the conditional value with zero.
	c.assembler.CompileRegisterToRegister(amd64.TESTQ, selectorReg, selectorReg)

	// Set the jump if the top value is not zero.
	jmpIfNotZero := c.assembler.CompileJump(amd64.JNE)

	// In this branch, we select the value of x2, so we move the value into x1.register so that
	// we can have the result in x1.register regardless of the selection.
	c.assembler.CompileRegisterToRegister(amd64.MOVDQU, x2.register, x1.register)

	// Otherwise, we don't need to adjust the value; we just jump to the next instruction.
	c.assembler.SetJumpTargetOnNext(jmpIfNotZero)

	// As noted, the result exists in x1.register regardless of the selector.
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	// Plus, x2.register is no longer used.
	c.locationStack.markRegisterUnused(x2.register)
	c.locationStack.markRegisterUnused(selectorReg)
	return nil
}

// compileSelect implements compiler.compileSelect for the amd64 architecture.
//
// The emitted native code depends on whether the values are on
// physical registers or the memory stack, or maybe a conditional register.
func (c *amd64Compiler) compileSelect(o *wazeroir.UnionOperation) error {
	cv := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(cv); err != nil {
		return err
	}

	isTargetVector := o.B3
	if isTargetVector {
		return c.compileSelectV128Impl(cv.register)
	}

	x2 := c.locationStack.pop()
	// We do not consume x1 here, but modify the value according to
	// the conditional value "cv" above.
	peekedX1 := c.locationStack.peek()

	// Compare the conditional value with zero.
	c.assembler.CompileRegisterToRegister(amd64.TESTQ, cv.register, cv.register)

	// Now we can use cv.register as a temporary location.
	// We alias it here for readability.
	tmpRegister := cv.register

	// Set the jump if the top value is not zero.
	jmpIfNotZero := c.assembler.CompileJump(amd64.JNE)

	// If the value is zero, we must place the value of x2 onto the stack position of x1.

	// First we copy the value of x2 to the temporary register if x2 is not currently on a register.
	if x2.onStack() {
		x2.register = tmpRegister
		c.compileLoadValueOnStackToRegister(x2)
	}

	//
	// At this point x2's value is always on a register.
	//

	// Then release the value in x2's register to x1's stack position.
	if peekedX1.onRegister() {
		c.assembler.CompileRegisterToRegister(amd64.MOVQ, x2.register, peekedX1.register)
	} else {
		peekedX1.register = x2.register
		c.compileReleaseRegisterToStack(peekedX1) // Note: inside, we mark the register unused!
	}

	// Otherwise, we don't need to adjust the value; we just jump to the next instruction.
	c.assembler.SetJumpTargetOnNext(jmpIfNotZero)

	// In any case, we don't need x2 and cv anymore!
	c.locationStack.releaseRegister(x2)
	c.locationStack.releaseRegister(cv)
	return nil
}

// compilePick implements compiler.compilePick for the amd64 architecture.
func (c *amd64Compiler) compilePick(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	depth := o.U1
	isTargetVector := o.B3

	// TODO: if we track the type of values on the stack,
	// we could optimize the instruction according to the bit size of the value.
	// For now, we just move the entire register i.e. as a quad word (8 bytes).
	pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)]
	reg, err := c.allocateRegister(pickTarget.getRegisterType())
	if err != nil {
		return err
	}

	if pickTarget.onRegister() {
		var inst asm.Instruction
		if isTargetVector {
			inst = amd64.MOVDQU
		} else if pickTarget.valueType == runtimeValueTypeI32 { // amd64 cannot copy single-precisions between registers.
			inst = amd64.MOVL
		} else {
			inst = amd64.MOVQ
		}
		c.assembler.CompileRegisterToRegister(inst, pickTarget.register, reg)
	} else if pickTarget.onStack() {
		// Copy the value from the stack.
		var inst asm.Instruction
		if isTargetVector {
			inst = amd64.MOVDQU
		} else if pickTarget.valueType == runtimeValueTypeI32 || pickTarget.valueType == runtimeValueTypeF32 {
			inst = amd64.MOVL
		} else {
			inst = amd64.MOVQ
		}
		// Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range.
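		// Added commentary (not in the original source): values spilled to the memory stack live at
		// stackBasePointer + stackPointer*8, which is why the load below addresses
		// amd64ReservedRegisterForStackBasePointerAddress with the byte offset pickTarget.stackPointer*8.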
		c.assembler.CompileMemoryToRegister(inst, amd64ReservedRegisterForStackBasePointerAddress,
			int64(pickTarget.stackPointer)*8, reg)
	}
	// Now we have already placed the picked value on the register,
	// so push the location onto the stack.
	if isTargetVector {
		c.pushVectorRuntimeValueLocationOnRegister(reg)
	} else {
		c.pushRuntimeValueLocationOnRegister(reg, pickTarget.valueType)
	}
	return nil
}

// compileAdd implements compiler.compileAdd for the amd64 architecture.
func (c *amd64Compiler) compileAdd(o *wazeroir.UnionOperation) error {
	// TODO: if the previous instruction is const, then
	// this can be optimized. Same goes for other arithmetic instructions.

	var instruction asm.Instruction

	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		instruction = amd64.ADDL
	case wazeroir.UnsignedTypeI64:
		instruction = amd64.ADDQ
	case wazeroir.UnsignedTypeF32:
		instruction = amd64.ADDSS
	case wazeroir.UnsignedTypeF64:
		instruction = amd64.ADDSD
	}

	x2 := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.peek() // Note this is peek!
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 += x2.
	c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register)

	// We no longer need the x2 register after the ADD operation here,
	// so we release it.
	c.locationStack.releaseRegister(x2)
	return nil
}

// compileSub implements compiler.compileSub for the amd64 architecture.
func (c *amd64Compiler) compileSub(o *wazeroir.UnionOperation) error {
	// TODO: if the previous instruction is const, then
	// this can be optimized. Same goes for other arithmetic instructions.

	var instruction asm.Instruction
	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		instruction = amd64.SUBL
	case wazeroir.UnsignedTypeI64:
		instruction = amd64.SUBQ
	case wazeroir.UnsignedTypeF32:
		instruction = amd64.SUBSS
	case wazeroir.UnsignedTypeF64:
		instruction = amd64.SUBSD
	}

	x2 := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.peek() // Note this is peek!
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 -= x2.
	c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register)

	// We no longer need the x2 register after the SUB operation here,
	// so we release it.
	c.locationStack.releaseRegister(x2)
	return nil
}

// compileMul implements compiler.compileMul for the amd64 architecture.
func (c *amd64Compiler) compileMul(o *wazeroir.UnionOperation) (err error) {
	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		err = c.compileMulForInts(true, amd64.MULL)
	case wazeroir.UnsignedTypeI64:
		err = c.compileMulForInts(false, amd64.MULQ)
	case wazeroir.UnsignedTypeF32:
		err = c.compileMulForFloats(amd64.MULSS)
	case wazeroir.UnsignedTypeF64:
		err = c.compileMulForFloats(amd64.MULSD)
	}
	return
}

// compileMulForInts emits instructions to perform integer multiplication for
// the top two values on the stack. If unfamiliar with the convention for integer
// multiplication on x86, see https://www.felixcloutier.com/x86/mul.
//
// In summary, one of the values must be on the AX register,
// and the mul instruction stores the overflow info in the DX register which we don't use.
// By "the overflow info" we mean bit 65 and higher of the result in the 64-bit case.
//
// So, we have to ensure that
//  1. The value previously located on DX is saved to the memory stack, because
//     the existing value will be overridden after the mul execution.
//  2. One of the operands (x1 or x2) is on the AX register.
//
// See https://www.felixcloutier.com/x86/mul#description for the detailed semantics.
func (c *amd64Compiler) compileMulForInts(is32Bit bool, mulInstruction asm.Instruction) error {
	const (
		resultRegister = amd64.RegAX
		reservedRegister = amd64.RegDX
	)

	x2 := c.locationStack.pop()
	x1 := c.locationStack.pop()

	var valueOnAX *runtimeValueLocation
	if x1.register == resultRegister {
		valueOnAX = x1
	} else if x2.register == resultRegister {
		valueOnAX = x2
	} else {
		valueOnAX = x2
		// In this case, we move x2 to the AX register.
		c.onValueReleaseRegisterToStack(resultRegister)
		if x2.onConditionalRegister() {
			c.compileMoveConditionalToGeneralPurposeRegister(x2, resultRegister)
		} else if x2.onStack() {
			x2.setRegister(resultRegister)
			c.compileLoadValueOnStackToRegister(x2)
			c.locationStack.markRegisterUsed(resultRegister)
		} else {
			var inst asm.Instruction
			if is32Bit {
				inst = amd64.MOVL
			} else {
				inst = amd64.MOVQ
			}
			c.assembler.CompileRegisterToRegister(inst, x2.register, resultRegister)

			// We no longer use the previous register of x2.
			c.locationStack.releaseRegister(x2)
			x2.setRegister(resultRegister)
			c.locationStack.markRegisterUsed(resultRegister)
		}
	}

	// We have to make sure that at this point the operands are on registers.
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// We have to save the existing value on DX.
	// If the DX register is used by either x1 or x2, we don't need to
	// save the value because it is consumed by mul anyway.
	if x1.register != reservedRegister && x2.register != reservedRegister {
		c.onValueReleaseRegisterToStack(reservedRegister)
	}

	// Now ready to emit the mul instruction.
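	// Added commentary (not in the original source): the one-operand MUL form multiplies the
	// implicit AX operand by the explicit register operand (e.g. MULQ src computes DX:AX = AX * src),
	// which is why only the operand that is NOT on AX is passed to CompileRegisterToNone below.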
	if x1 == valueOnAX {
		c.assembler.CompileRegisterToNone(mulInstruction, x2.register)
	} else {
		c.assembler.CompileRegisterToNone(mulInstruction, x1.register)
	}

	c.locationStack.markRegisterUnused(x2.register)
	c.locationStack.markRegisterUnused(x1.register)

	// Now we have the result in the AX register,
	// so we record it.
	c.pushRuntimeValueLocationOnRegister(resultRegister, x1.valueType)
	return nil
}

func (c *amd64Compiler) compileMulForFloats(instruction asm.Instruction) error {
	x2 := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.peek() // Note this is peek!
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	// x1 *= x2.
	c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register)

	// We no longer need x2 register after MUL operation here,
	// so we release it.
	c.locationStack.releaseRegister(x2)
	return nil
}

// compileClz implements compiler.compileClz for the amd64 architecture.
func (c *amd64Compiler) compileClz(o *wazeroir.UnionOperation) error {
	target := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(target); err != nil {
		return err
	}

	unsignedInt := wazeroir.UnsignedInt(o.B1)
	if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.LZCNTQ, target.register, target.register)
		}
	} else {
		// On processors that do not support LZCNT, we combine BSR (calculating
		// most significant set bit) with XOR. This logic is described in
		// "Replace Raw Assembly Code with Builtin Intrinsics" section in:
		// https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code.

		// First, we have to check if the target is non-zero as BSR is undefined
		// on zero. See https://www.felixcloutier.com/x86/bsr.
		c.assembler.CompileRegisterToRegister(amd64.TESTQ, target.register, target.register)
		jmpIfNonZero := c.assembler.CompileJump(amd64.JNE)

		// If the value is zero, we just push the const value.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(32), target.register)
		} else {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(64), target.register)
		}

		// Emit the jmp instruction to jump to the position right after
		// the non-zero case.
		jmpAtEndOfZero := c.assembler.CompileJump(amd64.JMP)

		// Start emitting non-zero case.
		c.assembler.SetJumpTargetOnNext(jmpIfNonZero)
		// First, we calculate the most significant set bit.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.BSRL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.BSRQ, target.register, target.register)
		}

		// Now we XOR the value with the bit length minus one.
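		// Added commentary (not in the original source): a worked example of the BSR+XOR trick in
		// the 32-bit case: for input 0x0000_00f0, BSRL yields 7 (the index of the most significant
		// set bit), and 7 XOR 31 = 24, which is exactly the number of leading zeros.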
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileConstToRegister(amd64.XORL, 31, target.register)
		} else {
			c.assembler.CompileConstToRegister(amd64.XORQ, 63, target.register)
		}

		// Finally the end jump instruction of the zero case must target
		// the next instruction.
		c.assembler.SetJumpTargetOnNext(jmpAtEndOfZero)
	}

	// We reused the same register of target for the result.
	c.locationStack.markRegisterUnused(target.register)
	c.pushRuntimeValueLocationOnRegister(target.register, target.valueType)
	return nil
}

// compileCtz implements compiler.compileCtz for the amd64 architecture.
func (c *amd64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
	target := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(target); err != nil {
		return err
	}

	unsignedInt := wazeroir.UnsignedInt(o.B1)
	if c.cpuFeatures.HasExtra(platform.CpuExtraFeatureABM) {
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTQ, target.register, target.register)
		}
	} else {
		// On processors that do not support TZCNT, the BSF instruction is
		// executed instead. The key difference between the TZCNT and BSF
		// instructions is that if the source operand is zero, the content of
		// the destination operand is undefined.
		// https://www.felixcloutier.com/x86/tzcnt.html

		// First we compare the target with zero.
		c.assembler.CompileRegisterToRegister(amd64.TESTQ, target.register, target.register)
		jmpIfNonZero := c.assembler.CompileJump(amd64.JNE)

		// If the value is zero, we just push the const value.
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(32), target.register)
		} else {
			c.assembler.CompileConstToRegister(amd64.MOVL, int64(64), target.register)
		}

		// Emit the jmp instruction to jump to the position right after
		// the non-zero case.
		jmpAtEndOfZero := c.assembler.CompileJump(amd64.JMP)

		// Otherwise, emit the TZCNT.
		c.assembler.SetJumpTargetOnNext(jmpIfNonZero)
		if unsignedInt == wazeroir.UnsignedInt32 {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register)
		} else {
			c.assembler.CompileRegisterToRegister(amd64.TZCNTQ, target.register, target.register)
		}

		// Finally the end jump instruction of the zero case must target
		// the next instruction.
		c.assembler.SetJumpTargetOnNext(jmpAtEndOfZero)
	}

	// We reused the same register of target for the result.
	c.locationStack.markRegisterUnused(target.register)
	c.pushRuntimeValueLocationOnRegister(target.register, target.valueType)
	return nil
}

// compilePopcnt implements compiler.compilePopcnt for the amd64 architecture.
1382 func (c *amd64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error { 1383 target := c.locationStack.pop() 1384 if err := c.compileEnsureOnRegister(target); err != nil { 1385 return err 1386 } 1387 1388 unsignedInt := wazeroir.UnsignedInt(o.B1) 1389 if unsignedInt == wazeroir.UnsignedInt32 { 1390 c.assembler.CompileRegisterToRegister(amd64.POPCNTL, target.register, target.register) 1391 } else { 1392 c.assembler.CompileRegisterToRegister(amd64.POPCNTQ, target.register, target.register) 1393 } 1394 1395 // We reused the same register of target for the result. 1396 c.locationStack.markRegisterUnused(target.register) 1397 c.pushRuntimeValueLocationOnRegister(target.register, target.valueType) 1398 return nil 1399 } 1400 1401 // compileDiv implements compiler.compileDiv for the amd64 architecture. 1402 func (c *amd64Compiler) compileDiv(o *wazeroir.UnionOperation) (err error) { 1403 signedType := wazeroir.SignedType(o.B1) 1404 switch signedType { 1405 case wazeroir.SignedTypeUint32: 1406 err = c.compileDivForInts(true, false) 1407 case wazeroir.SignedTypeUint64: 1408 err = c.compileDivForInts(false, false) 1409 case wazeroir.SignedTypeInt32: 1410 err = c.compileDivForInts(true, true) 1411 case wazeroir.SignedTypeInt64: 1412 err = c.compileDivForInts(false, true) 1413 case wazeroir.SignedTypeFloat32: 1414 err = c.compileDivForFloats(true) 1415 case wazeroir.SignedTypeFloat64: 1416 err = c.compileDivForFloats(false) 1417 } 1418 return 1419 } 1420 1421 // compileDivForInts emits the instructions to perform division on the top 1422 // two values of integer type on the stack and puts the quotient of the result 1423 // onto the stack. For example, stack [..., 10, 3] results in [..., 3] where 1424 // the remainder is discarded. 1425 func (c *amd64Compiler) compileDivForInts(is32Bit bool, signed bool) error { 1426 if err := c.performDivisionOnInts(false, is32Bit, signed); err != nil { 1427 return err 1428 } 1429 // Now we have the quotient of the division result in the AX register, 1430 // so we record it. 1431 if is32Bit { 1432 c.pushRuntimeValueLocationOnRegister(amd64.RegAX, runtimeValueTypeI32) 1433 } else { 1434 c.pushRuntimeValueLocationOnRegister(amd64.RegAX, runtimeValueTypeI64) 1435 } 1436 return nil 1437 } 1438 1439 // compileRem implements compiler.compileRem for the amd64 architecture. 1440 func (c *amd64Compiler) compileRem(o *wazeroir.UnionOperation) (err error) { 1441 var vt runtimeValueType 1442 signedInt := wazeroir.SignedInt(o.B1) 1443 switch signedInt { 1444 case wazeroir.SignedInt32: 1445 err = c.performDivisionOnInts(true, true, true) 1446 vt = runtimeValueTypeI32 1447 case wazeroir.SignedInt64: 1448 err = c.performDivisionOnInts(true, false, true) 1449 vt = runtimeValueTypeI64 1450 case wazeroir.SignedUint32: 1451 err = c.performDivisionOnInts(true, true, false) 1452 vt = runtimeValueTypeI32 1453 case wazeroir.SignedUint64: 1454 err = c.performDivisionOnInts(true, false, false) 1455 vt = runtimeValueTypeI64 1456 } 1457 if err != nil { 1458 return err 1459 } 1460 1461 // Now we have the remainder of the division result in the DX register, 1462 // so we record it. 1463 c.pushRuntimeValueLocationOnRegister(amd64.RegDX, vt) 1464 return 1465 } 1466 1467 // performDivisionOnInts emits the instructions to do divisions on top two integers on the stack 1468 // via DIV (unsigned div) and IDIV (signed div) instructions. 
1469 // See the following explanation of these instructions' semantics from https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf 1470 // 1471 // >> Division requires special arrangements: idiv (signed) and div (unsigned) operate on a 2n-byte dividend and 1472 // >> an n-byte divisor to produce an n-byte quotient and n-byte remainder. The dividend always lives in a fixed pair of 1473 // >> registers (%edx and %eax for the 32-bit case; %rdx and %rax for the 64-bit case); the divisor is specified as the 1474 // >> source operand in the instruction. The quotient goes in %eax (resp. %rax); the remainder in %edx (resp. %rdx). For 1475 // >> signed division, the cltd (resp. ctqo) instruction is used to prepare %edx (resp. %rdx) with the sign extension of 1476 // >> %eax (resp. %rax). For example, if a,b, c are memory locations holding quad words, then we could set c = a/b 1477 // >> using the sequence: movq a(%rip), %rax; ctqo; idivq b(%rip); movq %rax, c(%rip). 1478 // 1479 // tl;dr is that the division result is placed in AX and DX registers after instructions emitted by this function 1480 // where AX holds the quotient while DX the remainder of the division result. 1481 func (c *amd64Compiler) performDivisionOnInts(isRem, is32Bit, signed bool) error { 1482 const ( 1483 quotientRegister = amd64.RegAX 1484 remainderRegister = amd64.RegDX 1485 ) 1486 1487 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 1488 return err 1489 } 1490 1491 // Ensures that previous values on these registers are saved to memory. 1492 c.onValueReleaseRegisterToStack(quotientRegister) 1493 c.onValueReleaseRegisterToStack(remainderRegister) 1494 1495 // In order to ensure x2 is placed on a temporary register for x2 value other than AX and DX, 1496 // we mark them as used here. 1497 c.locationStack.markRegisterUsed(quotientRegister) 1498 c.locationStack.markRegisterUsed(remainderRegister) 1499 1500 // Ensure that x2 is placed on a register which is not either AX or DX. 1501 x2 := c.locationStack.pop() 1502 if err := c.compileEnsureOnRegister(x2); err != nil { 1503 return err 1504 } 1505 1506 // Now we successfully place x2 on a temp register, so we no longer need to 1507 // mark these registers used. 1508 c.locationStack.markRegisterUnused(quotientRegister) 1509 c.locationStack.markRegisterUnused(remainderRegister) 1510 1511 // Check if the x2 equals zero. 1512 if is32Bit { 1513 c.assembler.CompileRegisterToRegister(amd64.TESTL, x2.register, x2.register) 1514 } else { 1515 c.assembler.CompileRegisterToRegister(amd64.TESTQ, x2.register, x2.register) 1516 } 1517 1518 // Skipped if the divisor is nonzero. 1519 c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusIntegerDivisionByZero) 1520 1521 // next, we ensure that x1 is placed on AX. 1522 x1 := c.locationStack.pop() 1523 if x1.onRegister() && x1.register != quotientRegister { 1524 // Move x1 to quotientRegister. 1525 if is32Bit { 1526 c.assembler.CompileRegisterToRegister(amd64.MOVL, x1.register, quotientRegister) 1527 } else { 1528 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x1.register, quotientRegister) 1529 } 1530 c.locationStack.markRegisterUnused(x1.register) 1531 x1.setRegister(quotientRegister) 1532 } else if x1.onStack() { 1533 x1.setRegister(quotientRegister) 1534 c.compileLoadValueOnStackToRegister(x1) 1535 } 1536 1537 // Note: at this point, x1 is placed on AX, x2 is on a register which is not AX or DX. 
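// Illustrative reference (not part of the original file): the Wasm semantics that the emitted
// DIV/IDIV sequence below must satisfy can be written in plain Go as follows. Go's / and % already
// use truncated division, matching IDIV, so only the trapping cases need explicit checks.
// The helper names are hypothetical and exist only for this sketch:
//
//	// i32DivS returns the Wasm i32.div_s result and whether the operation must trap.
//	func i32DivS(x1, x2 int32) (result int32, trap bool) {
//		if x2 == 0 {
//			return 0, true // integer division by zero
//		}
//		if x1 == math.MinInt32 && x2 == -1 {
//			return 0, true // integer overflow: the quotient 2^31 is not representable
//		}
//		return x1 / x2, false // truncated quotient, i.e. what IDIV leaves in AX
//	}
//
//	// i32RemS returns the Wasm i32.rem_s result and whether the operation must trap.
//	func i32RemS(x1, x2 int32) (result int32, trap bool) {
//		if x2 == 0 {
//			return 0, true
//		}
//		if x2 == -1 {
//			return 0, false // defined as zero by the spec; IDIV would fault here when x1 == math.MinInt32
//		}
//		return x1 % x2, false // truncated remainder, i.e. what IDIV leaves in DX
//	}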
1538 
1539 isSignedRem := isRem && signed
1540 isSignedDiv := !isRem && signed
1541 var signedRemMinusOneDivisorJmp asm.Node
1542 if isSignedRem {
1543 // If this is for getting the remainder of a signed division,
1544 // we have to treat the special case where the divisor equals -1.
1545 // For example, in the 32-bit case, (-2^31) / -1 yields quotient=2^31 and remainder=0,
1546 // where the quotient doesn't fit in the 32-bit signed range whose maximum is 2^31-1.
1547 // x86 raises a division error (reported as a floating point exception) in this case, but according to the Wasm spec
1548 // the remainder must be zero (not undefined!) whenever the divisor equals -1, so we emit these special branches,
1549 // unlike the signed division case below where (-2^31) / -1 must instead trap with integer overflow.
1550 // For detail, please refer to https://stackoverflow.com/questions/56303282/why-idiv-with-1-causes-floating-point-exception
1551 
1552 // First we store zero into the remainder result register (DX) and compare the divisor with -1.
1553 if is32Bit {
1554 c.assembler.CompileRegisterToRegister(amd64.XORL, remainderRegister, remainderRegister)
1555 c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, -1)
1556 } else {
1557 c.assembler.CompileRegisterToRegister(amd64.XORQ, remainderRegister, remainderRegister)
1558 c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, -1)
1559 }
1560 
1561 // If it equals minus one, we skip the normal case.
1562 signedRemMinusOneDivisorJmp = c.assembler.CompileJump(amd64.JEQ)
1563 } else if isSignedDiv {
1564 // For signed division, we need branches for the "math.MinInt{32,64} / -1"
1565 // case, which raises a division error (floating point exception) because
1566 // the resulting value exceeds the maximum of the signed int.
1567 
1568 // First we compare the divisor with -1.
1569 if is32Bit {
1570 c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, -1)
1571 } else {
1572 c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, -1)
1573 }
1574 
1575 // If it doesn't equal minus one, we jump to the normal case.
1576 nonMinusOneDivisorJmp := c.assembler.CompileJump(amd64.JNE)
1577 
1578 // Next we check if the dividend is the most negative value for the signed integer.
1579 // That means whether or not we try to do (math.MinInt32 / -1) or (math.MinInt64 / -1) respectively.
1580 if is32Bit {
1581 if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPL, x1.register, c.minimum32BitSignedInt); err != nil {
1582 return err
1583 }
1584 } else {
1585 if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPQ, x1.register, c.minimum64BitSignedInt); err != nil {
1586 return err
1587 }
1588 }
1589 
1590 // Trap if we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1),
1591 // as that overflows the division: the result would become 2^31 (or 2^63), which is larger than
1592 // the maximum of the signed 32-bit (or 64-bit) int.
1593 c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusIntegerOverflow)
1594 // Set the normal case's jump target.
1595 c.assembler.SetJumpTargetOnNext(nonMinusOneDivisorJmp)
1596 }
1597 
1598 // Now we are ready to emit the div instruction.
1599 // Since the div instructions take a 2n-byte dividend placed in the DX:AX registers:
1600 // * signed case - we need to sign-extend the dividend into the DX register via CDQ (32 bit) or CQO (64 bit).
1601 // * unsigned case - we need to zero the DX register via "XOR DX DX"
1602 if is32Bit && signed {
1603 // Emit sign-extension to have a 64-bit dividend over the DX and AX registers.
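// Additional note (illustrative, not from the original comments): CDQ sign-extends EAX into EDX
// and CQO sign-extends RAX into RDX, so that the dividend occupies DX:AX as IDIV requires.
// In effect the high half is filled with copies of the sign bit, e.g. in Go terms:
//
//	lo := int32(-7)
//	hi := lo >> 31 // arithmetic shift: 0 for non-negative lo, -1 (all ones) otherwise
//	// (hi, lo) is the 64-bit sign-extended dividend that IDIVL consumes from DX:AX.
//
// For the unsigned DIV cases below, the high half must instead be zero, hence the XOR of DX with itself.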
1604 c.assembler.CompileStandAlone(amd64.CDQ) 1605 c.assembler.CompileRegisterToNone(amd64.IDIVL, x2.register) 1606 } else if is32Bit && !signed { 1607 // Zeros DX register to have 64 bit dividend over DX and AX registers. 1608 c.assembler.CompileRegisterToRegister(amd64.XORQ, amd64.RegDX, amd64.RegDX) 1609 c.assembler.CompileRegisterToNone(amd64.DIVL, x2.register) 1610 } else if !is32Bit && signed { 1611 // Emits sign-extension to have 128 bit dividend over DX and AX registers. 1612 c.assembler.CompileStandAlone(amd64.CQO) 1613 c.assembler.CompileRegisterToNone(amd64.IDIVQ, x2.register) 1614 } else if !is32Bit && !signed { 1615 // Zeros DX register to have 128 bit dividend over DX and AX registers. 1616 c.assembler.CompileRegisterToRegister(amd64.XORQ, amd64.RegDX, amd64.RegDX) 1617 c.assembler.CompileRegisterToNone(amd64.DIVQ, x2.register) 1618 } 1619 1620 // If this is signed rem instruction, we must set the jump target of 1621 // the exit jump from division -1 case towards the next instruction. 1622 if signedRemMinusOneDivisorJmp != nil { 1623 c.assembler.SetJumpTargetOnNext(signedRemMinusOneDivisorJmp) 1624 } 1625 1626 // We mark them as unused so that we can push one of them onto the location stack at call sites. 1627 c.locationStack.markRegisterUnused(remainderRegister) 1628 c.locationStack.markRegisterUnused(quotientRegister) 1629 c.locationStack.markRegisterUnused(x2.register) 1630 return nil 1631 } 1632 1633 // compileDivForFloats emits the instructions to perform division 1634 // on the top two values of float type on the stack, placing the result back onto the stack. 1635 // For example, stack [..., 1.0, 4.0] results in [..., 0.25]. 1636 func (c *amd64Compiler) compileDivForFloats(is32Bit bool) error { 1637 if is32Bit { 1638 return c.compileSimpleBinaryOp(amd64.DIVSS) 1639 } else { 1640 return c.compileSimpleBinaryOp(amd64.DIVSD) 1641 } 1642 } 1643 1644 // compileAnd implements compiler.compileAnd for the amd64 architecture. 1645 func (c *amd64Compiler) compileAnd(o *wazeroir.UnionOperation) (err error) { 1646 unsignedInt := wazeroir.UnsignedInt(o.B1) 1647 switch unsignedInt { 1648 case wazeroir.UnsignedInt32: 1649 err = c.compileSimpleBinaryOp(amd64.ANDL) 1650 case wazeroir.UnsignedInt64: 1651 err = c.compileSimpleBinaryOp(amd64.ANDQ) 1652 } 1653 return 1654 } 1655 1656 // compileOr implements compiler.compileOr for the amd64 architecture. 1657 func (c *amd64Compiler) compileOr(o *wazeroir.UnionOperation) (err error) { 1658 unsignedInt := wazeroir.UnsignedInt(o.B1) 1659 switch unsignedInt { 1660 case wazeroir.UnsignedInt32: 1661 err = c.compileSimpleBinaryOp(amd64.ORL) 1662 case wazeroir.UnsignedInt64: 1663 err = c.compileSimpleBinaryOp(amd64.ORQ) 1664 } 1665 return 1666 } 1667 1668 // compileXor implements compiler.compileXor for the amd64 architecture. 1669 func (c *amd64Compiler) compileXor(o *wazeroir.UnionOperation) (err error) { 1670 unsignedInt := wazeroir.UnsignedInt(o.B1) 1671 switch unsignedInt { 1672 case wazeroir.UnsignedInt32: 1673 err = c.compileSimpleBinaryOp(amd64.XORL) 1674 case wazeroir.UnsignedInt64: 1675 err = c.compileSimpleBinaryOp(amd64.XORQ) 1676 } 1677 return 1678 } 1679 1680 // compileSimpleBinaryOp emits instructions to pop two values from the stack 1681 // and perform the given instruction on these two values and push the result 1682 // onto the stack. 
1683 func (c *amd64Compiler) compileSimpleBinaryOp(instruction asm.Instruction) error { 1684 x2 := c.locationStack.pop() 1685 if err := c.compileEnsureOnRegister(x2); err != nil { 1686 return err 1687 } 1688 1689 x1 := c.locationStack.pop() 1690 if err := c.compileEnsureOnRegister(x1); err != nil { 1691 return err 1692 } 1693 1694 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 1695 1696 // We consumed x2 register after the operation here, 1697 // so we release it. 1698 c.locationStack.releaseRegister(x2) 1699 1700 // We already stored the result in the register used by x1 1701 // so we record it. 1702 c.locationStack.markRegisterUnused(x1.register) 1703 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1704 return nil 1705 } 1706 1707 // compileShl implements compiler.compileShl for the amd64 architecture. 1708 func (c *amd64Compiler) compileShl(o *wazeroir.UnionOperation) (err error) { 1709 unsignedInt := wazeroir.UnsignedInt(o.B1) 1710 switch unsignedInt { 1711 case wazeroir.UnsignedInt32: 1712 err = c.compileShiftOp(amd64.SHLL, false) 1713 case wazeroir.UnsignedInt64: 1714 err = c.compileShiftOp(amd64.SHLQ, true) 1715 } 1716 return 1717 } 1718 1719 // compileShr implements compiler.compileShr for the amd64 architecture. 1720 func (c *amd64Compiler) compileShr(o *wazeroir.UnionOperation) (err error) { 1721 signedInt := wazeroir.SignedInt(o.B1) 1722 switch signedInt { 1723 case wazeroir.SignedInt32: 1724 err = c.compileShiftOp(amd64.SARL, true) 1725 case wazeroir.SignedInt64: 1726 err = c.compileShiftOp(amd64.SARQ, false) 1727 case wazeroir.SignedUint32: 1728 err = c.compileShiftOp(amd64.SHRL, true) 1729 case wazeroir.SignedUint64: 1730 err = c.compileShiftOp(amd64.SHRQ, false) 1731 } 1732 return 1733 } 1734 1735 // compileRotl implements compiler.compileRotl for the amd64 architecture. 1736 func (c *amd64Compiler) compileRotl(o *wazeroir.UnionOperation) (err error) { 1737 unsignedInt := wazeroir.UnsignedInt(o.B1) 1738 switch unsignedInt { 1739 case wazeroir.UnsignedInt32: 1740 err = c.compileShiftOp(amd64.ROLL, true) 1741 case wazeroir.UnsignedInt64: 1742 err = c.compileShiftOp(amd64.ROLQ, false) 1743 } 1744 return 1745 } 1746 1747 // compileRotr implements compiler.compileRotr for the amd64 architecture. 1748 func (c *amd64Compiler) compileRotr(o *wazeroir.UnionOperation) (err error) { 1749 unsignedInt := wazeroir.UnsignedInt(o.B1) 1750 switch unsignedInt { 1751 case wazeroir.UnsignedInt32: 1752 err = c.compileShiftOp(amd64.RORL, true) 1753 case wazeroir.UnsignedInt64: 1754 err = c.compileShiftOp(amd64.RORQ, false) 1755 } 1756 return 1757 } 1758 1759 // compileShiftOp adds instructions for shift operations (SHR, SHL, ROTR, ROTL) 1760 // where we have to place the second value (shift counts) on the CX register. 1761 func (c *amd64Compiler) compileShiftOp(instruction asm.Instruction, is32Bit bool) error { 1762 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 1763 return err 1764 } 1765 1766 x2 := c.locationStack.pop() 1767 1768 // Ensures that x2 (holding shift counts) is placed on the CX register. 1769 const shiftCountRegister = amd64.RegCX 1770 if (x2.onRegister() && x2.register != shiftCountRegister) || x2.onStack() { 1771 // If another value lives on the CX register, we release it to the stack. 1772 c.onValueReleaseRegisterToStack(shiftCountRegister) 1773 1774 if x2.onRegister() { 1775 x2r := x2.register 1776 // If x2 lives on a register, we move the value to CX. 
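// Additional note (illustrative, not part of the original comments): x86 variable-count shifts and
// rotates only accept the count in CL, which is why x2 has to be staged in CX here. The hardware
// also masks the count to the operand width (count & 31 for 32-bit operands, count & 63 for 64-bit),
// which happens to match Wasm's shift semantics, so no extra masking instructions are needed.
// A hypothetical pure-Go equivalent of the 32-bit shift-left, for reference only:
//
//	func wasmI32Shl(v, count uint32) uint32 { return v << (count & 31) } // same result as SHLL with CL=count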
1777 if is32Bit { 1778 c.assembler.CompileRegisterToRegister(amd64.MOVL, x2r, shiftCountRegister) 1779 } else { 1780 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x2r, shiftCountRegister) 1781 } 1782 // We no longer place any value on the original register, so we record it. 1783 c.locationStack.markRegisterUnused(x2r) 1784 } else { 1785 // If it is on stack, we just move the memory allocated value to the CX register. 1786 x2.setRegister(shiftCountRegister) 1787 c.compileLoadValueOnStackToRegister(x2) 1788 } 1789 c.locationStack.markRegisterUsed(shiftCountRegister) 1790 } 1791 1792 x1 := c.locationStack.peek() // Note this is peek! 1793 x1r := x1.register 1794 1795 if x1.onRegister() { 1796 c.assembler.CompileRegisterToRegister(instruction, shiftCountRegister, x1r) 1797 } else { 1798 // Shift target can be placed on a memory location. 1799 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 1800 c.assembler.CompileRegisterToMemory(instruction, shiftCountRegister, amd64ReservedRegisterForStackBasePointerAddress, int64(x1.stackPointer)*8) 1801 } 1802 1803 // We consumed x2 register after the operation here, 1804 // so we release it. 1805 c.locationStack.markRegisterUnused(shiftCountRegister) 1806 return nil 1807 } 1808 1809 // compileAbs implements compiler.compileAbs for the amd64 architecture. 1810 // 1811 // See the following discussions for how we could take the abs of floats on x86 assembly. 1812 // https://stackoverflow.com/questions/32408665/fastest-way-to-compute-absolute-value-using-sse/32422471#32422471 1813 // https://stackoverflow.com/questions/44630015/how-would-fabsdouble-be-implemented-on-x86-is-it-an-expensive-operation 1814 func (c *amd64Compiler) compileAbs(o *wazeroir.UnionOperation) (err error) { 1815 target := c.locationStack.peek() // Note this is peek! 1816 if err = c.compileEnsureOnRegister(target); err != nil { 1817 return err 1818 } 1819 1820 // First shift left by one to clear the sign bit, and then shift right by one. 1821 if wazeroir.Float(o.B1) == wazeroir.Float32 { 1822 c.assembler.CompileConstToRegister(amd64.PSLLD, 1, target.register) 1823 c.assembler.CompileConstToRegister(amd64.PSRLD, 1, target.register) 1824 } else { 1825 c.assembler.CompileConstToRegister(amd64.PSLLQ, 1, target.register) 1826 c.assembler.CompileConstToRegister(amd64.PSRLQ, 1, target.register) 1827 } 1828 return nil 1829 } 1830 1831 // compileNeg implements compiler.compileNeg for the amd64 architecture. 1832 func (c *amd64Compiler) compileNeg(o *wazeroir.UnionOperation) (err error) { 1833 target := c.locationStack.peek() // Note this is peek! 1834 if err := c.compileEnsureOnRegister(target); err != nil { 1835 return err 1836 } 1837 1838 tmpReg, err := c.allocateRegister(registerTypeVector) 1839 if err != nil { 1840 return err 1841 } 1842 1843 // First we move the sign-bit mask (placed in memory) to the tmp register, 1844 // since we cannot take XOR directly with float reg and const. 1845 // And then negate the value by XOR it with the sign-bit mask. 
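// Illustrative sketch (not part of the original source) of the same negation done in plain Go,
// flipping only the IEEE-754 sign bit; the helper name is hypothetical:
//
//	import "math"
//
//	func negateF32(f float32) float32 {
//		return math.Float32frombits(math.Float32bits(f) ^ 0x8000_0000) // XOR with float32SignBitMask
//	}
//
// This makes explicit that negation of NaN and ±0 only toggles the sign bit, which is exactly
// what XORPS/XORPD against the sign-bit mask does below.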
1846 if wazeroir.Float(o.B1) == wazeroir.Float32 { 1847 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.float32SignBitMask, tmpReg) 1848 if err != nil { 1849 return err 1850 } 1851 c.assembler.CompileRegisterToRegister(amd64.XORPS, tmpReg, target.register) 1852 } else { 1853 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.float64SignBitMask, tmpReg) 1854 if err != nil { 1855 return err 1856 } 1857 c.assembler.CompileRegisterToRegister(amd64.XORPD, tmpReg, target.register) 1858 } 1859 return nil 1860 } 1861 1862 // compileCeil implements compiler.compileCeil for the amd64 architecture. 1863 func (c *amd64Compiler) compileCeil(o *wazeroir.UnionOperation) (err error) { 1864 // Internally, ceil can be performed via ROUND instruction with 0x02 mode. 1865 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/ceilf.S for example. 1866 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x02) 1867 } 1868 1869 // compileFloor implements compiler.compileFloor for the amd64 architecture. 1870 func (c *amd64Compiler) compileFloor(o *wazeroir.UnionOperation) (err error) { 1871 // Internally, floor can be performed via ROUND instruction with 0x01 mode. 1872 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/floorf.S for example. 1873 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x01) 1874 } 1875 1876 // compileTrunc implements compiler.compileTrunc for the amd64 architecture. 1877 func (c *amd64Compiler) compileTrunc(o *wazeroir.UnionOperation) error { 1878 // Internally, trunc can be performed via ROUND instruction with 0x03 mode. 1879 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/truncf.S for example. 1880 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x03) 1881 } 1882 1883 // compileNearest implements compiler.compileNearest for the amd64 architecture. 1884 func (c *amd64Compiler) compileNearest(o *wazeroir.UnionOperation) error { 1885 // Nearest can be performed via ROUND instruction with 0x00 mode. 1886 return c.compileRoundInstruction(wazeroir.Float(o.B1) == wazeroir.Float32, 0x00) 1887 } 1888 1889 func (c *amd64Compiler) compileRoundInstruction(is32Bit bool, mode int64) error { 1890 target := c.locationStack.peek() // Note this is peek! 1891 if err := c.compileEnsureOnRegister(target); err != nil { 1892 return err 1893 } 1894 1895 if is32Bit { 1896 c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDSS, target.register, target.register, byte(mode)) 1897 } else { 1898 c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDSD, target.register, target.register, byte(mode)) 1899 } 1900 return nil 1901 } 1902 1903 // compileMin implements compiler.compileMin for the amd64 architecture. 1904 func (c *amd64Compiler) compileMin(o *wazeroir.UnionOperation) error { 1905 is32Bit := wazeroir.Float(o.B1) == wazeroir.Float32 1906 if is32Bit { 1907 return c.compileMinOrMax(is32Bit, true, amd64.MINSS) 1908 } else { 1909 return c.compileMinOrMax(is32Bit, true, amd64.MINSD) 1910 } 1911 } 1912 1913 // compileMax implements compiler.compileMax for the amd64 architecture. 
1914 func (c *amd64Compiler) compileMax(o *wazeroir.UnionOperation) error {
1915 is32Bit := wazeroir.Float(o.B1) == wazeroir.Float32
1916 if is32Bit {
1917 return c.compileMinOrMax(is32Bit, false, amd64.MAXSS)
1918 } else {
1919 return c.compileMinOrMax(is32Bit, false, amd64.MAXSD)
1920 }
1921 }
1922 
1923 // compileMinOrMax adds instructions to pop two values from the stack, and push back either the minimum or
1924 // the maximum of these two values onto the stack according to the minOrMaxInstruction argument.
1925 // minOrMaxInstruction must be one of MAXSS, MAXSD, MINSS or MINSD.
1926 // Note: These native min/max instructions are almost compatible with min/max in the Wasm specification,
1927 // but they differ slightly with respect to NaN handling.
1928 // The native min/max instructions can return a non-NaN value when exactly one of the operands
1929 // is NaN, depending on the operand order, e.g. native_{min,max}(NaN, 5.0) can yield 5.0 rather than NaN.
1930 // However, WebAssembly specifies that min/max must always return NaN if one of the values is NaN.
1931 // Therefore, in this function, we have to add conditional jumps to check if one of the values is NaN before
1932 // the native min/max, which is why we cannot simply emit a single native min/max instruction here.
1933 //
1934 // For the semantics, see wazeroir.Min and wazeroir.Max for detail.
1935 func (c *amd64Compiler) compileMinOrMax(is32Bit, isMin bool, minOrMaxInstruction asm.Instruction) error {
1936 x2 := c.locationStack.pop()
1937 if err := c.compileEnsureOnRegister(x2); err != nil {
1938 return err
1939 }
1940 x1 := c.locationStack.pop()
1941 if err := c.compileEnsureOnRegister(x1); err != nil {
1942 return err
1943 }
1944 
1945 // Check if this is the (either x1 or x2 is NaN) or (x1 equals x2) case.
1946 if is32Bit {
1947 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, x2.register, x1.register)
1948 } else {
1949 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, x2.register, x1.register)
1950 }
1951 
1952 // At this point, we have the three cases of conditional flags below
1953 // (See https://www.felixcloutier.com/x86/ucomiss#operation for detail.)
1954 //
1955 // 1) Two values are NaN-free and different: All flags are cleared.
1956 // 2) Two values are NaN-free and equal: Only the ZF flag is set.
1957 // 3) One of the two values is NaN: ZF, PF and CF flags are set.
1958 
1959 // Jump instruction to handle the 1) case by checking the ZF flag,
1960 // as ZF is only set for the 2) and 3) cases.
1961 nanFreeOrDiffJump := c.assembler.CompileJump(amd64.JNE)
1962 
1963 // Start handling 2) and 3).
1964 
1965 // Jump if one of the two values is NaN by checking the parity flag (PF).
1966 includeNaNJmp := c.assembler.CompileJump(amd64.JPS)
1967 
1968 // Start handling 2).
1969 
1970 // Before we exit this case, we have to ensure that positive zero (or negative zero for the min instruction) is
1971 // returned if the two values are positive and negative zeros.
1972 var inst asm.Instruction
1973 switch {
1974 case is32Bit && isMin:
1975 inst = amd64.ORPS
1976 case !is32Bit && isMin:
1977 inst = amd64.ORPD
1978 case is32Bit && !isMin:
1979 inst = amd64.ANDPS
1980 case !is32Bit && !isMin:
1981 inst = amd64.ANDPD
1982 }
1983 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1984 
1985 sameExitJmp := c.assembler.CompileJump(amd64.JMP)
1986 
1987 // Start handling 3).
1988 c.assembler.SetJumpTargetOnNext(includeNaNJmp)
1989 
1990 // We emit the ADD instruction to produce the NaN in x1.
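// Additional note (illustrative, not from the original comments): the ADD here relies on the IEEE-754
// rule that any arithmetic involving a NaN operand produces NaN, so x1 becomes NaN whenever at least
// one input was NaN, which is exactly the result Wasm min/max require. In Go terms:
//
//	nan := math.NaN()
//	_ = 5.0 + nan        // NaN
//	_ = math.Min(5, nan) // also NaN: Go's math.Min follows the same NaN-propagating rule as Wasm
//
// whereas a bare MINSS/MINSD could have returned 5.0 for one of the operand orders.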
1991 if is32Bit { 1992 c.assembler.CompileRegisterToRegister(amd64.ADDSS, x2.register, x1.register) 1993 } else { 1994 c.assembler.CompileRegisterToRegister(amd64.ADDSD, x2.register, x1.register) 1995 } 1996 1997 // Exit from the NaN case branch. 1998 nanExitJmp := c.assembler.CompileJump(amd64.JMP) 1999 2000 // Start handling 1). 2001 c.assembler.SetJumpTargetOnNext(nanFreeOrDiffJump) 2002 2003 // Now handle the NaN-free and different values case. 2004 c.assembler.CompileRegisterToRegister(minOrMaxInstruction, x2.register, x1.register) 2005 2006 // Set the jump target of 1) and 2) cases to the next instruction after 3) case. 2007 c.assembler.SetJumpTargetOnNext(nanExitJmp) 2008 c.assembler.SetJumpTargetOnNext(sameExitJmp) 2009 2010 // Record that we consumed the x2 and placed the minOrMax result in the x1's register. 2011 c.locationStack.markRegisterUnused(x2.register) 2012 c.locationStack.markRegisterUnused(x1.register) 2013 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2014 return nil 2015 } 2016 2017 // compileCopysign implements compiler.compileCopysign for the amd64 architecture. 2018 func (c *amd64Compiler) compileCopysign(o *wazeroir.UnionOperation) error { 2019 is32Bit := wazeroir.Float(o.B1) == wazeroir.Float32 2020 2021 x2 := c.locationStack.pop() 2022 if err := c.compileEnsureOnRegister(x2); err != nil { 2023 return err 2024 } 2025 x1 := c.locationStack.pop() 2026 if err := c.compileEnsureOnRegister(x1); err != nil { 2027 return err 2028 } 2029 tmpReg, err := c.allocateRegister(registerTypeVector) 2030 if err != nil { 2031 return err 2032 } 2033 2034 // Move the rest bit mask to the temp register. 2035 if is32Bit { 2036 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.float32RestBitMask, tmpReg) 2037 } else { 2038 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.float64RestBitMask, tmpReg) 2039 } 2040 if err != nil { 2041 return err 2042 } 2043 2044 // Clear the sign bit of x1 via AND with the mask. 2045 if is32Bit { 2046 c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmpReg, x1.register) 2047 } else { 2048 c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmpReg, x1.register) 2049 } 2050 2051 // Move the sign bit mask to the temp register. 2052 if is32Bit { 2053 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.float32SignBitMask, tmpReg) 2054 } else { 2055 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.float64SignBitMask, tmpReg) 2056 } 2057 if err != nil { 2058 return err 2059 } 2060 2061 // Clear the non-sign bits of x2 via AND with the mask. 2062 if is32Bit { 2063 c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmpReg, x2.register) 2064 } else { 2065 c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmpReg, x2.register) 2066 } 2067 2068 // Finally, copy the sign bit of x2 to x1. 2069 if is32Bit { 2070 c.assembler.CompileRegisterToRegister(amd64.ORPS, x2.register, x1.register) 2071 } else { 2072 c.assembler.CompileRegisterToRegister(amd64.ORPD, x2.register, x1.register) 2073 } 2074 2075 // Record that we consumed the x2 and placed the copysign result in the x1's register. 2076 c.locationStack.markRegisterUnused(x2.register) 2077 c.locationStack.markRegisterUnused(x1.register) 2078 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2079 return nil 2080 } 2081 2082 // compileSqrt implements compiler.compileSqrt for the amd64 architecture. 2083 func (c *amd64Compiler) compileSqrt(o *wazeroir.UnionOperation) error { 2084 target := c.locationStack.peek() // Note this is peek! 
2085 if err := c.compileEnsureOnRegister(target); err != nil { 2086 return err 2087 } 2088 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2089 c.assembler.CompileRegisterToRegister(amd64.SQRTSS, target.register, target.register) 2090 } else { 2091 c.assembler.CompileRegisterToRegister(amd64.SQRTSD, target.register, target.register) 2092 } 2093 return nil 2094 } 2095 2096 // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the amd64 architecture. 2097 func (c *amd64Compiler) compileI32WrapFromI64() error { 2098 target := c.locationStack.peek() // Note this is peek! 2099 if err := c.compileEnsureOnRegister(target); err != nil { 2100 return err 2101 } 2102 c.assembler.CompileRegisterToRegister(amd64.MOVL, target.register, target.register) 2103 target.valueType = runtimeValueTypeI32 2104 return nil 2105 } 2106 2107 // compileITruncFromF implements compiler.compileITruncFromF for the amd64 architecture. 2108 // 2109 // Note: in the following implementation, we use CVTSS2SI and CVTSD2SI to convert floats to signed integers. 2110 // According to the Intel manual ([1],[2]), if the source float value is either +-Inf or NaN, or it exceeds representative ranges 2111 // of target signed integer, then the instruction returns "masked" response float32SignBitMask (or float64SignBitMask for 64 bit case). 2112 // [1] Chapter 11.5.2, SIMD Floating-Point Exception Conditions in "Vol 1, Intel® 64 and IA-32 Architectures Manual" 2113 // 2114 // https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-1-manual.html 2115 // 2116 // [2] https://xem.github.io/minix86/manual/intel-x86-and-64-manual-vol1/o_7281d5ea06a5b67a-268.html 2117 func (c *amd64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) (err error) { 2118 inputType := wazeroir.Float(o.B1) 2119 outputType := wazeroir.SignedInt(o.B2) 2120 nonTrapping := o.B3 2121 if inputType == wazeroir.Float32 && outputType == wazeroir.SignedInt32 { 2122 err = c.emitSignedI32TruncFromFloat(true, nonTrapping) 2123 } else if inputType == wazeroir.Float32 && outputType == wazeroir.SignedInt64 { 2124 err = c.emitSignedI64TruncFromFloat(true, nonTrapping) 2125 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedInt32 { 2126 err = c.emitSignedI32TruncFromFloat(false, nonTrapping) 2127 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedInt64 { 2128 err = c.emitSignedI64TruncFromFloat(false, nonTrapping) 2129 } else if inputType == wazeroir.Float32 && outputType == wazeroir.SignedUint32 { 2130 err = c.emitUnsignedI32TruncFromFloat(true, nonTrapping) 2131 } else if inputType == wazeroir.Float32 && outputType == wazeroir.SignedUint64 { 2132 err = c.emitUnsignedI64TruncFromFloat(true, nonTrapping) 2133 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedUint32 { 2134 err = c.emitUnsignedI32TruncFromFloat(false, nonTrapping) 2135 } else if inputType == wazeroir.Float64 && outputType == wazeroir.SignedUint64 { 2136 err = c.emitUnsignedI64TruncFromFloat(false, nonTrapping) 2137 } 2138 return 2139 } 2140 2141 // emitUnsignedI32TruncFromFloat implements compileITruncFromF when the destination type is a 32-bit unsigned integer. 
2142 func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2143 source := c.locationStack.pop()
2144 if err := c.compileEnsureOnRegister(source); err != nil {
2145 return err
2146 }
2147 
2148 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2149 if err != nil {
2150 return err
2151 }
2152 
2153 // First, we check whether the source float value is above or equal to math.MaxInt32+1.
2154 if isFloat32Bit {
2155 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMaximumSigned32bitIntPlusOne, source.register)
2156 } else {
2157 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMaximumSigned32bitIntPlusOne, source.register)
2158 }
2159 if err != nil {
2160 return err
2161 }
2162 
2163 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2164 var nonTrappingNaNJump asm.Node
2165 if nonTrapping {
2166 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2167 // In the non-trapping case, NaN is converted to zero.
2168 // Zero out the result register by XORing it with itself.
2169 c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2170 nonTrappingNaNJump = c.assembler.CompileJump(amd64.JMP)
2171 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2172 } else {
2173 c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion)
2174 }
2175 
2176 // Jump if the source float value is above or equal to math.MaxInt32+1.
2177 jmpAboveOrEqualMaxIn32PlusOne := c.assembler.CompileJump(amd64.JCC)
2178 
2179 // Next, we convert the value as a signed integer.
2180 if isFloat32Bit {
2181 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2182 } else {
2183 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2184 }
2185 
2186 // Then, if the result is negative, it is an invalid conversion from a negative float (incl. -Inf).
2187 c.assembler.CompileRegisterToRegister(amd64.TESTL, result, result)
2188 
2189 var nonTrappingMinusJump asm.Node
2190 if nonTrapping {
2191 jmpIfNotMinusOrMinusInf := c.assembler.CompileJump(amd64.JPL)
2192 // In the non-trapping case, the negative value is converted to zero.
2193 // Zero out the result register by XORing it with itself.
2194 c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2195 nonTrappingMinusJump = c.assembler.CompileJump(amd64.JMP)
2196 c.assembler.SetJumpTargetOnNext(jmpIfNotMinusOrMinusInf)
2197 } else {
2198 c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2199 }
2200 
2201 // Otherwise, the value is valid.
2202 okJmpForLessThanMaxInt32PlusOne := c.assembler.CompileJump(amd64.JMP)
2203 
2204 // Now, start handling the case where the original float value is above or equal to math.MaxInt32+1.
2205 //
2206 // First, we subtract math.MaxInt32+1 from the original value so it can fit in a signed 32-bit integer.
2207 c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxIn32PlusOne)
2208 if isFloat32Bit {
2209 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS, c.float32ForMaximumSigned32bitIntPlusOne, source.register)
2210 } else {
2211 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD, c.float64ForMaximumSigned32bitIntPlusOne, source.register)
2212 }
2213 if err != nil {
2214 return err
2215 }
2216 
2217 // Then, convert the subtracted value as a signed 32-bit integer.
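// Illustrative sketch (not part of the original comments) of the branch being emitted here, in plain Go:
// values in [2^31, 2^32) do not fit the signed conversion, so 2^31 is subtracted first, the signed
// conversion is applied, and the missing 2^31 is added back as an integer afterwards.
// The helper name and its precondition are hypothetical, for this sketch only:
//
//	func truncBigF64ToU32(f float64) uint32 { // assumes 2^31 <= f < 2^32 and f is not NaN/Inf
//		shifted := int32(math.Trunc(f - 2147483648.0)) // now representable as a signed 32-bit integer
//		return uint32(shifted) + 0x8000_0000           // ADDL with float32SignBitMask restores the offset
//	}
//
// The sign-bit mask constant doubles as the integer value 2^31, which is why it is reused below.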
2218 if isFloat32Bit {
2219 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2220 } else {
2221 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2222 }
2223 
2224 // Next, we have to check if the value came from NaN or +Inf.
2225 // The NaN and +Inf cases result in 0x8000_0000 according to the conversion semantics,
2226 // so we check whether the resulting int value is negative.
2227 c.assembler.CompileRegisterToRegister(amd64.TESTL, result, result)
2228 
2229 // If the result is negative, the conversion is invalid (from NaN or +Inf).
2230 var nonTrappingAboveOrEqualMaxInt32PlusOne asm.Node
2231 if nonTrapping {
2232 jmpIfNotPlusInf := c.assembler.CompileJump(amd64.JPL)
2233 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.maximum32BitUnsignedInt, result)
2234 if err != nil {
2235 return err
2236 }
2237 nonTrappingAboveOrEqualMaxInt32PlusOne = c.assembler.CompileJump(amd64.JMP)
2238 c.assembler.SetJumpTargetOnNext(jmpIfNotPlusInf)
2239 } else {
2240 c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2241 }
2242 
2243 // Otherwise, we successfully converted the source float minus (math.MaxInt32+1) to int.
2244 // So, we recover the original (unsigned integer) value by adding back the sign mask.
2245 if err = c.assembler.CompileStaticConstToRegister(amd64.ADDL, c.float32SignBitMask, result); err != nil {
2246 return err
2247 }
2248 
2249 // We jump to the next instructions for valid cases.
2250 c.assembler.SetJumpTargetOnNext(okJmpForLessThanMaxInt32PlusOne)
2251 if nonTrapping {
2252 c.assembler.SetJumpTargetOnNext(nonTrappingAboveOrEqualMaxInt32PlusOne)
2253 c.assembler.SetJumpTargetOnNext(nonTrappingMinusJump)
2254 c.assembler.SetJumpTargetOnNext(nonTrappingNaNJump)
2255 }
2256 
2257 // We consumed the source's register and placed the conversion result
2258 // in the result register.
2259 c.locationStack.markRegisterUnused(source.register)
2260 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
2261 return nil
2262 }
2263 
2264 // emitUnsignedI64TruncFromFloat implements compileITruncFromF when the destination type is a 64-bit unsigned integer.
2265 func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2266 source := c.locationStack.pop()
2267 if err := c.compileEnsureOnRegister(source); err != nil {
2268 return err
2269 }
2270 
2271 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2272 if err != nil {
2273 return err
2274 }
2275 
2276 // First, we check whether the source float value is above or equal to math.MaxInt64+1.
2277 if isFloat32Bit {
2278 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMaximumSigned64bitIntPlusOne, source.register)
2279 } else {
2280 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMaximumSigned64bitIntPlusOne, source.register)
2281 }
2282 if err != nil {
2283 return err
2284 }
2285 
2286 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2287 var nonTrappingNaNJump asm.Node
2288 if nonTrapping {
2289 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2290 // In the non-trapping case, NaN is converted to zero.
2291 // Zero out the result register by XORing it with itself.
2292 c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2293 nonTrappingNaNJump = c.assembler.CompileJump(amd64.JMP)
2294 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2295 } else {
2296 c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion)
2297 }
2298 
2299 // Jump if the source float value is above or equal to math.MaxInt64+1.
2300 jmpAboveOrEqualMaxIn32PlusOne := c.assembler.CompileJump(amd64.JCC)
2301 
2302 // Next, we convert the value as a signed integer.
2303 if isFloat32Bit {
2304 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2305 } else {
2306 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2307 }
2308 
2309 // Then, if the result is negative, it is an invalid conversion from a negative float (incl. -Inf).
2310 c.assembler.CompileRegisterToRegister(amd64.TESTQ, result, result)
2311 
2312 var nonTrappingMinusJump asm.Node
2313 if nonTrapping {
2314 jmpIfNotMinusOrMinusInf := c.assembler.CompileJump(amd64.JPL)
2315 // In the non-trapping case, the negative value is converted to zero.
2316 // Zero out the result register by XORing it with itself.
2317 c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2318 nonTrappingMinusJump = c.assembler.CompileJump(amd64.JMP)
2319 c.assembler.SetJumpTargetOnNext(jmpIfNotMinusOrMinusInf)
2320 } else {
2321 c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2322 }
2323 
2324 // Otherwise, the value is valid.
2325 okJmpForLessThanMaxInt64PlusOne := c.assembler.CompileJump(amd64.JMP)
2326 
2327 // Now, start handling the case where the original float value is above or equal to math.MaxInt64+1.
2328 //
2329 // First, we subtract math.MaxInt64+1 from the original value so it can fit in a signed 64-bit integer.
2330 c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxIn32PlusOne)
2331 if isFloat32Bit {
2332 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS, c.float32ForMaximumSigned64bitIntPlusOne, source.register)
2333 } else {
2334 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD, c.float64ForMaximumSigned64bitIntPlusOne, source.register)
2335 }
2336 if err != nil {
2337 return err
2338 }
2339 
2340 // Then, convert the subtracted value as a signed 64-bit integer.
2341 if isFloat32Bit {
2342 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2343 } else {
2344 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2345 }
2346 
2347 // Next, we have to check if the value came from NaN or +Inf.
2348 // The NaN and +Inf cases result in 0x8000_0000_0000_0000 according to the conversion semantics,
2349 // so we check whether the resulting int value is negative.
2350 c.assembler.CompileRegisterToRegister(amd64.TESTQ, result, result)
2351 
2352 // If the result is negative, the conversion is invalid (from NaN or +Inf).
2353 var nonTrappingAboveOrEqualMaxInt64PlusOne asm.Node
2354 if nonTrapping {
2355 jmpIfNotPlusInf := c.assembler.CompileJump(amd64.JPL)
2356 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.maximum64BitUnsignedInt, result)
2357 if err != nil {
2358 return err
2359 }
2360 nonTrappingAboveOrEqualMaxInt64PlusOne = c.assembler.CompileJump(amd64.JMP)
2361 c.assembler.SetJumpTargetOnNext(jmpIfNotPlusInf)
2362 } else {
2363 c.compileMaybeExitFromNativeCode(amd64.JPL, nativeCallStatusIntegerOverflow)
2364 }
2365 
2366 // Otherwise, we successfully converted the source float minus (math.MaxInt64+1) to int.
2367 // So, we recover the original (unsigned integer) value by adding back the sign mask.
2368 if err = c.assembler.CompileStaticConstToRegister(amd64.ADDQ, c.float64SignBitMask, result); err != nil {
2369 return err
2370 }
2371 
2372 // We jump to the next instructions for valid cases.
2373 c.assembler.SetJumpTargetOnNext(okJmpForLessThanMaxInt64PlusOne)
2374 if nonTrapping {
2375 c.assembler.SetJumpTargetOnNext(nonTrappingAboveOrEqualMaxInt64PlusOne)
2376 c.assembler.SetJumpTargetOnNext(nonTrappingMinusJump)
2377 c.assembler.SetJumpTargetOnNext(nonTrappingNaNJump)
2378 }
2379 
2380 // We consumed the source's register and placed the conversion result
2381 // in the result register.
2382 c.locationStack.markRegisterUnused(source.register)
2383 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
2384 return nil
2385 }
2386 
2387 // emitSignedI32TruncFromFloat implements compileITruncFromF when the destination type is a 32-bit signed integer.
2388 func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2389 source := c.locationStack.pop()
2390 if err := c.compileEnsureOnRegister(source); err != nil {
2391 return err
2392 }
2393 
2394 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2395 if err != nil {
2396 return err
2397 }
2398 
2399 // First we unconditionally convert source to integer via CVTTSS2SI (CVTTSD2SI for 64bit float).
2400 if isFloat32Bit {
2401 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2402 } else {
2403 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2404 }
2405 
2406 // We compare the conversion result with the sign bit mask to check whether either
2407 // 1) the source float value is +-Inf or NaN, or exceeds the representable range of a 32-bit signed integer, or
2408 // 2) the source equals the minimum signed 32-bit integer (=-2147483648.000000), whose bit pattern is float32ForMinimumSigned32bitInteger for a 32-bit float
2409 // or float64ForMinimumSigned32bitInteger for a 64-bit float.
2410 err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, c.float32SignBitMask, result)
2411 if err != nil {
2412 return err
2413 }
2414 
2415 // Otherwise, jump to exit as the result is valid.
2416 okJmp := c.assembler.CompileJump(amd64.JNE)
2417 
2418 // Start handling the case of 1) and 2).
2419 // First, check if the value is NaN.
2420 if isFloat32Bit {
2421 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, source.register, source.register)
2422 } else {
2423 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, source.register, source.register)
2424 }
2425 
2426 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2427 var nontrappingNanJump asm.Node
2428 if nonTrapping {
2429 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2430 // In the non-trapping case, NaN is converted to zero.
2431 // Zero out the result register by XORing it with itself.
2432 c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2433 nontrappingNanJump = c.assembler.CompileJump(amd64.JMP)
2434 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2435 } else {
2436 // If the value is NaN, we exit the function with nativeCallStatusCodeInvalidFloatToIntConversion.
2437 c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion) 2438 } 2439 2440 // Check if the value is larger than or equal the minimum 32-bit integer value, 2441 // meaning that the value exceeds the lower bound of 32-bit signed integer range. 2442 if isFloat32Bit { 2443 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMinimumSigned32bitInteger, source.register) 2444 } else { 2445 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMinimumSigned32bitInteger, source.register) 2446 } 2447 if err != nil { 2448 return err 2449 } 2450 2451 if !nonTrapping { 2452 // Trap if the value does not exceed the lower bound. 2453 if isFloat32Bit { 2454 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusIntegerOverflow) 2455 } else { 2456 c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusIntegerOverflow) 2457 } 2458 2459 // At this point, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum. 2460 // So, check if the value equals the minimum signed 32-bit int. 2461 if isFloat32Bit { 2462 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2463 } else { 2464 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2465 } 2466 if err != nil { 2467 return err 2468 } 2469 2470 // Trap if the value is not minus (= the minimum signed 32-bit int). 2471 c.compileMaybeExitFromNativeCode(amd64.JCS, nativeCallStatusIntegerOverflow) 2472 2473 // We jump to the next instructions for valid cases. 2474 c.assembler.SetJumpTargetOnNext(okJmp) 2475 } else { 2476 // Jump if the value does not exceed the lower bound. 2477 var jmpIfNotExceedsLowerBound asm.Node 2478 if isFloat32Bit { 2479 jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JCC) 2480 } else { 2481 jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JHI) 2482 } 2483 2484 // If the value exceeds the lower bound, we "saturate" it to the minimum. 2485 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.minimum32BitSignedInt, result); err != nil { 2486 return err 2487 } 2488 nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) 2489 2490 // Otherwise, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum. 2491 c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) 2492 if isFloat32Bit { 2493 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2494 } else { 2495 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2496 } 2497 if err != nil { 2498 return err 2499 } 2500 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 32-bit int). 2501 2502 // If the value exceeds signed 32-bit maximum, we saturate it to the maximum. 2503 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, c.maximum32BitSignedInt, result); err != nil { 2504 return err 2505 } 2506 2507 c.assembler.SetJumpTargetOnNext(okJmp) 2508 c.assembler.SetJumpTargetOnNext(nontrappingNanJump) 2509 c.assembler.SetJumpTargetOnNext(nonTrappingSaturatedMinimumJump) 2510 c.assembler.SetJumpTargetOnNext(jmpIfMinimumSignedInt) 2511 } 2512 2513 // We consumed the source's register and placed the conversion result 2514 // in the result register. 
2515 c.locationStack.markRegisterUnused(source.register)
2516 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
2517 return nil
2518 }
2519 
2520 // emitSignedI64TruncFromFloat implements compileITruncFromF when the destination type is a 64-bit signed integer.
2521 func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2522 source := c.locationStack.pop()
2523 if err := c.compileEnsureOnRegister(source); err != nil {
2524 return err
2525 }
2526 
2527 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2528 if err != nil {
2529 return err
2530 }
2531 
2532 // First we unconditionally convert source to integer via CVTTSS2SI (CVTTSD2SI for 64bit float).
2533 if isFloat32Bit {
2534 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2535 } else {
2536 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2537 }
2538 
2539 // We compare the conversion result with the sign bit mask to check whether either
2540 // 1) the source float value is +-Inf or NaN, or exceeds the representable range of a 64-bit signed integer, or
2541 // 2) the source equals the minimum signed 64-bit integer (=-9223372036854775808.0), whose bit pattern is float32ForMinimumSigned64bitInteger for a 32-bit float
2542 // or float64ForMinimumSigned64bitInteger for a 64-bit float.
2543 err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, c.float64SignBitMask, result)
2544 if err != nil {
2545 return err
2546 }
2547 
2548 // Otherwise, we simply jump to exit as the result is valid.
2549 okJmp := c.assembler.CompileJump(amd64.JNE)
2550 
2551 // Start handling the case of 1) and 2).
2552 // First, check if the value is NaN.
2553 if isFloat32Bit {
2554 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, source.register, source.register)
2555 } else {
2556 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, source.register, source.register)
2557 }
2558 
2559 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2560 var nontrappingNanJump asm.Node
2561 if nonTrapping {
2562 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2563 // In the non-trapping case, NaN is converted to zero.
2564 // Zero out the result register by XORing it with itself.
2565 c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2566 nontrappingNanJump = c.assembler.CompileJump(amd64.JMP)
2567 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2568 } else {
2569 c.compileMaybeExitFromNativeCode(amd64.JPC, nativeCallStatusCodeInvalidFloatToIntConversion)
2570 }
2571 
2572 // Check whether the value is greater than or equal to the minimum 64-bit signed integer value,
2573 // i.e. whether it stays within the lower bound of the 64-bit signed integer range.
2574 if isFloat32Bit {
2575 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.float32ForMinimumSigned64bitInteger, source.register)
2576 } else {
2577 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.float64ForMinimumSigned64bitInteger, source.register)
2578 }
2579 if err != nil {
2580 return err
2581 }
2582 
2583 if !nonTrapping {
2584 // Trap if the value is below the lower bound (e.g. -Inf).
2585 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusIntegerOverflow)
2586 
2587 // At this point, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum.
2588 // So, check if the value equals the minimum signed 64-bit int.
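// Additional note (illustrative, not from the original comments): at this point the conversion produced
// the "integer indefinite" value and the input is known to be neither NaN nor below -2^63, so comparing
// the input against zero is enough to tell the two remaining cases apart: a negative input can only be
// exactly -2^63 (the one in-range float that also maps to 0x8000000000000000), while a non-negative input
// must have overflowed the signed 64-bit range. In Go terms, under those assumptions:
//
//	// given: !math.IsNaN(f) && f >= -9223372036854775808.0 && the truncation yielded math.MinInt64
//	valid := f < 0 // true => f == -2^63 exactly; false => f >= 2^63, i.e. integer overflow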
2589 if isFloat32Bit { 2590 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2591 } else { 2592 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2593 } 2594 if err != nil { 2595 return err 2596 } 2597 2598 // Trap if the value is not minus (= the minimum signed 64-bit int). 2599 c.compileMaybeExitFromNativeCode(amd64.JCS, nativeCallStatusIntegerOverflow) 2600 2601 // We jump to the next instructions for valid cases. 2602 c.assembler.SetJumpTargetOnNext(okJmp) 2603 } else { 2604 // Jump if the value is not -Inf. 2605 jmpIfNotExceedsLowerBound := c.assembler.CompileJump(amd64.JCC) 2606 2607 // If the value exceeds the lower bound, we "saturate" it to the minimum. 2608 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.minimum64BitSignedInt, result) 2609 if err != nil { 2610 return err 2611 } 2612 2613 nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) 2614 2615 // Otherwise, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum. 2616 // So, check if the value equals the minimum signed 64-bit int. 2617 c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) 2618 if isFloat32Bit { 2619 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, c.fourZeros, source.register) 2620 } else { 2621 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, c.eightZeros, source.register) 2622 } 2623 if err != nil { 2624 return err 2625 } 2626 2627 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 64-bit int). 2628 2629 // If the value exceeds signed 64-bit maximum, we saturate it to the maximum. 2630 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, c.maximum64BitSignedInt, result); err != nil { 2631 return err 2632 } 2633 2634 c.assembler.SetJumpTargetOnNext(okJmp) 2635 c.assembler.SetJumpTargetOnNext(jmpIfMinimumSignedInt) 2636 c.assembler.SetJumpTargetOnNext(nonTrappingSaturatedMinimumJump) 2637 c.assembler.SetJumpTargetOnNext(nontrappingNanJump) 2638 } 2639 2640 // We consumed the source's register and placed the conversion result 2641 // in the result register. 2642 c.locationStack.markRegisterUnused(source.register) 2643 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64) 2644 return nil 2645 } 2646 2647 // compileFConvertFromI implements compiler.compileFConvertFromI for the amd64 architecture. 
2648 func (c *amd64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) (err error) { 2649 inputType := wazeroir.SignedInt(o.B1) 2650 outputType := wazeroir.Float(o.B2) 2651 if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 { 2652 err = c.compileSimpleConversion(amd64.CVTSL2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 32bit int 2653 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 { 2654 err = c.compileSimpleConversion(amd64.CVTSQ2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 64bit int 2655 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 { 2656 err = c.compileSimpleConversion(amd64.CVTSL2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 32bit int 2657 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 { 2658 err = c.compileSimpleConversion(amd64.CVTSQ2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 64bit int 2659 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 { 2660 // See the following link for why we use 64bit conversion for unsigned 32bit integer sources: 2661 // https://stackoverflow.com/questions/41495498/fpu-operations-generated-by-gcc-during-casting-integer-to-float. 2662 // 2663 // Here's the summary: 2664 // >> CVTSI2SS is indeed designed for converting a signed integer to a scalar single-precision float, 2665 // >> not an unsigned integer like you have here. So what gives? Well, a 64-bit processor has 64-bit wide 2666 // >> registers available, so the unsigned 32-bit input values can be stored as signed 64-bit intermediate values, 2667 // >> which allows CVTSI2SS to be used after all. 2668 err = c.compileSimpleConversion(amd64.CVTSQ2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 64bit int. 2669 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 { 2670 // For the same reason above, we use 64bit conversion for unsigned 32bit. 2671 err = c.compileSimpleConversion(amd64.CVTSQ2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 64bit int. 2672 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 { 2673 err = c.emitUnsignedInt64ToFloatConversion(true) 2674 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 { 2675 err = c.emitUnsignedInt64ToFloatConversion(false) 2676 } 2677 return 2678 } 2679 2680 // emitUnsignedInt64ToFloatConversion is handling the case of unsigned 64-bit integer 2681 // in compileFConvertFromI. 2682 func (c *amd64Compiler) emitUnsignedInt64ToFloatConversion(isFloat32bit bool) error { 2683 // The logic here is exactly the same as GCC emits for the following code: 2684 // 2685 // float convert(int num) { 2686 // float foo; 2687 // uint64_t ptr1 = 100; 2688 // foo = (float)(ptr1); 2689 // return foo; 2690 // } 2691 // 2692 // which is compiled by GCC as 2693 // 2694 // convert: 2695 // push rbp 2696 // mov rbp, rsp 2697 // mov DWORD PTR [rbp-20], edi 2698 // mov DWORD PTR [rbp-4], 100 2699 // mov eax, DWORD PTR [rbp-4] 2700 // test rax, rax 2701 // js .handle_sign_bit_case 2702 // cvtsi2ss xmm0, rax 2703 // jmp .exit 2704 // .handle_sign_bit_case: 2705 // mov rdx, rax 2706 // shr rdx 2707 // and eax, 1 2708 // or rdx, rax 2709 // cvtsi2ss xmm0, rdx 2710 // addsd xmm0, xmm0 2711 // .exit: ... 2712 // 2713 // tl;dr is that we have a branch depending on whether or not sign bit is set. 
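// Additional illustrative sketch (not in the original comment): the same halve-convert-double trick
// written in plain Go, for the case where the top bit of the unsigned value is set and a plain signed
// conversion would be wrong. The helper name is hypothetical and exists only for this sketch:
//
//	func u64ToF64(v uint64) float64 {
//		if int64(v) >= 0 {
//			return float64(int64(v)) // sign bit clear: converting via int64 gives the same result
//		}
//		half := (v >> 1) | (v & 1) // halve, folding the low bit back in so the final rounding stays correct
//		f := float64(int64(half))  // now fits in a signed 64-bit integer
//		return f + f               // double back; corresponds to the ADDSS/ADDSD emitted below
//	}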
2714 2715 origin := c.locationStack.pop() 2716 if err := c.compileEnsureOnRegister(origin); err != nil { 2717 return err 2718 } 2719 2720 dest, err := c.allocateRegister(registerTypeVector) 2721 if err != nil { 2722 return err 2723 } 2724 2725 c.locationStack.markRegisterUsed(dest) 2726 2727 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2728 if err != nil { 2729 return err 2730 } 2731 2732 // Check if the most significant bit (sign bit) is set. 2733 c.assembler.CompileRegisterToRegister(amd64.TESTQ, origin.register, origin.register) 2734 2735 // Jump if the sign bit is set. 2736 jmpIfSignbitSet := c.assembler.CompileJump(amd64.JMI) 2737 2738 // Otherwise, we could fit the unsigned int into float32. 2739 // So, we convert it to float32 and emit jump instruction to exit from this branch. 2740 if isFloat32bit { 2741 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SS, origin.register, dest) 2742 } else { 2743 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SD, origin.register, dest) 2744 } 2745 exitFromSignbitUnSet := c.assembler.CompileJump(amd64.JMP) 2746 2747 // Now handling the case where sign-bit is set. 2748 // We emit the following sequences: 2749 // mov tmpReg, origin 2750 // shr tmpReg, 1 2751 // and origin, 1 2752 // or tmpReg, origin 2753 // cvtsi2ss xmm0, tmpReg 2754 // addsd xmm0, xmm0 2755 2756 c.assembler.SetJumpTargetOnNext(jmpIfSignbitSet) 2757 c.assembler.CompileRegisterToRegister(amd64.MOVQ, origin.register, tmpReg) 2758 c.assembler.CompileConstToRegister(amd64.SHRQ, 1, tmpReg) 2759 c.assembler.CompileConstToRegister(amd64.ANDQ, 1, origin.register) 2760 c.assembler.CompileRegisterToRegister(amd64.ORQ, origin.register, tmpReg) 2761 if isFloat32bit { 2762 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SS, tmpReg, dest) 2763 } else { 2764 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SD, tmpReg, dest) 2765 } 2766 if isFloat32bit { 2767 c.assembler.CompileRegisterToRegister(amd64.ADDSS, dest, dest) 2768 } else { 2769 c.assembler.CompileRegisterToRegister(amd64.ADDSD, dest, dest) 2770 } 2771 2772 // Now, we finished the sign-bit set branch. 2773 // We have to make the exit jump target of sign-bit unset branch 2774 // towards the next instruction. 2775 c.assembler.SetJumpTargetOnNext(exitFromSignbitUnSet) 2776 2777 // We consumed the origin's register and placed the conversion result 2778 // in the dest register. 2779 c.locationStack.markRegisterUnused(origin.register) 2780 if isFloat32bit { 2781 c.pushRuntimeValueLocationOnRegister(dest, runtimeValueTypeF32) 2782 } else { 2783 c.pushRuntimeValueLocationOnRegister(dest, runtimeValueTypeF64) 2784 } 2785 return nil 2786 } 2787 2788 // compileSimpleConversion pops a value type from the stack, and applies the 2789 // given instruction on it, and push the result onto a register of the given type. 
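// For example, the i32 -> f32 case in compileFConvertFromI above is lowered as
//
//	c.compileSimpleConversion(amd64.CVTSL2SS, registerTypeVector, runtimeValueTypeF32)
//
// which pops the i32 operand, emits CVTSI2SS into a newly allocated XMM register, and pushes
// the f32 result onto the location stack.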
2790 func (c *amd64Compiler) compileSimpleConversion(convInstruction asm.Instruction, 2791 destinationRegisterType registerType, destinationValueType runtimeValueType, 2792 ) error { 2793 origin := c.locationStack.pop() 2794 if err := c.compileEnsureOnRegister(origin); err != nil { 2795 return err 2796 } 2797 2798 dest, err := c.allocateRegister(destinationRegisterType) 2799 if err != nil { 2800 return err 2801 } 2802 2803 c.assembler.CompileRegisterToRegister(convInstruction, origin.register, dest) 2804 2805 c.locationStack.markRegisterUnused(origin.register) 2806 c.pushRuntimeValueLocationOnRegister(dest, destinationValueType) 2807 return nil 2808 } 2809 2810 // compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the amd64 architecture. 2811 func (c *amd64Compiler) compileF32DemoteFromF64() error { 2812 target := c.locationStack.peek() // Note this is peek! 2813 if err := c.compileEnsureOnRegister(target); err != nil { 2814 return err 2815 } 2816 2817 c.assembler.CompileRegisterToRegister(amd64.CVTSD2SS, target.register, target.register) 2818 target.valueType = runtimeValueTypeF32 2819 return nil 2820 } 2821 2822 // compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the amd64 architecture. 2823 func (c *amd64Compiler) compileF64PromoteFromF32() error { 2824 target := c.locationStack.peek() // Note this is peek! 2825 if err := c.compileEnsureOnRegister(target); err != nil { 2826 return err 2827 } 2828 2829 c.assembler.CompileRegisterToRegister(amd64.CVTSS2SD, target.register, target.register) 2830 target.valueType = runtimeValueTypeF64 2831 return nil 2832 } 2833 2834 // compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the amd64 architecture. 2835 func (c *amd64Compiler) compileI32ReinterpretFromF32() error { 2836 if peek := c.locationStack.peek(); peek.onStack() { 2837 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2838 peek.valueType = runtimeValueTypeI32 2839 return nil 2840 } 2841 return c.compileSimpleConversion(amd64.MOVL, registerTypeGeneralPurpose, runtimeValueTypeI32) 2842 } 2843 2844 // compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the amd64 architecture. 2845 func (c *amd64Compiler) compileI64ReinterpretFromF64() error { 2846 if peek := c.locationStack.peek(); peek.onStack() { 2847 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2848 peek.valueType = runtimeValueTypeI64 2849 return nil 2850 } 2851 return c.compileSimpleConversion(amd64.MOVQ, registerTypeGeneralPurpose, runtimeValueTypeI64) 2852 } 2853 2854 // compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the amd64 architecture. 2855 func (c *amd64Compiler) compileF32ReinterpretFromI32() error { 2856 if peek := c.locationStack.peek(); peek.onStack() { 2857 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2858 peek.valueType = runtimeValueTypeF32 2859 return nil 2860 } 2861 return c.compileSimpleConversion(amd64.MOVL, registerTypeVector, runtimeValueTypeF32) 2862 } 2863 2864 // compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the amd64 architecture. 2865 func (c *amd64Compiler) compileF64ReinterpretFromI64() error { 2866 if peek := c.locationStack.peek(); peek.onStack() { 2867 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 
2868 peek.valueType = runtimeValueTypeF64 2869 return nil 2870 } 2871 return c.compileSimpleConversion(amd64.MOVQ, registerTypeVector, runtimeValueTypeF64) 2872 } 2873 2874 // compileExtend implements compiler.compileExtend for the amd64 architecture. 2875 func (c *amd64Compiler) compileExtend(o *wazeroir.UnionOperation) error { 2876 var inst asm.Instruction 2877 signed := o.B1 != 0 2878 if signed { 2879 inst = amd64.MOVLQSX // = MOVSXD https://www.felixcloutier.com/x86/movsx:movsxd 2880 } else { 2881 inst = amd64.MOVL 2882 } 2883 return c.compileExtendImpl(inst, runtimeValueTypeI64) 2884 } 2885 2886 // compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the amd64 architecture. 2887 func (c *amd64Compiler) compileSignExtend32From8() error { 2888 return c.compileExtendImpl(amd64.MOVBLSX, runtimeValueTypeI32) 2889 } 2890 2891 // compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the amd64 architecture. 2892 func (c *amd64Compiler) compileSignExtend32From16() error { 2893 return c.compileExtendImpl(amd64.MOVWLSX, runtimeValueTypeI32) 2894 } 2895 2896 // compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the amd64 architecture. 2897 func (c *amd64Compiler) compileSignExtend64From8() error { 2898 return c.compileExtendImpl(amd64.MOVBQSX, runtimeValueTypeI64) 2899 } 2900 2901 // compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the amd64 architecture. 2902 func (c *amd64Compiler) compileSignExtend64From16() error { 2903 return c.compileExtendImpl(amd64.MOVWQSX, runtimeValueTypeI64) 2904 } 2905 2906 // compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the amd64 architecture. 2907 func (c *amd64Compiler) compileSignExtend64From32() error { 2908 return c.compileExtendImpl(amd64.MOVLQSX, runtimeValueTypeI64) 2909 } 2910 2911 func (c *amd64Compiler) compileExtendImpl(inst asm.Instruction, destinationType runtimeValueType) error { 2912 target := c.locationStack.peek() // Note this is peek! 2913 if err := c.compileEnsureOnRegister(target); err != nil { 2914 return err 2915 } 2916 2917 c.assembler.CompileRegisterToRegister(inst, target.register, target.register) 2918 target.valueType = destinationType 2919 return nil 2920 } 2921 2922 // compileEq implements compiler.compileEq for the amd64 architecture. 2923 func (c *amd64Compiler) compileEq(o *wazeroir.UnionOperation) error { 2924 return c.compileEqOrNe(wazeroir.UnsignedType(o.B1), true) 2925 } 2926 2927 // compileNe implements compiler.compileNe for the amd64 architecture. 2928 func (c *amd64Compiler) compileNe(o *wazeroir.UnionOperation) error { 2929 return c.compileEqOrNe(wazeroir.UnsignedType(o.B1), false) 2930 } 2931 2932 func (c *amd64Compiler) compileEqOrNe(t wazeroir.UnsignedType, shouldEqual bool) (err error) { 2933 x2 := c.locationStack.pop() 2934 if err := c.compileEnsureOnRegister(x2); err != nil { 2935 return err 2936 } 2937 2938 x1 := c.locationStack.pop() 2939 if err := c.compileEnsureOnRegister(x1); err != nil { 2940 return err 2941 } 2942 2943 x1r, x2r := x1.register, x2.register 2944 2945 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 
2946 c.locationStack.releaseRegister(x1) 2947 c.locationStack.releaseRegister(x2) 2948 2949 switch t { 2950 case wazeroir.UnsignedTypeI32: 2951 err = c.compileEqOrNeForInts(x1r, x2r, amd64.CMPL, shouldEqual) 2952 case wazeroir.UnsignedTypeI64: 2953 err = c.compileEqOrNeForInts(x1r, x2r, amd64.CMPQ, shouldEqual) 2954 case wazeroir.UnsignedTypeF32: 2955 err = c.compileEqOrNeForFloats(x1r, x2r, amd64.UCOMISS, shouldEqual) 2956 case wazeroir.UnsignedTypeF64: 2957 err = c.compileEqOrNeForFloats(x1r, x2r, amd64.UCOMISD, shouldEqual) 2958 } 2959 if err != nil { 2960 return 2961 } 2962 return 2963 } 2964 2965 func (c *amd64Compiler) compileEqOrNeForInts(x1Reg, x2Reg asm.Register, cmpInstruction asm.Instruction, 2966 shouldEqual bool, 2967 ) error { 2968 c.assembler.CompileRegisterToRegister(cmpInstruction, x2Reg, x1Reg) 2969 2970 // Record that the result is on the conditional register. 2971 var condReg asm.ConditionalRegisterState 2972 if shouldEqual { 2973 condReg = amd64.ConditionalRegisterStateE 2974 } else { 2975 condReg = amd64.ConditionalRegisterStateNE 2976 } 2977 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(condReg) 2978 loc.valueType = runtimeValueTypeI32 2979 return nil 2980 } 2981 2982 // For float EQ and NE, we have to take NaN values into account. 2983 // Notably, Wasm specification states that if one of targets is NaN, 2984 // the result must be zero for EQ or one for NE. 2985 func (c *amd64Compiler) compileEqOrNeForFloats(x1Reg, x2Reg asm.Register, cmpInstruction asm.Instruction, shouldEqual bool) error { 2986 // Before we allocate the result, we have to reserve two int registers. 2987 nanFragReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2988 if err != nil { 2989 return err 2990 } 2991 c.locationStack.markRegisterUsed(nanFragReg) 2992 cmpResultReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2993 if err != nil { 2994 return err 2995 } 2996 2997 // Then, execute the comparison. 2998 c.assembler.CompileRegisterToRegister(cmpInstruction, x2Reg, x1Reg) 2999 3000 // First, we get the parity flag which indicates whether one of values was NaN. 3001 if shouldEqual { 3002 // Set 1 if two values are NOT NaN. 3003 c.assembler.CompileNoneToRegister(amd64.SETPC, nanFragReg) 3004 } else { 3005 // Set 1 if one of values is NaN. 3006 c.assembler.CompileNoneToRegister(amd64.SETPS, nanFragReg) 3007 } 3008 3009 // next, we get the usual comparison flag. 3010 if shouldEqual { 3011 // Set 1 if equal. 3012 c.assembler.CompileNoneToRegister(amd64.SETEQ, cmpResultReg) 3013 } else { 3014 // Set 1 if not equal. 3015 c.assembler.CompileNoneToRegister(amd64.SETNE, cmpResultReg) 3016 } 3017 3018 // Do "and" or "or" operations on these two flags to get the actual result. 3019 if shouldEqual { 3020 c.assembler.CompileRegisterToRegister(amd64.ANDL, nanFragReg, cmpResultReg) 3021 } else { 3022 c.assembler.CompileRegisterToRegister(amd64.ORL, nanFragReg, cmpResultReg) 3023 } 3024 3025 // Clear the unnecessary bits by zero extending the first byte. 3026 // This is necessary the upper bits (5 to 32 bits) of SET* instruction result is undefined. 3027 c.assembler.CompileRegisterToRegister(amd64.MOVBLZX, cmpResultReg, cmpResultReg) 3028 3029 // Now we have the result in cmpResultReg register, so we record it. 3030 c.pushRuntimeValueLocationOnRegister(cmpResultReg, runtimeValueTypeI32) 3031 // Also, we no longer need nanFragRegister. 
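// In Go terms, the flag combination above computes roughly (illustrative sketch only):
//
//	eq := !(isNaN(x1) || isNaN(x2)) && x1 == x2 // SETPC AND-ed with SETEQ.
//	ne := isNaN(x1) || isNaN(x2) || x1 != x2    // SETPS OR-ed with SETNE.
//
// so a NaN operand forces 0 for EQ and 1 for NE, as the Wasm specification requires.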
3032 c.locationStack.markRegisterUnused(nanFragReg) 3033 return nil 3034 } 3035 3036 // compileEqz implements compiler.compileEqz for the amd64 architecture. 3037 func (c *amd64Compiler) compileEqz(o *wazeroir.UnionOperation) (err error) { 3038 v := c.locationStack.pop() 3039 if err = c.compileEnsureOnRegister(v); err != nil { 3040 return err 3041 } 3042 3043 unsignedInt := wazeroir.UnsignedInt(o.B1) 3044 switch unsignedInt { 3045 case wazeroir.UnsignedInt32: 3046 err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, c.fourZeros, v.register) 3047 case wazeroir.UnsignedInt64: 3048 err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, c.eightZeros, v.register) 3049 } 3050 if err != nil { 3051 return err 3052 } 3053 3054 // v is consumed by the cmp operation so release it. 3055 c.locationStack.releaseRegister(v) 3056 3057 // Finally, record that the result is on the conditional register. 3058 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(amd64.ConditionalRegisterStateE) 3059 loc.valueType = runtimeValueTypeI32 3060 return nil 3061 } 3062 3063 // compileLt implements compiler.compileLt for the amd64 architecture. 3064 func (c *amd64Compiler) compileLt(o *wazeroir.UnionOperation) error { 3065 x2 := c.locationStack.pop() 3066 if err := c.compileEnsureOnRegister(x2); err != nil { 3067 return err 3068 } 3069 3070 x1 := c.locationStack.pop() 3071 if err := c.compileEnsureOnRegister(x1); err != nil { 3072 return err 3073 } 3074 3075 // Emit the compare instruction. 3076 var resultConditionState asm.ConditionalRegisterState 3077 var inst asm.Instruction 3078 signedType := wazeroir.SignedType(o.B1) 3079 switch signedType { 3080 case wazeroir.SignedTypeInt32: 3081 resultConditionState = amd64.ConditionalRegisterStateL 3082 inst = amd64.CMPL 3083 case wazeroir.SignedTypeUint32: 3084 resultConditionState = amd64.ConditionalRegisterStateB 3085 inst = amd64.CMPL 3086 case wazeroir.SignedTypeInt64: 3087 inst = amd64.CMPQ 3088 resultConditionState = amd64.ConditionalRegisterStateL 3089 case wazeroir.SignedTypeUint64: 3090 resultConditionState = amd64.ConditionalRegisterStateB 3091 inst = amd64.CMPQ 3092 case wazeroir.SignedTypeFloat32: 3093 resultConditionState = amd64.ConditionalRegisterStateA 3094 inst = amd64.COMISS 3095 case wazeroir.SignedTypeFloat64: 3096 resultConditionState = amd64.ConditionalRegisterStateA 3097 inst = amd64.COMISD 3098 } 3099 c.assembler.CompileRegisterToRegister(inst, x1.register, x2.register) 3100 3101 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3102 c.locationStack.releaseRegister(x1) 3103 c.locationStack.releaseRegister(x2) 3104 3105 // Finally, record that the result is on the conditional register. 3106 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3107 loc.valueType = runtimeValueTypeI32 3108 return nil 3109 } 3110 3111 // compileGt implements compiler.compileGt for the amd64 architecture. 3112 func (c *amd64Compiler) compileGt(o *wazeroir.UnionOperation) error { 3113 x2 := c.locationStack.pop() 3114 if err := c.compileEnsureOnRegister(x2); err != nil { 3115 return err 3116 } 3117 3118 x1 := c.locationStack.pop() 3119 if err := c.compileEnsureOnRegister(x1); err != nil { 3120 return err 3121 } 3122 3123 // Emit the compare instruction. 
3124 var resultConditionState asm.ConditionalRegisterState 3125 signedType := wazeroir.SignedType(o.B1) 3126 switch signedType { 3127 case wazeroir.SignedTypeInt32: 3128 resultConditionState = amd64.ConditionalRegisterStateG 3129 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3130 case wazeroir.SignedTypeUint32: 3131 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3132 resultConditionState = amd64.ConditionalRegisterStateA 3133 case wazeroir.SignedTypeInt64: 3134 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3135 resultConditionState = amd64.ConditionalRegisterStateG 3136 case wazeroir.SignedTypeUint64: 3137 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3138 resultConditionState = amd64.ConditionalRegisterStateA 3139 case wazeroir.SignedTypeFloat32: 3140 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, x2.register, x1.register) 3141 resultConditionState = amd64.ConditionalRegisterStateA 3142 case wazeroir.SignedTypeFloat64: 3143 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, x2.register, x1.register) 3144 resultConditionState = amd64.ConditionalRegisterStateA 3145 } 3146 3147 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3148 c.locationStack.releaseRegister(x1) 3149 c.locationStack.releaseRegister(x2) 3150 3151 // Finally, record that the result is on the conditional register. 3152 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3153 loc.valueType = runtimeValueTypeI32 3154 return nil 3155 } 3156 3157 // compileLe implements compiler.compileLe for the amd64 architecture. 3158 func (c *amd64Compiler) compileLe(o *wazeroir.UnionOperation) error { 3159 x2 := c.locationStack.pop() 3160 if err := c.compileEnsureOnRegister(x2); err != nil { 3161 return err 3162 } 3163 3164 x1 := c.locationStack.pop() 3165 if err := c.compileEnsureOnRegister(x1); err != nil { 3166 return err 3167 } 3168 3169 // Emit the compare instruction. 3170 var inst asm.Instruction 3171 var resultConditionState asm.ConditionalRegisterState 3172 signedType := wazeroir.SignedType(o.B1) 3173 switch signedType { 3174 case wazeroir.SignedTypeInt32: 3175 resultConditionState = amd64.ConditionalRegisterStateLE 3176 inst = amd64.CMPL 3177 case wazeroir.SignedTypeUint32: 3178 resultConditionState = amd64.ConditionalRegisterStateBE 3179 inst = amd64.CMPL 3180 case wazeroir.SignedTypeInt64: 3181 resultConditionState = amd64.ConditionalRegisterStateLE 3182 inst = amd64.CMPQ 3183 case wazeroir.SignedTypeUint64: 3184 resultConditionState = amd64.ConditionalRegisterStateBE 3185 inst = amd64.CMPQ 3186 case wazeroir.SignedTypeFloat32: 3187 resultConditionState = amd64.ConditionalRegisterStateAE 3188 inst = amd64.UCOMISS 3189 case wazeroir.SignedTypeFloat64: 3190 resultConditionState = amd64.ConditionalRegisterStateAE 3191 inst = amd64.UCOMISD 3192 } 3193 c.assembler.CompileRegisterToRegister(inst, x1.register, x2.register) 3194 3195 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3196 c.locationStack.releaseRegister(x1) 3197 c.locationStack.releaseRegister(x2) 3198 3199 // Finally, record that the result is on the conditional register. 3200 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3201 loc.valueType = runtimeValueTypeI32 3202 return nil 3203 } 3204 3205 // compileGe implements compiler.compileGe for the amd64 architecture. 
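// For floats this relies on COMISS/COMISD reporting an unordered comparison as CF=ZF=PF=1, so the
// "above or equal" (CF=0) condition is false whenever either operand is NaN. In Go terms the
// lowered comparison behaves like
//
//	ge := x1 >= x2 // false if either operand is NaN
//
// which matches Wasm's f32.ge/f64.ge semantics.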
3206 func (c *amd64Compiler) compileGe(o *wazeroir.UnionOperation) error { 3207 x2 := c.locationStack.pop() 3208 if err := c.compileEnsureOnRegister(x2); err != nil { 3209 return err 3210 } 3211 3212 x1 := c.locationStack.pop() 3213 if err := c.compileEnsureOnRegister(x1); err != nil { 3214 return err 3215 } 3216 3217 // Emit the compare instruction. 3218 var resultConditionState asm.ConditionalRegisterState 3219 signedType := wazeroir.SignedType(o.B1) 3220 switch signedType { 3221 case wazeroir.SignedTypeInt32: 3222 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3223 resultConditionState = amd64.ConditionalRegisterStateGE 3224 case wazeroir.SignedTypeUint32: 3225 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3226 resultConditionState = amd64.ConditionalRegisterStateAE 3227 case wazeroir.SignedTypeInt64: 3228 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3229 resultConditionState = amd64.ConditionalRegisterStateGE 3230 case wazeroir.SignedTypeUint64: 3231 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3232 resultConditionState = amd64.ConditionalRegisterStateAE 3233 case wazeroir.SignedTypeFloat32: 3234 c.assembler.CompileRegisterToRegister(amd64.COMISS, x2.register, x1.register) 3235 resultConditionState = amd64.ConditionalRegisterStateAE 3236 case wazeroir.SignedTypeFloat64: 3237 c.assembler.CompileRegisterToRegister(amd64.COMISD, x2.register, x1.register) 3238 resultConditionState = amd64.ConditionalRegisterStateAE 3239 } 3240 3241 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3242 c.locationStack.releaseRegister(x1) 3243 c.locationStack.releaseRegister(x2) 3244 3245 // Finally, record that the result is on the conditional register. 3246 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3247 loc.valueType = runtimeValueTypeI32 3248 return nil 3249 } 3250 3251 // compileLoad implements compiler.compileLoad for the amd64 architecture. 3252 func (c *amd64Compiler) compileLoad(o *wazeroir.UnionOperation) error { 3253 var ( 3254 isIntType bool 3255 movInst asm.Instruction 3256 targetSizeInBytes int64 3257 vt runtimeValueType 3258 ) 3259 3260 unsignedType := wazeroir.UnsignedType(o.B1) 3261 offset := uint32(o.U2) 3262 3263 switch unsignedType { 3264 case wazeroir.UnsignedTypeI32: 3265 isIntType = true 3266 movInst = amd64.MOVL 3267 targetSizeInBytes = 32 / 8 3268 vt = runtimeValueTypeI32 3269 case wazeroir.UnsignedTypeI64: 3270 isIntType = true 3271 movInst = amd64.MOVQ 3272 targetSizeInBytes = 64 / 8 3273 vt = runtimeValueTypeI64 3274 case wazeroir.UnsignedTypeF32: 3275 isIntType = false 3276 movInst = amd64.MOVL 3277 targetSizeInBytes = 32 / 8 3278 vt = runtimeValueTypeF32 3279 case wazeroir.UnsignedTypeF64: 3280 isIntType = false 3281 movInst = amd64.MOVQ 3282 targetSizeInBytes = 64 / 8 3283 vt = runtimeValueTypeF64 3284 } 3285 3286 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3287 if err != nil { 3288 return err 3289 } 3290 3291 if isIntType { 3292 // For integer types, read the corresponding bytes from the offset to the memory 3293 // and store the value to the int register. 3294 c.assembler.CompileMemoryWithIndexToRegister(movInst, 3295 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 
3296 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3297 reg) 3298 c.pushRuntimeValueLocationOnRegister(reg, vt) 3299 } else { 3300 // For float types, we read the value to the float register. 3301 floatReg, err := c.allocateRegister(registerTypeVector) 3302 if err != nil { 3303 return err 3304 } 3305 c.assembler.CompileMemoryWithIndexToRegister(movInst, 3306 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3307 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3308 floatReg) 3309 c.pushRuntimeValueLocationOnRegister(floatReg, vt) 3310 // We no longer need the int register so mark it unused. 3311 c.locationStack.markRegisterUnused(reg) 3312 } 3313 return nil 3314 } 3315 3316 // compileLoad8 implements compiler.compileLoad8 for the amd64 architecture. 3317 func (c *amd64Compiler) compileLoad8(o *wazeroir.UnionOperation) error { 3318 const targetSizeInBytes = 1 3319 offset := uint32(o.U2) 3320 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3321 if err != nil { 3322 return err 3323 } 3324 3325 // Then move a byte at the offset to the register. 3326 // Note that Load8 is only for integer types. 3327 var inst asm.Instruction 3328 var vt runtimeValueType 3329 signedInt := wazeroir.SignedInt(o.B1) 3330 switch signedInt { 3331 case wazeroir.SignedInt32: 3332 inst = amd64.MOVBLSX 3333 vt = runtimeValueTypeI32 3334 case wazeroir.SignedUint32: 3335 inst = amd64.MOVBLZX 3336 vt = runtimeValueTypeI32 3337 case wazeroir.SignedInt64: 3338 inst = amd64.MOVBQSX 3339 vt = runtimeValueTypeI64 3340 case wazeroir.SignedUint64: 3341 inst = amd64.MOVBQZX 3342 vt = runtimeValueTypeI64 3343 } 3344 3345 c.assembler.CompileMemoryWithIndexToRegister(inst, 3346 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3347 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3348 reg) 3349 3350 c.pushRuntimeValueLocationOnRegister(reg, vt) 3351 return nil 3352 } 3353 3354 // compileLoad16 implements compiler.compileLoad16 for the amd64 architecture. 3355 func (c *amd64Compiler) compileLoad16(o *wazeroir.UnionOperation) error { 3356 const targetSizeInBytes = 16 / 8 3357 offset := uint32(o.U2) 3358 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 3359 if err != nil { 3360 return err 3361 } 3362 3363 // Then move 2 bytes at the offset to the register. 3364 // Note that Load16 is only for integer types. 3365 var inst asm.Instruction 3366 var vt runtimeValueType 3367 signedInt := wazeroir.SignedInt(o.B1) 3368 switch signedInt { 3369 case wazeroir.SignedInt32: 3370 inst = amd64.MOVWLSX 3371 vt = runtimeValueTypeI32 3372 case wazeroir.SignedInt64: 3373 inst = amd64.MOVWQSX 3374 vt = runtimeValueTypeI64 3375 case wazeroir.SignedUint32: 3376 inst = amd64.MOVWLZX 3377 vt = runtimeValueTypeI32 3378 case wazeroir.SignedUint64: 3379 inst = amd64.MOVWQZX 3380 vt = runtimeValueTypeI64 3381 } 3382 3383 c.assembler.CompileMemoryWithIndexToRegister(inst, 3384 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3385 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3386 reg) 3387 3388 c.pushRuntimeValueLocationOnRegister(reg, vt) 3389 return nil 3390 } 3391 3392 // compileLoad32 implements compiler.compileLoad32 for the amd64 architecture. 
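// This lowers i64.load32_s / i64.load32_u: after the usual bounds check, the four bytes at the
// effective address are sign-extended (MOVLQSX) or zero-extended (MOVLQZX) into a 64-bit register
// and pushed as an i64.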
3393 func (c *amd64Compiler) compileLoad32(o *wazeroir.UnionOperation) error {
3394     const targetSizeInBytes = 32 / 8
3395     offset := uint32(o.U2)
3396     reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes)
3397     if err != nil {
3398         return err
3399     }
3400
3401     // Then move 4 bytes at the offset to the register.
3402     var inst asm.Instruction
3403     signed := o.B1 == 1
3404     if signed {
3405         inst = amd64.MOVLQSX
3406     } else {
3407         inst = amd64.MOVLQZX
3408     }
3409     c.assembler.CompileMemoryWithIndexToRegister(inst,
3410         // We access memory as memory.Buffer[ceil-targetSizeInBytes: ceil].
3411         amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1,
3412         reg)
3413     c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64)
3414     return nil
3415 }
3416
3417 // compileMemoryAccessCeilSetup pops the top value from the stack (called "base"), stores "base + offsetArg + targetSizeInBytes"
3418 // into a register, and returns the stored register. We call the result "ceil" because we access the memory
3419 // as memory.Buffer[ceil-targetSizeInBytes: ceil].
3420 //
3421 // Note: this also emits the instructions to check the out-of-bounds memory access.
3422 // In other words, if the ceil exceeds the memory size, the code exits with nativeCallStatusCodeMemoryOutOfBounds status.
3423 func (c *amd64Compiler) compileMemoryAccessCeilSetup(offsetArg uint32, targetSizeInBytes int64) (asm.Register, error) {
3424     base := c.locationStack.pop()
3425     if err := c.compileEnsureOnRegister(base); err != nil {
3426         return asm.NilRegister, err
3427     }
3428
3429     result := base.register
3430     if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxInt32 {
3431         c.assembler.CompileConstToRegister(amd64.ADDQ, offsetConst, result)
3432     } else if offsetConst <= math.MaxUint32 {
3433         // Note: in practice this branch rarely happens, as it means the wasm binary knows that the
3434         // memory has more than 2 GiB, or at least tries to access a region above 2 GiB.
3435         //
3436         // In this case, we cannot directly add the offset to the register with an ADDQ(const) instruction.
3437         // That is because the imm32 const is sign-extended to 64-bit in ADDQ(const), which would turn
3438         // offsetConst into a negative number, which is wrong.
3439         tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3440         if err != nil {
3441             return asm.NilRegister, err
3442         }
3443         c.assembler.CompileConstToRegister(amd64.MOVL, int64(uint32(offsetConst)), tmp)
3444         c.assembler.CompileRegisterToRegister(amd64.ADDQ, tmp, result)
3445     } else {
3446         // If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
3447         c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
3448         return result, nil
3449     }
3450
3451     // Now we compare the value with the memory length which is held by callEngine.
3452     if err := c.compileCompareWithMemorySliceLen(result); err != nil {
3453         return asm.NilRegister, err
3454     }
3455
3456     // Trap if the value is out-of-bounds of memory length.
3457     c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds)
3458
3459     c.locationStack.markRegisterUnused(result)
3460     return result, nil
3461 }
3462
3463 // compileStore implements compiler.compileStore for the amd64 architecture.
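// For example, an i64.store with constant offset off is lowered roughly as follows (a sketch of
// the emitted steps; see compileStoreImpl and compileMemoryAccessCeilSetup):
//
//	ceil = pop(base); ceil += off + 8                                              // compileMemoryAccessCeilSetup
//	if ceil > len(memory.Buffer) { trap with nativeCallStatusCodeMemoryOutOfBounds }
//	MOVQ value, [amd64ReservedRegisterForMemory + ceil - 8]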
3464 func (c *amd64Compiler) compileStore(o *wazeroir.UnionOperation) error {
3465     var movInst asm.Instruction
3466     var targetSizeInByte int64
3467     unsignedType := wazeroir.UnsignedType(o.B1)
3468     offset := uint32(o.U2)
3469     switch unsignedType {
3470     case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
3471         movInst = amd64.MOVL
3472         targetSizeInByte = 32 / 8
3473     case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
3474         movInst = amd64.MOVQ
3475         targetSizeInByte = 64 / 8
3476     }
3477     return c.compileStoreImpl(offset, movInst, targetSizeInByte)
3478 }
3479
3480 // compileStore8 implements compiler.compileStore8 for the amd64 architecture.
3481 func (c *amd64Compiler) compileStore8(o *wazeroir.UnionOperation) error {
3482     return c.compileStoreImpl(uint32(o.U2), amd64.MOVB, 1)
3483 }
3484
3485 // compileStore16 implements compiler.compileStore16 for the amd64 architecture.
3486 func (c *amd64Compiler) compileStore16(o *wazeroir.UnionOperation) error {
3487     return c.compileStoreImpl(uint32(o.U2), amd64.MOVW, 16/8)
3488 }
3489
3490 // compileStore32 implements compiler.compileStore32 for the amd64 architecture.
3491 func (c *amd64Compiler) compileStore32(o *wazeroir.UnionOperation) error {
3492     return c.compileStoreImpl(uint32(o.U2), amd64.MOVL, 32/8)
3493 }
3494
3495 func (c *amd64Compiler) compileStoreImpl(offsetConst uint32, inst asm.Instruction, targetSizeInBytes int64) error {
3496     val := c.locationStack.pop()
3497     if err := c.compileEnsureOnRegister(val); err != nil {
3498         return err
3499     }
3500
3501     reg, err := c.compileMemoryAccessCeilSetup(offsetConst, targetSizeInBytes)
3502     if err != nil {
3503         return err
3504     }
3505
3506     c.assembler.CompileRegisterToMemoryWithIndex(
3507         inst, val.register,
3508         amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1,
3509     )
3510
3511     // We no longer need both the value and base registers.
3512     c.locationStack.releaseRegister(val)
3513     c.locationStack.markRegisterUnused(reg)
3514     return nil
3515 }
3516
3517 // compileMemoryGrow implements compiler.compileMemoryGrow for the amd64 architecture.
3518 func (c *amd64Compiler) compileMemoryGrow() error {
3519     if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3520         return err
3521     }
3522
3523     if err := c.compileCallBuiltinFunction(builtinFunctionIndexMemoryGrow); err != nil {
3524         return err
3525     }
3526
3527     // After the function call, we have to initialize the stack base pointer and memory reserved registers.
3528     c.compileReservedStackBasePointerInitialization()
3529     c.compileReservedMemoryPointerInitialization()
3530     return nil
3531 }
3532
3533 // compileMemorySize implements compiler.compileMemorySize for the amd64 architecture.
3534 func (c *amd64Compiler) compileMemorySize() error {
3535     if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3536         return err
3537     }
3538
3539     reg, err := c.allocateRegister(registerTypeGeneralPurpose)
3540     if err != nil {
3541         return err
3542     }
3543     loc := c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
3544
3545     c.assembler.CompileMemoryToRegister(amd64.MOVQ,
3546         amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
3547         loc.register)
3548
3549     c.assembler.CompileMemoryToRegister(amd64.MOVQ, loc.register, memoryInstanceBufferLenOffset, loc.register)
3550
3551     // WebAssembly's memory.size returns the current memory size in units of pages (one page is 65536 bytes).
3552     // That is equivalent to dividing the length of the memory slice by 65536,
3553     // which can be computed as a right shift by 16 bits since 65536 = 2^16.
3554     c.assembler.CompileConstToRegister(amd64.SHRQ, wasm.MemoryPageSizeInBits, loc.register)
3555     return nil
3556 }
3557
3558 // compileMemoryInit implements compiler.compileMemoryInit for the amd64 architecture.
3559 func (c *amd64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error {
3560     dataIndex := uint32(o.U1)
3561     return c.compileInitImpl(false, dataIndex, 0)
3562 }
3563
3564 // compileInitImpl implements compileTableInit and compileMemoryInit.
3565 //
3566 // TODO: the compiled code in this function should be reused and compile at once as
3567 // the code is independent of any module.
3568 func (c *amd64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
3569     outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
3570     if isTable {
3571         outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
3572     }
3573
3574     copySize := c.locationStack.pop()
3575     if err := c.compileEnsureOnRegister(copySize); err != nil {
3576         return err
3577     }
3578
3579     sourceOffset := c.locationStack.pop()
3580     if err := c.compileEnsureOnRegister(sourceOffset); err != nil {
3581         return err
3582     }
3583
3584     destinationOffset := c.locationStack.pop()
3585     if err := c.compileEnsureOnRegister(destinationOffset); err != nil {
3586         return err
3587     }
3588
3589     instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
3590     if err != nil {
3591         return err
3592     }
3593     c.locationStack.markRegisterUsed(instanceAddr)
3594     if isTable {
3595         c.compileLoadElemInstanceAddress(index, instanceAddr)
3596     } else {
3597         c.compileLoadDataInstanceAddress(index, instanceAddr)
3598     }
3599
3600     tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3601     if err != nil {
3602         return err
3603     }
3604     c.locationStack.markRegisterUsed(tmp)
3605
3606     // sourceOffset += size.
3607     c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register)
3608     // destinationOffset += size.
3609     c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register)
3610
3611     // Check instance bounds and if exceeds the length, exit with out of bounds error.
3612     c.assembler.CompileMemoryToRegister(amd64.CMPQ,
3613         instanceAddr, 8, // The lengths of DataInstance and ElementInstance are stored at offset 8.
3614         sourceOffset.register)
3615     c.compileMaybeExitFromNativeCode(amd64.JCC, outOfBoundsErrorStatus)
3616
3617     // Check destination bounds and if exceeds the length, exit with out of bounds error.
3618     if isTable {
3619         // Load the target table's address.
3620         c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp)
3621         c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex*8), tmp)
3622         // Compare length.
3623         c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, destinationOffset.register)
3624     } else {
3625         if err := c.compileCompareWithMemorySliceLen(destinationOffset.register); err != nil {
3626             return err
3627         }
3628     }
3629
3630     c.compileMaybeExitFromNativeCode(amd64.JCC, outOfBoundsErrorStatus)
3631
3632     // Otherwise, ready to copy the value from source to destination.
3633     //
3634     // If the copy size equals zero, we skip all the instructions below.
3635 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3636 skipJump := c.assembler.CompileJump(amd64.JEQ) 3637 3638 var scale int16 3639 var memToReg, regToMem asm.Instruction 3640 if isTable { 3641 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 3642 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, sourceOffset.register) 3643 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 3644 // destinationOffset += table buffer's absolute address. 3645 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3646 tmp, tableInstanceTableOffset, destinationOffset.register) 3647 // sourceOffset += data buffer's absolute address. 3648 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3649 instanceAddr, 0, sourceOffset.register) 3650 3651 // For tables, we move 8 bytes at once. 3652 memToReg = amd64.MOVQ 3653 regToMem = memToReg 3654 scale = 8 3655 } else { 3656 // destinationOffset += memory buffer's absolute address. 3657 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3658 3659 // sourceOffset += data buffer's absolute address. 3660 c.assembler.CompileMemoryToRegister(amd64.ADDQ, instanceAddr, 0, sourceOffset.register) 3661 3662 // Move one byte at once. 3663 memToReg = amd64.MOVBQZX 3664 regToMem = amd64.MOVB 3665 scale = 1 3666 } 3667 3668 // Negate the counter. 3669 c.assembler.CompileNoneToRegister(amd64.NEGQ, copySize.register) 3670 3671 beginCopyLoop := c.assembler.CompileStandAlone(amd64.NOP) 3672 3673 c.assembler.CompileMemoryWithIndexToRegister(memToReg, 3674 sourceOffset.register, 0, copySize.register, scale, 3675 tmp) 3676 // [destinationOffset + (size.register)] = tmp. 3677 c.assembler.CompileRegisterToMemoryWithIndex(regToMem, 3678 tmp, 3679 destinationOffset.register, 0, copySize.register, scale, 3680 ) 3681 3682 // size += 1 3683 c.assembler.CompileNoneToRegister(amd64.INCQ, copySize.register) 3684 c.assembler.CompileJump(amd64.JMI).AssignJumpTarget(beginCopyLoop) 3685 3686 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 3687 destinationOffset.register, instanceAddr, tmp) 3688 c.assembler.SetJumpTargetOnNext(skipJump) 3689 return nil 3690 } 3691 3692 // compileDataDrop implements compiler.compileDataDrop for the amd64 architecture. 3693 func (c *amd64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error { 3694 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 3695 return err 3696 } 3697 3698 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3699 if err != nil { 3700 return err 3701 } 3702 3703 dataIndex := uint32(o.U1) 3704 c.compileLoadDataInstanceAddress(dataIndex, tmp) 3705 3706 // Clears the content of DataInstance[o.DataIndex] (== []byte type). 3707 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 0) 3708 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 8) 3709 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 16) 3710 return nil 3711 } 3712 3713 func (c *amd64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) { 3714 // dst = dataIndex * dataInstanceStructSize. 
3715 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(dataIndex)*dataInstanceStructSize, dst) 3716 3717 // dst = &moduleInstance.DataInstances[0] + dst 3718 // = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize 3719 // = &moduleInstance.DataInstances[dataIndex] 3720 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3721 amd64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset, 3722 dst, 3723 ) 3724 } 3725 3726 // compileCopyLoopImpl implements a REP MOVSQ memory copy for the given range with support for both directions. 3727 func (c *amd64Compiler) compileCopyLoopImpl(destinationOffset, sourceOffset, copySize *runtimeValueLocation, backwards bool, bwOffset uint8) { 3728 // skip if nothing to copy 3729 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3730 emptyEightGroupsJump := c.assembler.CompileJump(amd64.JEQ) 3731 3732 // Prepare registers for swaps. There will never be more than 3 XCHGs in total. 3733 restoreCrossing := c.compilePreventCrossedTargetRegisters( 3734 []*runtimeValueLocation{destinationOffset, sourceOffset, copySize}, 3735 []asm.Register{amd64.RegDI, amd64.RegSI, amd64.RegCX}) 3736 3737 // Prepare registers for REP MOVSQ: copy from rsi to rdi, rcx times. 3738 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3739 c.compileMaybeSwapRegisters(sourceOffset.register, amd64.RegSI) 3740 c.compileMaybeSwapRegisters(copySize.register, amd64.RegCX) 3741 3742 // Point on first byte of first quadword to copy. 3743 if backwards { 3744 c.assembler.CompileConstToRegister(amd64.ADDQ, -int64(bwOffset), amd64.RegDI) 3745 c.assembler.CompileConstToRegister(amd64.ADDQ, -int64(bwOffset), amd64.RegSI) 3746 // Set REP prefix direction backwards. 3747 c.assembler.CompileStandAlone(amd64.STD) 3748 } 3749 3750 c.assembler.CompileStandAlone(amd64.REPMOVSQ) 3751 3752 if backwards { 3753 // Reset direction. 3754 c.assembler.CompileStandAlone(amd64.CLD) 3755 } 3756 3757 // Restore registers. 3758 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3759 c.compileMaybeSwapRegisters(sourceOffset.register, amd64.RegSI) 3760 c.compileMaybeSwapRegisters(copySize.register, amd64.RegCX) 3761 restoreCrossing() 3762 3763 c.assembler.SetJumpTargetOnNext(emptyEightGroupsJump) 3764 c.assembler.CompileStandAlone(amd64.NOP) 3765 } 3766 3767 // compileMemoryCopyLoopImpl is used for directly copying after bounds/direction check. 3768 func (c *amd64Compiler) compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize *runtimeValueLocation, tmp asm.Register, backwards bool) { 3769 // Point on first byte to be copied depending on direction. 3770 if backwards { 3771 c.assembler.CompileNoneToRegister(amd64.DECQ, sourceOffset.register) 3772 c.assembler.CompileNoneToRegister(amd64.DECQ, destinationOffset.register) 3773 } else { 3774 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, sourceOffset.register) 3775 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 3776 } 3777 3778 // destinationOffset += memory buffer's absolute address. 3779 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3780 // sourceOffset += memory buffer's absolute address. 3781 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, sourceOffset.register) 3782 3783 // Copy copySize % 8 bytes in loop to allow copying in 8 byte groups afterward. 
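// In Go terms, the remainder loop here plus the quadword copy that follows behave roughly like
// (illustrative sketch only):
//
//	for copySize%8 != 0 {
//		copy one byte from *sourceOffset to *destinationOffset // MOVBQZX + MOVB
//		advance or rewind both pointers                        // INCQ/DECQ depending on direction
//		copySize--
//	}
//	copySize >>= 3 // remaining 8-byte groups, handed to the REP MOVSQ in compileCopyLoopImpl.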
3784 beginLoop := c.assembler.CompileStandAlone(amd64.NOP) 3785 3786 // Check copySize % 8 == 0. 3787 c.assembler.CompileConstToRegister(amd64.TESTQ, 7, copySize.register) 3788 breakLoop := c.assembler.CompileJump(amd64.JEQ) 3789 3790 c.assembler.CompileMemoryToRegister(amd64.MOVBQZX, sourceOffset.register, 0, tmp) 3791 c.assembler.CompileRegisterToMemory(amd64.MOVB, tmp, destinationOffset.register, 0) 3792 3793 if backwards { 3794 c.assembler.CompileNoneToRegister(amd64.DECQ, sourceOffset.register) 3795 c.assembler.CompileNoneToRegister(amd64.DECQ, destinationOffset.register) 3796 } else { 3797 c.assembler.CompileNoneToRegister(amd64.INCQ, sourceOffset.register) 3798 c.assembler.CompileNoneToRegister(amd64.INCQ, destinationOffset.register) 3799 } 3800 3801 c.assembler.CompileNoneToRegister(amd64.DECQ, copySize.register) 3802 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(beginLoop) 3803 c.assembler.SetJumpTargetOnNext(breakLoop) 3804 3805 // compileCopyLoopImpl counts in groups of 8 bytes, so we have to divide the copySize by 8. 3806 c.assembler.CompileConstToRegister(amd64.SHRQ, 3, copySize.register) 3807 3808 c.compileCopyLoopImpl(destinationOffset, sourceOffset, copySize, backwards, 7) 3809 } 3810 3811 // compileMemoryCopy implements compiler.compileMemoryCopy for the amd64 architecture. 3812 // 3813 // This uses efficient `REP MOVSQ` instructions to copy in quadword (8 bytes) batches. The remaining bytes 3814 // are copied with a simple `MOV` loop. It uses backward copying for overlapped segments. 3815 func (c *amd64Compiler) compileMemoryCopy() error { 3816 copySize := c.locationStack.pop() 3817 if err := c.compileEnsureOnRegister(copySize); err != nil { 3818 return err 3819 } 3820 3821 sourceOffset := c.locationStack.pop() 3822 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 3823 return err 3824 } 3825 3826 destinationOffset := c.locationStack.pop() 3827 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3828 return err 3829 } 3830 3831 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3832 if err != nil { 3833 return err 3834 } 3835 c.locationStack.markRegisterUsed(tmp) 3836 3837 // sourceOffset += size. 3838 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 3839 // destinationOffset += size. 3840 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3841 // tmp = max(sourceOffset, destinationOffset). 3842 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, destinationOffset.register) 3843 c.assembler.CompileRegisterToRegister(amd64.MOVQ, sourceOffset.register, tmp) 3844 c.assembler.CompileRegisterToRegister(amd64.CMOVQCS, destinationOffset.register, tmp) 3845 3846 // Check source bounds and if exceeds the length, exit with out of bounds error. 3847 if err := c.compileCompareWithMemorySliceLen(sourceOffset.register); err != nil { 3848 return err 3849 } 3850 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds) 3851 3852 // Check destination bounds and if exceeds the length, exit with out of bounds error. 3853 if err := c.compileCompareWithMemorySliceLen(destinationOffset.register); err != nil { 3854 return err 3855 } 3856 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds) 3857 3858 // Skip zero size. 
3859     c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register)
3860     skipJump := c.assembler.CompileJump(amd64.JEQ)
3861
3862     // If dest < source, we can copy forwards.
3863     c.assembler.CompileRegisterToRegister(amd64.CMPQ, destinationOffset.register, sourceOffset.register)
3864     destLowerThanSourceJump := c.assembler.CompileJump(amd64.JLS)
3865
3866     // If source + size < dest, we can copy forwards.
3867     c.assembler.CompileRegisterToRegister(amd64.MOVQ, destinationOffset.register, tmp)
3868     c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, tmp)
3869     c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, tmp)
3870     sourceBoundLowerThanDestJump := c.assembler.CompileJump(amd64.JLS)
3871
3872     // Copy backwards.
3873     c.compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize, tmp, true)
3874     endJump := c.assembler.CompileJump(amd64.JMP)
3875
3876     // Copy forwards.
3877     c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump)
3878     c.assembler.SetJumpTargetOnNext(sourceBoundLowerThanDestJump)
3879     c.compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize, tmp, false)
3880
3881     c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register,
3882         destinationOffset.register, tmp)
3883     c.assembler.SetJumpTargetOnNext(skipJump)
3884     c.assembler.SetJumpTargetOnNext(endJump)
3885
3886     return nil
3887 }
3888
3889 // compileFillLoopImpl implements a REP STOSQ fill loop.
3890 func (c *amd64Compiler) compileFillLoopImpl(destinationOffset, value, fillSize *runtimeValueLocation, tmp asm.Register, replicateByte bool) {
3891     // Skip if nothing to fill.
3892     c.assembler.CompileRegisterToRegister(amd64.TESTQ, fillSize.register, fillSize.register)
3893     emptyEightGroupsJump := c.assembler.CompileJump(amd64.JEQ)
3894
3895     if replicateByte {
3896         // Truncate value.register to a single byte.
3897         c.assembler.CompileConstToRegister(amd64.ANDQ, 0xff, value.register)
3898         // Replicate single byte onto full 8-byte register.
3899         c.assembler.CompileConstToRegister(amd64.MOVQ, 0x0101010101010101, tmp)
3900         c.assembler.CompileRegisterToRegister(amd64.IMULQ, tmp, value.register)
3901     }
3902
3903     // Prepare registers for swaps. There will never be more than 3 XCHGs in total.
3904     restoreCrossing := c.compilePreventCrossedTargetRegisters(
3905         []*runtimeValueLocation{destinationOffset, value, fillSize},
3906         []asm.Register{amd64.RegDI, amd64.RegAX, amd64.RegCX})
3907
3908     // Prepare registers for REP STOSQ: fill at [rdi] with rax, rcx times.
3909     c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI)
3910     c.compileMaybeSwapRegisters(value.register, amd64.RegAX)
3911     c.compileMaybeSwapRegisters(fillSize.register, amd64.RegCX)
3912
3913     c.assembler.CompileStandAlone(amd64.REPSTOSQ)
3914
3915     // Restore registers.
3916     c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI)
3917     c.compileMaybeSwapRegisters(value.register, amd64.RegAX)
3918     c.compileMaybeSwapRegisters(fillSize.register, amd64.RegCX)
3919     restoreCrossing()
3920
3921     c.assembler.SetJumpTargetOnNext(emptyEightGroupsJump)
3922 }
3923
3924 // compileFillImpl implements the fill lowering shared by compileMemoryFill (isTable=false) and
3925 // the corresponding table fill lowering (isTable=true).
3926 //
3927 // This function uses efficient `REP STOSQ` instructions to fill in quadword (8 byte) batches
3928 // if the size is above 15 bytes. For smaller sizes, a simple MOVB loop is the better option.
3929 // 3930 // TODO: the compiled code in this function should be reused and compile at once as 3931 // the code is independent of any module. 3932 func (c *amd64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error { 3933 copySize := c.locationStack.pop() 3934 if err := c.compileEnsureOnRegister(copySize); err != nil { 3935 return err 3936 } 3937 3938 value := c.locationStack.pop() 3939 if err := c.compileEnsureOnRegister(value); err != nil { 3940 return err 3941 } 3942 3943 destinationOffset := c.locationStack.pop() 3944 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3945 return err 3946 } 3947 3948 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3949 if err != nil { 3950 return err 3951 } 3952 c.locationStack.markRegisterUsed(tmp) 3953 3954 // destinationOffset += size. 3955 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3956 3957 // Check destination bounds and if exceeds the length, exit with out of bounds error. 3958 if isTable { 3959 // tmp = &tables[0] 3960 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 3961 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 3962 tmp) 3963 3964 // tmp = [tmp + TableIndex*8] 3965 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 3966 // = [&tables[TableIndex]] = tables[TableIndex]. 3967 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex)*8, tmp) 3968 3969 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3970 tmp, tableInstanceTableLenOffset, 3971 destinationOffset.register) 3972 } else { 3973 if err := c.compileCompareWithMemorySliceLen(destinationOffset.register); err != nil { 3974 return err 3975 } 3976 } 3977 if isTable { 3978 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeInvalidTableAccess) 3979 } else { 3980 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeMemoryOutOfBounds) 3981 } 3982 3983 // Otherwise, ready to copy the value from source to destination. 3984 // 3985 // If the copy size equal zero, we skip the entire instructions below. 3986 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 3987 skipJump := c.assembler.CompileJump(amd64.JEQ) 3988 3989 // destinationOffset -= size. 3990 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 3991 3992 if isTable { 3993 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 3994 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 3995 // destinationOffset += table buffer's absolute address. 3996 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, destinationOffset.register) 3997 3998 } else { 3999 // destinationOffset += memory buffer's absolute address. 4000 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 4001 4002 // Copy first % 16 bytes with simple MOVB instruction. 
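// In Go terms, the memory path below behaves roughly like (illustrative sketch only):
//
//	for copySize%16 != 0 {
//		memory[destinationOffset] = byte(value) // MOVB
//		destinationOffset++
//		copySize--
//	}
//	copySize >>= 3                              // quadwords left to store.
//	value = (value & 0xff) * 0x0101010101010101 // byte replication in compileFillLoopImpl.
//	// REP STOSQ then stores value, copySize times.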
4003 beginCopyLoop := c.assembler.CompileStandAlone(amd64.NOP) 4004 c.assembler.CompileConstToRegister(amd64.TESTQ, 15, copySize.register) 4005 breakLoop := c.assembler.CompileJump(amd64.JEQ) 4006 4007 c.assembler.CompileRegisterToMemory(amd64.MOVB, value.register, destinationOffset.register, 0) 4008 4009 c.assembler.CompileNoneToRegister(amd64.INCQ, destinationOffset.register) 4010 c.assembler.CompileNoneToRegister(amd64.DECQ, copySize.register) 4011 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(beginCopyLoop) 4012 4013 c.assembler.SetJumpTargetOnNext(breakLoop) 4014 // compileFillLoopImpl counts in groups of 8 bytes, so we have to divide the copySize by 8. 4015 c.assembler.CompileConstToRegister(amd64.SHRQ, 3, copySize.register) 4016 } 4017 4018 c.compileFillLoopImpl(destinationOffset, value, copySize, tmp, !isTable) 4019 4020 c.locationStack.markRegisterUnused(copySize.register, value.register, 4021 destinationOffset.register, tmp) 4022 c.assembler.SetJumpTargetOnNext(skipJump) 4023 return nil 4024 } 4025 4026 // compileMemoryFill implements compiler.compileMemoryFill for the amd64 architecture. 4027 // 4028 // TODO: the compiled code in this function should be reused and compile at once as 4029 // the code is independent of any module. 4030 func (c *amd64Compiler) compileMemoryFill() error { 4031 return c.compileFillImpl(false, 0) 4032 } 4033 4034 // compileTableInit implements compiler.compileTableInit for the amd64 architecture. 4035 func (c *amd64Compiler) compileTableInit(o *wazeroir.UnionOperation) error { 4036 elemIndex := uint32(o.U1) 4037 tableIndex := uint32(o.U2) 4038 return c.compileInitImpl(true, elemIndex, tableIndex) 4039 } 4040 4041 // compileTableCopyLoopImpl is used for directly copying after bounds/direction check. 4042 func (c *amd64Compiler) compileTableCopyLoopImpl(srcTableIndex, dstTableIndex uint32, destinationOffset, sourceOffset, copySize *runtimeValueLocation, tmp asm.Register, backwards bool) { 4043 // Point on first byte to be copied. 4044 if !backwards { 4045 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, sourceOffset.register) 4046 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 4047 } 4048 4049 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 4050 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, sourceOffset.register) 4051 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 4052 // destinationOffset += table buffer's absolute address. 4053 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4054 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(dstTableIndex*8), tmp) 4055 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, destinationOffset.register) 4056 // sourceOffset += table buffer's absolute address. 4057 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4058 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(srcTableIndex*8), tmp) 4059 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, sourceOffset.register) 4060 4061 c.compileCopyLoopImpl(destinationOffset, sourceOffset, copySize, backwards, 8) 4062 } 4063 4064 // compileTableCopy implements compiler.compileTableCopy for the amd64 architecture. 
4065 // 4066 // It uses efficient `REP MOVSB` instructions for optimized copying. It uses backward copying for 4067 // overlapped segments. 4068 func (c *amd64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error { 4069 copySize := c.locationStack.pop() 4070 if err := c.compileEnsureOnRegister(copySize); err != nil { 4071 return err 4072 } 4073 4074 sourceOffset := c.locationStack.pop() 4075 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 4076 return err 4077 } 4078 4079 destinationOffset := c.locationStack.pop() 4080 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 4081 return err 4082 } 4083 4084 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4085 if err != nil { 4086 return err 4087 } 4088 4089 // sourceOffset += size. 4090 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 4091 // destinationOffset += size. 4092 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 4093 4094 srcTableIndex := uint32(o.U1) 4095 dstTableIndex := uint32(o.U2) 4096 4097 // Check source bounds and if exceeds the length, exit with out of bounds error. 4098 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4099 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(srcTableIndex*8), tmp) 4100 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, sourceOffset.register) 4101 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeInvalidTableAccess) 4102 4103 // Check destination bounds and if exceeds the length, exit with out of bounds error. 4104 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 4105 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(dstTableIndex*8), tmp) 4106 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, destinationOffset.register) 4107 c.compileMaybeExitFromNativeCode(amd64.JCC, nativeCallStatusCodeInvalidTableAccess) 4108 4109 // Skip zero size. 4110 c.assembler.CompileRegisterToRegister(amd64.TESTQ, copySize.register, copySize.register) 4111 skipJump := c.assembler.CompileJump(amd64.JEQ) 4112 4113 // If dest < source, we can copy forwards. 4114 c.assembler.CompileRegisterToRegister(amd64.CMPQ, destinationOffset.register, sourceOffset.register) 4115 destLowerThanSourceJump := c.assembler.CompileJump(amd64.JLS) 4116 4117 // If source + size < dest, we can copy forwards. 4118 c.assembler.CompileRegisterToRegister(amd64.MOVQ, destinationOffset.register, tmp) 4119 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, tmp) 4120 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, tmp) 4121 sourceBoundLowerThanDestJump := c.assembler.CompileJump(amd64.JLS) 4122 4123 // Copy backwards. 4124 c.compileTableCopyLoopImpl(srcTableIndex, dstTableIndex, destinationOffset, sourceOffset, copySize, tmp, true) 4125 endJump := c.assembler.CompileJump(amd64.JMP) 4126 4127 // Copy forwards. 
4128 c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump) 4129 c.assembler.SetJumpTargetOnNext(sourceBoundLowerThanDestJump) 4130 c.compileTableCopyLoopImpl(srcTableIndex, dstTableIndex, destinationOffset, sourceOffset, copySize, tmp, false) 4131 4132 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 4133 destinationOffset.register, tmp) 4134 c.assembler.SetJumpTargetOnNext(skipJump) 4135 c.assembler.SetJumpTargetOnNext(endJump) 4136 return nil 4137 } 4138 4139 // compileElemDrop implements compiler.compileElemDrop for the amd64 architecture. 4140 func (c *amd64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error { 4141 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4142 return err 4143 } 4144 4145 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4146 if err != nil { 4147 return err 4148 } 4149 4150 elemIndex := uint32(o.U1) 4151 c.compileLoadElemInstanceAddress(elemIndex, tmp) 4152 4153 // Clears the content of ElementInstances[o.ElemIndex].References (== []uintptr{} type). 4154 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 0) 4155 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 8) 4156 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 16) 4157 return nil 4158 } 4159 4160 func (c *amd64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) { 4161 // dst = elemIndex * elementInstanceStructSize 4162 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(elemIndex)*elementInstanceStructSize, dst) 4163 4164 // dst = &moduleInstance.ElementInstances[0] + dst 4165 // = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize 4166 // = &moduleInstance.ElementInstances[elemIndex] 4167 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 4168 amd64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset, 4169 dst, 4170 ) 4171 } 4172 4173 // compileTableGet implements compiler.compileTableGet for the amd64 architecture. 4174 func (c *amd64Compiler) compileTableGet(o *wazeroir.UnionOperation) error { 4175 ref, err := c.allocateRegister(registerTypeGeneralPurpose) 4176 if err != nil { 4177 return err 4178 } 4179 4180 c.locationStack.markRegisterUsed(ref) 4181 4182 offset := c.locationStack.pop() 4183 if err := c.compileEnsureOnRegister(offset); err != nil { 4184 return err 4185 } 4186 4187 // ref = &tables[0] 4188 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4189 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4190 ref) 4191 4192 // ref = [ref + TableIndex*8] 4193 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4194 // = [&tables[TableIndex]] = tables[TableIndex]. 4195 tableIndex := int64(o.U1) 4196 c.assembler.CompileMemoryToRegister(amd64.MOVQ, ref, tableIndex*8, ref) 4197 4198 // Out of bounds check. 
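	// Editorial note: the CMPQ/JHI pair below is the table.get bounds check;
	// execution continues only when offset < len(table.References), otherwise we
	// exit with nativeCallStatusCodeInvalidTableAccess. In pseudo-Go (trap is a
	// hypothetical stand-in for that exit):
	//
	//	if offset >= uint64(len(table.References)) {
	//		trap(invalidTableAccess)
	//	}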
4199 c.assembler.CompileMemoryToRegister(amd64.CMPQ, ref, tableInstanceTableLenOffset, offset.register) 4200 c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusCodeInvalidTableAccess) 4201 4202 // ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] 4203 c.assembler.CompileMemoryToRegister(amd64.MOVQ, ref, tableInstanceTableOffset, ref) 4204 4205 // ref = [ref + 0 + offset.register * 8] 4206 // = [&tables[TableIndex].References[0] + sizeOf(uintptr) * offset] 4207 // = [&tables[TableIndex].References[offset]] 4208 // = tables[TableIndex].References[offset] 4209 c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVQ, ref, 4210 0, offset.register, 8, ref, 4211 ) 4212 4213 c.locationStack.markRegisterUnused(offset.register) 4214 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit at runtime. 4215 return nil 4216 } 4217 4218 // compileTableSet implements compiler.compileTableSet for the amd64 architecture. 4219 func (c *amd64Compiler) compileTableSet(o *wazeroir.UnionOperation) error { 4220 ref := c.locationStack.pop() 4221 if err := c.compileEnsureOnRegister(ref); err != nil { 4222 return err 4223 } 4224 4225 offset := c.locationStack.pop() 4226 if err := c.compileEnsureOnRegister(offset); err != nil { 4227 return err 4228 } 4229 4230 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4231 if err != nil { 4232 return err 4233 } 4234 4235 // tmp = &tables[0] 4236 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4237 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4238 tmp) 4239 4240 // ref = [ref + TableIndex*8] 4241 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4242 // = [&tables[TableIndex]] = tables[TableIndex]. 4243 tableIndex := int64(o.U1) 4244 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, tableIndex*8, tmp) 4245 4246 // Out of bounds check. 4247 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, offset.register) 4248 c.compileMaybeExitFromNativeCode(amd64.JHI, nativeCallStatusCodeInvalidTableAccess) 4249 4250 // tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] 4251 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, tableInstanceTableOffset, tmp) 4252 4253 // [tmp + 0 + offset.register * 8] = ref 4254 // [&tables[TableIndex].References[0] + sizeOf(uintptr) * offset] = ref 4255 // [&tables[TableIndex].References[offset]] = ref 4256 // tables[TableIndex].References[offset] = ref 4257 c.assembler.CompileRegisterToMemoryWithIndex(amd64.MOVQ, 4258 ref.register, 4259 tmp, 0, offset.register, 8) 4260 4261 c.locationStack.markRegisterUnused(offset.register, ref.register) 4262 return nil 4263 } 4264 4265 // compileTableGrow implements compiler.compileTableGrow for the amd64 architecture. 4266 func (c *amd64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error { 4267 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4268 return err 4269 } 4270 4271 // Pushes the table index. 4272 tableIndex := uint32(o.U1) 4273 if err := c.compileConstI32Impl(tableIndex); err != nil { 4274 return err 4275 } 4276 4277 // Table grow cannot be done in assembly just like memory grow as it involves with allocation in Go. 4278 // Therefore, call out to the built function for this purpose. 
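	// Editorial note: the Go-side builtin consumes the three operands on the
	// value stack at this point (the init value and element count pushed by the
	// program, plus the table index pushed just above) and pushes the previous
	// table length, or 0xffffffff (-1) when the table cannot grow. Simplified
	// sketch of that semantics, ignoring the table's maximum size (growTable is
	// a hypothetical helper, not the actual builtin):
	//
	//	func growTable(refs []uintptr, n uint32, init uintptr) (newRefs []uintptr, prev int32) {
	//		prev = int32(len(refs))
	//		for i := uint32(0); i < n; i++ {
	//			refs = append(refs, init)
	//		}
	//		return refs, prev
	//	}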
4279 if err := c.compileCallBuiltinFunction(builtinFunctionIndexTableGrow); err != nil { 4280 return err 4281 } 4282 4283 // TableGrow consumes three values (table index, number of items, initial value). 4284 for i := 0; i < 3; i++ { 4285 c.locationStack.pop() 4286 } 4287 4288 // Then, the previous length was pushed as the result. 4289 loc := c.locationStack.pushRuntimeValueLocationOnStack() 4290 loc.valueType = runtimeValueTypeI32 4291 4292 // After return, we re-initialize reserved registers just like preamble of functions. 4293 c.compileReservedStackBasePointerInitialization() 4294 c.compileReservedMemoryPointerInitialization() 4295 return nil 4296 } 4297 4298 // compileTableSize implements compiler.compileTableSize for the amd64 architecture. 4299 func (c *amd64Compiler) compileTableSize(o *wazeroir.UnionOperation) error { 4300 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4301 return err 4302 } 4303 4304 result, err := c.allocateRegister(registerTypeGeneralPurpose) 4305 if err != nil { 4306 return err 4307 } 4308 4309 // result = &tables[0] 4310 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4311 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4312 result) 4313 4314 // result = [result + TableIndex*8] 4315 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4316 // = [&tables[TableIndex]] = tables[TableIndex]. 4317 tableIndex := int64(o.U1) 4318 c.assembler.CompileMemoryToRegister(amd64.MOVQ, result, tableIndex*8, result) 4319 4320 // result = [result + tableInstanceTableLenOffset] 4321 // = [tables[TableIndex] + tableInstanceTableLenOffset] 4322 // = len(tables[TableIndex]) 4323 c.assembler.CompileMemoryToRegister(amd64.MOVQ, result, tableInstanceTableLenOffset, result) 4324 4325 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32) 4326 return nil 4327 } 4328 4329 // compileTableFill implements compiler.compileTableFill for the amd64 architecture. 4330 func (c *amd64Compiler) compileTableFill(o *wazeroir.UnionOperation) error { 4331 tableIndex := uint32(o.U1) 4332 return c.compileFillImpl(true, tableIndex) 4333 } 4334 4335 // compileRefFunc implements compiler.compileRefFunc for the amd64 architecture. 4336 func (c *amd64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error { 4337 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4338 return err 4339 } 4340 4341 ref, err := c.allocateRegister(registerTypeGeneralPurpose) 4342 if err != nil { 4343 return err 4344 } 4345 4346 functionIndex := int64(o.U1) 4347 c.assembler.CompileConstToRegister(amd64.MOVQ, functionIndex*functionSize, ref) 4348 4349 // ref = [amd64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset + int64(o.FunctionIndex)*functionSize] 4350 // = &moduleEngine.functions[index] 4351 c.assembler.CompileMemoryToRegister( 4352 amd64.ADDQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset, 4353 ref, 4354 ) 4355 4356 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) 4357 return nil 4358 } 4359 4360 // compileConstI32 implements compiler.compileConstI32 for the amd64 architecture. 
4361 func (c *amd64Compiler) compileConstI32(o *wazeroir.UnionOperation) error { 4362 return c.compileConstI32Impl(uint32(o.U1)) 4363 } 4364 4365 func (c *amd64Compiler) compileConstI32Impl(v uint32) error { 4366 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4367 return err 4368 } 4369 4370 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 4371 if err != nil { 4372 return err 4373 } 4374 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32) 4375 c.assembler.CompileConstToRegister(amd64.MOVL, int64(v), reg) 4376 return nil 4377 } 4378 4379 // compileConstI64 implements compiler.compileConstI64 for the amd64 architecture. 4380 func (c *amd64Compiler) compileConstI64(o *wazeroir.UnionOperation) error { 4381 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4382 return err 4383 } 4384 4385 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 4386 if err != nil { 4387 return err 4388 } 4389 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64) 4390 4391 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(o.U1), reg) 4392 return nil 4393 } 4394 4395 // compileConstF32 implements compiler.compileConstF32 for the amd64 architecture. 4396 func (c *amd64Compiler) compileConstF32(o *wazeroir.UnionOperation) error { 4397 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4398 return err 4399 } 4400 4401 reg, err := c.allocateRegister(registerTypeVector) 4402 if err != nil { 4403 return err 4404 } 4405 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeF32) 4406 4407 // We cannot directly load the value from memory to float regs, 4408 // so we move it to int reg temporarily. 4409 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 4410 if err != nil { 4411 return err 4412 } 4413 4414 c.assembler.CompileConstToRegister(amd64.MOVL, int64(o.U1) /*math.Float32bits(o.Value)*/, tmpReg) 4415 c.assembler.CompileRegisterToRegister(amd64.MOVL, tmpReg, reg) 4416 return nil 4417 } 4418 4419 // compileConstF64 implements compiler.compileConstF64 for the amd64 architecture. 4420 func (c *amd64Compiler) compileConstF64(o *wazeroir.UnionOperation) error { 4421 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4422 return err 4423 } 4424 4425 reg, err := c.allocateRegister(registerTypeVector) 4426 if err != nil { 4427 return err 4428 } 4429 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeF64) 4430 4431 // We cannot directly load the value from memory to float regs, 4432 // so we move it to int reg temporarily. 
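	// Editorial note: the const-to-GPR and GPR-to-XMM moves below are a pure bit
	// move, not a numeric conversion. The equivalent in Go (math is already
	// imported by this file) would be:
	//
	//	value := math.Float64frombits(o.U1) // o.U1 holds the raw IEEE-754 bits
	//
	// and the 32-bit case above likewise corresponds to
	// math.Float32frombits(uint32(o.U1)).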
4433 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 4434 if err != nil { 4435 return err 4436 } 4437 4438 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(o.U1) /* math.Float64bits(o.Value) */, tmpReg) 4439 c.assembler.CompileRegisterToRegister(amd64.MOVQ, tmpReg, reg) 4440 return nil 4441 } 4442 4443 func (c *amd64Compiler) compileAtomicLoad(o *wazeroir.UnionOperation) error { 4444 var ( 4445 inst asm.Instruction 4446 targetSizeInBytes int64 4447 vt runtimeValueType 4448 ) 4449 4450 unsignedType := wazeroir.UnsignedType(o.B1) 4451 offset := uint32(o.U2) 4452 4453 switch unsignedType { 4454 case wazeroir.UnsignedTypeI32: 4455 inst = amd64.MOVL 4456 targetSizeInBytes = 32 / 8 4457 vt = runtimeValueTypeI32 4458 case wazeroir.UnsignedTypeI64: 4459 inst = amd64.MOVQ 4460 targetSizeInBytes = 64 / 8 4461 vt = runtimeValueTypeI64 4462 } 4463 4464 return c.compileAtomicLoadImpl(inst, offset, targetSizeInBytes, vt) 4465 } 4466 4467 func (c *amd64Compiler) compileAtomicLoad8(o *wazeroir.UnionOperation) error { 4468 var ( 4469 inst asm.Instruction 4470 vt runtimeValueType 4471 ) 4472 4473 unsignedType := wazeroir.UnsignedType(o.B1) 4474 offset := uint32(o.U2) 4475 4476 switch unsignedType { 4477 case wazeroir.UnsignedTypeI32: 4478 inst = amd64.MOVBLZX 4479 vt = runtimeValueTypeI32 4480 case wazeroir.UnsignedTypeI64: 4481 inst = amd64.MOVBQZX 4482 vt = runtimeValueTypeI64 4483 } 4484 4485 return c.compileAtomicLoadImpl(inst, offset, 1, vt) 4486 } 4487 4488 func (c *amd64Compiler) compileAtomicLoad16(o *wazeroir.UnionOperation) error { 4489 var ( 4490 inst asm.Instruction 4491 vt runtimeValueType 4492 ) 4493 4494 unsignedType := wazeroir.UnsignedType(o.B1) 4495 offset := uint32(o.U2) 4496 4497 switch unsignedType { 4498 case wazeroir.UnsignedTypeI32: 4499 inst = amd64.MOVWLZX 4500 vt = runtimeValueTypeI32 4501 case wazeroir.UnsignedTypeI64: 4502 inst = amd64.MOVWQZX 4503 vt = runtimeValueTypeI64 4504 } 4505 4506 return c.compileAtomicLoadImpl(inst, offset, 16/8, vt) 4507 } 4508 4509 func (c *amd64Compiler) compileAtomicLoadImpl( 4510 inst asm.Instruction, offset uint32, targetSizeInBytes int64, resultRuntimeValueType runtimeValueType, 4511 ) error { 4512 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 4513 if err != nil { 4514 return err 4515 } 4516 4517 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4518 4519 c.assembler.CompileMemoryWithIndexToRegister(inst, 4520 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 
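		// Editorial note: "ceil" (held in reg) is the already bounds-checked
		// exclusive end of the access, i.e. the dynamic address plus the constant
		// offset plus targetSizeInBytes, so the load reads the targetSizeInBytes
		// bytes ending at ceil. For a 32-bit load this corresponds roughly to
		// (encoding/binary is an assumed import for the sketch, not used by this
		// file):
		//
		//	v := binary.LittleEndian.Uint32(memory.Buffer[ceil-4 : ceil])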
4521 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 4522 reg) 4523 c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType) 4524 4525 return nil 4526 } 4527 4528 func (c *amd64Compiler) compileAtomicStore(o *wazeroir.UnionOperation) error { 4529 var inst asm.Instruction 4530 var targetSizeInByte int64 4531 unsignedType := wazeroir.UnsignedType(o.B1) 4532 offset := uint32(o.U2) 4533 switch unsignedType { 4534 case wazeroir.UnsignedTypeI32: 4535 inst = amd64.XCHGL 4536 targetSizeInByte = 32 / 8 4537 case wazeroir.UnsignedTypeI64: 4538 inst = amd64.XCHGQ 4539 targetSizeInByte = 64 / 8 4540 } 4541 return c.compileAtomicStoreImpl(inst, offset, targetSizeInByte) 4542 } 4543 4544 func (c *amd64Compiler) compileAtomicStore8(o *wazeroir.UnionOperation) error { 4545 return c.compileAtomicStoreImpl(amd64.XCHGB, uint32(o.U2), 1) 4546 } 4547 4548 func (c *amd64Compiler) compileAtomicStore16(o *wazeroir.UnionOperation) error { 4549 return c.compileAtomicStoreImpl(amd64.XCHGW, uint32(o.U2), 16/8) 4550 } 4551 4552 func (c *amd64Compiler) compileAtomicStoreImpl( 4553 inst asm.Instruction, offset uint32, targetSizeInBytes int64, 4554 ) error { 4555 val := c.locationStack.pop() 4556 if err := c.compileEnsureOnRegister(val); err != nil { 4557 return err 4558 } 4559 4560 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 4561 if err != nil { 4562 return err 4563 } 4564 4565 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4566 4567 c.assembler.CompileRegisterToMemoryWithIndex( 4568 inst, val.register, 4569 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 4570 ) 4571 4572 // We no longer need both the value and base registers. 4573 c.locationStack.releaseRegister(val) 4574 c.locationStack.markRegisterUnused(reg) 4575 return nil 4576 } 4577 4578 func (c *amd64Compiler) compileAtomicRMW(o *wazeroir.UnionOperation) error { 4579 var ( 4580 inst asm.Instruction 4581 targetSizeInBytes int64 4582 vt runtimeValueType 4583 ) 4584 4585 unsignedType := wazeroir.UnsignedType(o.B1) 4586 op := wazeroir.AtomicArithmeticOp(o.B2) 4587 offset := uint32(o.U2) 4588 4589 switch unsignedType { 4590 case wazeroir.UnsignedTypeI32: 4591 targetSizeInBytes = 32 / 8 4592 vt = runtimeValueTypeI32 4593 switch op { 4594 case wazeroir.AtomicArithmeticOpAdd: 4595 return c.compileAtomicAddImpl(amd64.XADDL, offset, false, targetSizeInBytes, vt) 4596 case wazeroir.AtomicArithmeticOpSub: 4597 return c.compileAtomicAddImpl(amd64.XADDL, offset, true, targetSizeInBytes, vt) 4598 case wazeroir.AtomicArithmeticOpAnd: 4599 inst = amd64.ANDL 4600 case wazeroir.AtomicArithmeticOpOr: 4601 inst = amd64.ORL 4602 case wazeroir.AtomicArithmeticOpXor: 4603 inst = amd64.XORL 4604 case wazeroir.AtomicArithmeticOpNop: 4605 return c.compileAtomicXchgImpl(amd64.XCHGL, offset, targetSizeInBytes, vt) 4606 } 4607 case wazeroir.UnsignedTypeI64: 4608 targetSizeInBytes = 64 / 8 4609 vt = runtimeValueTypeI64 4610 switch op { 4611 case wazeroir.AtomicArithmeticOpAdd: 4612 return c.compileAtomicAddImpl(amd64.XADDQ, offset, false, targetSizeInBytes, vt) 4613 case wazeroir.AtomicArithmeticOpSub: 4614 return c.compileAtomicAddImpl(amd64.XADDQ, offset, true, targetSizeInBytes, vt) 4615 case wazeroir.AtomicArithmeticOpAnd: 4616 inst = amd64.ANDQ 4617 case wazeroir.AtomicArithmeticOpOr: 4618 inst = amd64.ORQ 4619 case wazeroir.AtomicArithmeticOpXor: 4620 inst = amd64.XORQ 4621 case wazeroir.AtomicArithmeticOpNop: 4622 return c.compileAtomicXchgImpl(amd64.XCHGQ, offset, targetSizeInBytes, vt) 4623 } 4624 } 4625 4626 return 
c.compileAtomicRMWCASLoopImpl(inst, offset, targetSizeInBytes, vt) 4627 } 4628 4629 func (c *amd64Compiler) compileAtomicRMW8(o *wazeroir.UnionOperation) error { 4630 var ( 4631 inst asm.Instruction 4632 vt runtimeValueType 4633 ) 4634 4635 unsignedType := wazeroir.UnsignedType(o.B1) 4636 op := wazeroir.AtomicArithmeticOp(o.B2) 4637 offset := uint32(o.U2) 4638 4639 switch unsignedType { 4640 case wazeroir.UnsignedTypeI32: 4641 vt = runtimeValueTypeI32 4642 case wazeroir.UnsignedTypeI64: 4643 vt = runtimeValueTypeI64 4644 } 4645 4646 switch op { 4647 case wazeroir.AtomicArithmeticOpAdd: 4648 return c.compileAtomicAddImpl(amd64.XADDB, offset, false, 1, vt) 4649 case wazeroir.AtomicArithmeticOpSub: 4650 return c.compileAtomicAddImpl(amd64.XADDB, offset, true, 1, vt) 4651 case wazeroir.AtomicArithmeticOpAnd: 4652 inst = amd64.ANDL 4653 case wazeroir.AtomicArithmeticOpOr: 4654 inst = amd64.ORL 4655 case wazeroir.AtomicArithmeticOpXor: 4656 inst = amd64.XORL 4657 case wazeroir.AtomicArithmeticOpNop: 4658 return c.compileAtomicXchgImpl(amd64.XCHGB, offset, 1, vt) 4659 } 4660 4661 return c.compileAtomicRMWCASLoopImpl(inst, offset, 1, vt) 4662 } 4663 4664 func (c *amd64Compiler) compileAtomicRMW16(o *wazeroir.UnionOperation) error { 4665 var ( 4666 inst asm.Instruction 4667 vt runtimeValueType 4668 ) 4669 4670 unsignedType := wazeroir.UnsignedType(o.B1) 4671 op := wazeroir.AtomicArithmeticOp(o.B2) 4672 offset := uint32(o.U2) 4673 4674 switch unsignedType { 4675 case wazeroir.UnsignedTypeI32: 4676 vt = runtimeValueTypeI32 4677 case wazeroir.UnsignedTypeI64: 4678 vt = runtimeValueTypeI64 4679 } 4680 4681 switch op { 4682 case wazeroir.AtomicArithmeticOpAdd: 4683 return c.compileAtomicAddImpl(amd64.XADDW, offset, false, 16/8, vt) 4684 case wazeroir.AtomicArithmeticOpSub: 4685 return c.compileAtomicAddImpl(amd64.XADDW, offset, true, 16/8, vt) 4686 case wazeroir.AtomicArithmeticOpAnd: 4687 inst = amd64.ANDL 4688 case wazeroir.AtomicArithmeticOpOr: 4689 inst = amd64.ORL 4690 case wazeroir.AtomicArithmeticOpXor: 4691 inst = amd64.XORL 4692 case wazeroir.AtomicArithmeticOpNop: 4693 return c.compileAtomicXchgImpl(amd64.XCHGW, offset, 16/8, vt) 4694 } 4695 4696 return c.compileAtomicRMWCASLoopImpl(inst, offset, 16/8, vt) 4697 } 4698 4699 func (c *amd64Compiler) compileAtomicAddImpl(inst asm.Instruction, offsetConst uint32, negateArg bool, targetSizeInBytes int64, resultRuntimeValueType runtimeValueType) error { 4700 val := c.locationStack.pop() 4701 if err := c.compileEnsureOnRegister(val); err != nil { 4702 return err 4703 } 4704 4705 if negateArg { 4706 var negArg asm.Instruction 4707 switch targetSizeInBytes { 4708 case 1: 4709 negArg = amd64.NEGB 4710 case 2: 4711 negArg = amd64.NEGW 4712 case 4: 4713 negArg = amd64.NEGL 4714 case 8: 4715 negArg = amd64.NEGQ 4716 } 4717 c.assembler.CompileNoneToRegister(negArg, val.register) 4718 } 4719 4720 reg, err := c.compileMemoryAccessCeilSetup(offsetConst, targetSizeInBytes) 4721 if err != nil { 4722 return err 4723 } 4724 4725 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4726 4727 c.assembler.CompileRegisterToMemoryWithIndexAndLock( 4728 inst, val.register, 4729 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 4730 ) 4731 4732 if targetSizeInBytes < 4 { 4733 mask := (1 << (8 * targetSizeInBytes)) - 1 4734 c.assembler.CompileConstToRegister(amd64.ANDQ, int64(mask), val.register) 4735 } 4736 4737 c.locationStack.markRegisterUnused(reg) 4738 c.locationStack.pushRuntimeValueLocationOnRegister(val.register, resultRuntimeValueType) 4739 4740 return 
nil 4741 } 4742 4743 func (c *amd64Compiler) compileAtomicXchgImpl(inst asm.Instruction, offsetConst uint32, targetSizeInBytes int64, resultRuntimeValueType runtimeValueType) error { 4744 val := c.locationStack.pop() 4745 if err := c.compileEnsureOnRegister(val); err != nil { 4746 return err 4747 } 4748 4749 reg, err := c.compileMemoryAccessCeilSetup(offsetConst, targetSizeInBytes) 4750 if err != nil { 4751 return err 4752 } 4753 4754 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4755 4756 c.assembler.CompileRegisterToMemoryWithIndex( 4757 inst, val.register, 4758 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 4759 ) 4760 4761 if targetSizeInBytes < 4 { 4762 mask := (1 << (8 * targetSizeInBytes)) - 1 4763 c.assembler.CompileConstToRegister(amd64.ANDQ, int64(mask), val.register) 4764 } 4765 4766 c.locationStack.markRegisterUnused(reg) 4767 c.locationStack.pushRuntimeValueLocationOnRegister(val.register, resultRuntimeValueType) 4768 4769 return nil 4770 } 4771 4772 func (c *amd64Compiler) compileAtomicRMWCASLoopImpl(rmwInst asm.Instruction, 4773 offsetConst uint32, targetSizeInBytes int64, resultRuntimeValueType runtimeValueType, 4774 ) error { 4775 const resultRegister = amd64.RegAX 4776 4777 var copyInst asm.Instruction 4778 var loadInst asm.Instruction 4779 var cmpXchgInst asm.Instruction 4780 4781 switch targetSizeInBytes { 4782 case 8: 4783 copyInst = amd64.MOVQ 4784 loadInst = amd64.MOVQ 4785 cmpXchgInst = amd64.CMPXCHGQ 4786 case 4: 4787 copyInst = amd64.MOVL 4788 loadInst = amd64.MOVL 4789 cmpXchgInst = amd64.CMPXCHGL 4790 case 2: 4791 copyInst = amd64.MOVL 4792 loadInst = amd64.MOVWLZX 4793 cmpXchgInst = amd64.CMPXCHGW 4794 case 1: 4795 copyInst = amd64.MOVL 4796 loadInst = amd64.MOVBLZX 4797 cmpXchgInst = amd64.CMPXCHGB 4798 } 4799 4800 c.onValueReleaseRegisterToStack(resultRegister) 4801 c.locationStack.markRegisterUsed(resultRegister) 4802 4803 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4804 if err != nil { 4805 return err 4806 } 4807 c.locationStack.markRegisterUsed(tmp) 4808 4809 val := c.locationStack.pop() 4810 if err := c.compileEnsureOnRegister(val); err != nil { 4811 return err 4812 } 4813 4814 reg, err := c.compileMemoryAccessCeilSetup(offsetConst, targetSizeInBytes) 4815 if err != nil { 4816 return err 4817 } 4818 4819 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4820 4821 if targetSizeInBytes < 32 { 4822 mask := (1 << (8 * targetSizeInBytes)) - 1 4823 c.assembler.CompileConstToRegister(amd64.ANDQ, int64(mask), val.register) 4824 } 4825 4826 beginLoop := c.assembler.CompileStandAlone(amd64.NOP) 4827 c.assembler.CompileRegisterToRegister(copyInst, val.register, tmp) 4828 c.assembler.CompileMemoryWithIndexToRegister( 4829 loadInst, amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, resultRegister) 4830 if targetSizeInBytes < 32 { 4831 mask := (1 << (8 * targetSizeInBytes)) - 1 4832 c.assembler.CompileConstToRegister(amd64.ANDQ, int64(mask), resultRegister) 4833 } 4834 c.assembler.CompileRegisterToRegister(rmwInst, resultRegister, tmp) 4835 c.assembler.CompileRegisterToMemoryWithIndexAndLock( 4836 cmpXchgInst, tmp, 4837 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 4838 ) 4839 c.assembler.CompileJump(amd64.JNE).AssignJumpTarget(beginLoop) 4840 4841 if targetSizeInBytes < 32 { 4842 mask := (1 << (8 * targetSizeInBytes)) - 1 4843 c.assembler.CompileConstToRegister(amd64.ANDQ, int64(mask), resultRegister) 4844 } 4845 4846 c.locationStack.markRegisterUnused(reg) 4847 c.locationStack.markRegisterUnused(tmp) 
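	// Editorial note: the loop above is the classic compare-and-swap retry for
	// and/or/xor, which must return the old value even though amd64's LOCK
	// AND/OR/XOR do not produce it (add/sub use LOCK XADD and plain exchange
	// uses XCHG instead; subtraction just negates the operand first, since
	// x-y == x+(-y)). The same strategy against an ordinary Go variable,
	// assuming "sync/atomic" were imported:
	//
	//	func atomicAnd32(addr *uint32, operand uint32) (old uint32) {
	//		for {
	//			old = atomic.LoadUint32(addr)
	//			if atomic.CompareAndSwapUint32(addr, old, old&operand) {
	//				return old // Wasm atomic.rmw ops return the value before the operation
	//			}
	//		}
	//	}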
4848 c.locationStack.markRegisterUnused(val.register) 4849 c.locationStack.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType) 4850 4851 return nil 4852 } 4853 4854 func (c *amd64Compiler) compileAtomicRMWCmpxchg(o *wazeroir.UnionOperation) error { 4855 var ( 4856 casInst asm.Instruction 4857 targetSizeInBytes int64 4858 vt runtimeValueType 4859 ) 4860 4861 unsignedType := wazeroir.UnsignedType(o.B1) 4862 offset := uint32(o.U2) 4863 4864 switch unsignedType { 4865 case wazeroir.UnsignedTypeI32: 4866 casInst = amd64.CMPXCHGL 4867 targetSizeInBytes = 32 / 8 4868 vt = runtimeValueTypeI32 4869 case wazeroir.UnsignedTypeI64: 4870 casInst = amd64.CMPXCHGQ 4871 targetSizeInBytes = 64 / 8 4872 vt = runtimeValueTypeI64 4873 } 4874 return c.compileAtomicRMWCmpxchgImpl(casInst, offset, targetSizeInBytes, vt) 4875 } 4876 4877 func (c *amd64Compiler) compileAtomicRMW8Cmpxchg(o *wazeroir.UnionOperation) error { 4878 var vt runtimeValueType 4879 4880 unsignedType := wazeroir.UnsignedType(o.B1) 4881 offset := uint32(o.U2) 4882 4883 switch unsignedType { 4884 case wazeroir.UnsignedTypeI32: 4885 vt = runtimeValueTypeI32 4886 case wazeroir.UnsignedTypeI64: 4887 vt = runtimeValueTypeI64 4888 } 4889 return c.compileAtomicRMWCmpxchgImpl(amd64.CMPXCHGB, offset, 1, vt) 4890 } 4891 4892 func (c *amd64Compiler) compileAtomicRMW16Cmpxchg(o *wazeroir.UnionOperation) error { 4893 var vt runtimeValueType 4894 4895 unsignedType := wazeroir.UnsignedType(o.B1) 4896 offset := uint32(o.U2) 4897 4898 switch unsignedType { 4899 case wazeroir.UnsignedTypeI32: 4900 vt = runtimeValueTypeI32 4901 case wazeroir.UnsignedTypeI64: 4902 vt = runtimeValueTypeI64 4903 } 4904 return c.compileAtomicRMWCmpxchgImpl(amd64.CMPXCHGW, offset, 16/8, vt) 4905 } 4906 4907 func (c *amd64Compiler) compileAtomicRMWCmpxchgImpl(inst asm.Instruction, offsetArg uint32, targetSizeInBytes int64, resultRuntimeValueType runtimeValueType) error { 4908 const resultRegister = amd64.RegAX 4909 4910 repl := c.locationStack.pop() 4911 exp := c.locationStack.pop() 4912 4913 // expected value must be in accumulator register, which will also hold the loaded result. 
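	// Editorial note: CMPXCHG implicitly uses the accumulator: it compares
	// AX/EAX/RAX with the memory operand, stores the replacement on equality,
	// and otherwise loads the memory value into the accumulator, so AX always
	// ends up holding the old value, which is exactly what Wasm's
	// atomic.rmw.cmpxchg must return. Rough Go equivalent, assuming
	// "sync/atomic" were imported (cmpxchg32 is a hypothetical helper):
	//
	//	func cmpxchg32(addr *uint32, expected, replacement uint32) (old uint32) {
	//		for {
	//			old = atomic.LoadUint32(addr)
	//			if old != expected {
	//				return old
	//			}
	//			if atomic.CompareAndSwapUint32(addr, expected, replacement) {
	//				return expected
	//			}
	//		}
	//	}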
4914 if exp.register != resultRegister { 4915 c.onValueReleaseRegisterToStack(resultRegister) 4916 if exp.onConditionalRegister() { 4917 c.compileMoveConditionalToGeneralPurposeRegister(exp, resultRegister) 4918 } else if exp.onStack() { 4919 exp.setRegister(resultRegister) 4920 c.compileLoadValueOnStackToRegister(exp) 4921 c.locationStack.markRegisterUnused(resultRegister) 4922 } else { 4923 c.assembler.CompileRegisterToRegister(amd64.MOVQ, exp.register, resultRegister) 4924 c.locationStack.releaseRegister(exp) 4925 exp.setRegister(resultRegister) 4926 c.locationStack.markRegisterUsed(resultRegister) 4927 } 4928 } 4929 4930 if err := c.compileEnsureOnRegister(repl); err != nil { 4931 return err 4932 } 4933 4934 reg, err := c.compileMemoryAccessCeilSetup(offsetArg, targetSizeInBytes) 4935 if err != nil { 4936 return err 4937 } 4938 4939 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4940 4941 c.assembler.CompileRegisterToMemoryWithIndexAndLock( 4942 inst, repl.register, 4943 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 4944 ) 4945 4946 if targetSizeInBytes < 4 { 4947 mask := (1 << (8 * targetSizeInBytes)) - 1 4948 c.assembler.CompileConstToRegister(amd64.ANDQ, int64(mask), resultRegister) 4949 } 4950 4951 c.locationStack.markRegisterUnused(reg) 4952 c.locationStack.markRegisterUnused(repl.register) 4953 c.locationStack.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType) 4954 4955 return nil 4956 } 4957 4958 func (c *amd64Compiler) compileAtomicMemoryWait(o *wazeroir.UnionOperation) error { 4959 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4960 return err 4961 } 4962 4963 var ( 4964 vt runtimeValueType 4965 targetSizeInBytes int64 4966 waitFunc wasm.Index 4967 ) 4968 4969 unsignedType := wazeroir.UnsignedType(o.B1) 4970 offset := uint32(o.U2) 4971 4972 switch unsignedType { 4973 case wazeroir.UnsignedTypeI32: 4974 vt = runtimeValueTypeI32 4975 targetSizeInBytes = 32 / 8 4976 waitFunc = builtinFunctionMemoryWait32 4977 case wazeroir.UnsignedTypeI64: 4978 vt = runtimeValueTypeI64 4979 targetSizeInBytes = 64 / 8 4980 waitFunc = builtinFunctionMemoryWait64 4981 } 4982 4983 timeout := c.locationStack.pop() 4984 if err := c.compileEnsureOnRegister(timeout); err != nil { 4985 return err 4986 } 4987 exp := c.locationStack.pop() 4988 if err := c.compileEnsureOnRegister(exp); err != nil { 4989 return err 4990 } 4991 4992 reg, err := c.compileMemoryAccessCeilSetup(offset, targetSizeInBytes) 4993 if err != nil { 4994 return err 4995 } 4996 c.locationStack.markRegisterUsed(reg) 4997 c.compileMemoryAlignmentCheck(reg, targetSizeInBytes) 4998 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, reg) 4999 c.assembler.CompileConstToRegister(amd64.ADDQ, -targetSizeInBytes, reg) 5000 5001 // Push address, values, and timeout back to read in Go 5002 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64) 5003 c.pushRuntimeValueLocationOnRegister(exp.register, vt) 5004 c.pushRuntimeValueLocationOnRegister(timeout.register, runtimeValueTypeI64) 5005 if err := c.compileCallBuiltinFunction(waitFunc); err != nil { 5006 return err 5007 } 5008 // Address, values and timeout consumed in Go 5009 c.locationStack.pop() 5010 c.locationStack.pop() 5011 c.locationStack.pop() 5012 5013 // Then, the result was pushed. 
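	// Editorial note: the i32 pushed next is the wait result defined by the
	// threads proposal: 0 ("ok", woken by a notify), 1 ("not-equal", the loaded
	// value differed from the expected one), or 2 ("timed-out"). Rough pseudo-Go
	// of the Go-side builtin, where block is a hypothetical stand-in for the
	// parking/timeout machinery:
	//
	//	if load(addr) != expected {
	//		return 1
	//	}
	//	if !block(addr, timeout) {
	//		return 2
	//	}
	//	return 0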
5014 v := c.locationStack.pushRuntimeValueLocationOnStack() 5015 v.valueType = runtimeValueTypeI32 5016 5017 c.locationStack.markRegisterUnused(reg) 5018 c.locationStack.releaseRegister(exp) 5019 c.locationStack.releaseRegister(timeout) 5020 5021 // After return, we re-initialize reserved registers just like preamble of functions. 5022 c.compileReservedStackBasePointerInitialization() 5023 c.compileReservedMemoryPointerInitialization() 5024 5025 return nil 5026 } 5027 5028 func (c *amd64Compiler) compileAtomicMemoryNotify(o *wazeroir.UnionOperation) error { 5029 offset := uint32(o.U2) 5030 5031 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 5032 return err 5033 } 5034 5035 count := c.locationStack.pop() 5036 if err := c.compileEnsureOnRegister(count); err != nil { 5037 return err 5038 } 5039 5040 reg, err := c.compileMemoryAccessCeilSetup(offset, 4) 5041 if err != nil { 5042 return err 5043 } 5044 c.compileMemoryAlignmentCheck(reg, 4) 5045 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, reg) 5046 c.assembler.CompileConstToRegister(amd64.ADDQ, -4, reg) 5047 5048 // Push address and count back to read in Go 5049 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64) 5050 c.pushRuntimeValueLocationOnRegister(count.register, runtimeValueTypeI32) 5051 if err := c.compileCallBuiltinFunction(builtinFunctionMemoryNotify); err != nil { 5052 return err 5053 } 5054 5055 // Address and count consumed by Go 5056 c.locationStack.pop() 5057 c.locationStack.pop() 5058 5059 // Then, the result was pushed. 5060 v := c.locationStack.pushRuntimeValueLocationOnStack() 5061 v.valueType = runtimeValueTypeI32 5062 5063 // After return, we re-initialize reserved registers just like preamble of functions. 5064 c.compileReservedStackBasePointerInitialization() 5065 c.compileReservedMemoryPointerInitialization() 5066 return nil 5067 } 5068 5069 func (c *amd64Compiler) compileAtomicFence(_ *wazeroir.UnionOperation) error { 5070 c.assembler.CompileStandAlone(amd64.MFENCE) 5071 return nil 5072 } 5073 5074 func (c *amd64Compiler) compileMemoryAlignmentCheck(baseRegister asm.Register, targetSizeInBytes int64) { 5075 if targetSizeInBytes == 1 { 5076 return // No alignment restrictions when accessing a byte 5077 } 5078 var checkBits asm.ConstantValue 5079 switch targetSizeInBytes { 5080 case 2: 5081 checkBits = 0b1 5082 case 4: 5083 checkBits = 0b11 5084 case 8: 5085 checkBits = 0b111 5086 } 5087 c.assembler.CompileConstToRegister(amd64.TESTQ, checkBits, baseRegister) 5088 aligned := c.assembler.CompileJump(amd64.JEQ) 5089 5090 c.compileExitFromNativeCode(nativeCallStatusUnalignedAtomic) 5091 c.assembler.SetJumpTargetOnNext(aligned) 5092 } 5093 5094 // compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for amd64. 5095 func (c *amd64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) { 5096 var inst asm.Instruction 5097 switch loc.valueType { 5098 case runtimeValueTypeV128Lo: 5099 inst = amd64.MOVDQU 5100 case runtimeValueTypeV128Hi: 5101 panic("BUG: V128Hi must be be loaded to a register along with V128Lo") 5102 case runtimeValueTypeI32, runtimeValueTypeF32: 5103 inst = amd64.MOVL 5104 case runtimeValueTypeI64, runtimeValueTypeF64: 5105 inst = amd64.MOVQ 5106 default: 5107 panic("BUG: unknown runtime value type") 5108 } 5109 5110 // Copy the value from the stack. 
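	// Editorial note: every wazeroir stack slot is 8 bytes, so the value at
	// stack pointer i lives at stackBase + i*8, which is exactly the addressing
	// used by the MOV below and by compileReleaseRegisterToStack. A v128 value
	// occupies two consecutive slots, which is why the V128Lo case also claims
	// the same register for the "hi" half right after the load.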
5111 c.assembler.CompileMemoryToRegister(inst, 5112 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 5113 amd64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, 5114 loc.register) 5115 5116 if loc.valueType == runtimeValueTypeV128Lo { 5117 // Higher 64-bits are loaded as well ^^. 5118 hi := &c.locationStack.stack[loc.stackPointer+1] 5119 hi.setRegister(loc.register) 5120 } 5121 } 5122 5123 // maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack 5124 // if the value is located on a conditional register. 5125 // 5126 // This is usually called at the beginning of methods on compiler interface where we possibly 5127 // compile instructions without saving the conditional register value. 5128 // The compileXXX functions without calling this function is saving the conditional 5129 // value to the stack or register by invoking compileEnsureOnRegister for the top. 5130 func (c *amd64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) { 5131 if c.locationStack.sp > 0 { 5132 if loc := c.locationStack.peek(); loc.onConditionalRegister() { 5133 if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil { 5134 return err 5135 } 5136 } 5137 } 5138 return 5139 } 5140 5141 // loadConditionalRegisterToGeneralPurposeRegister saves the conditional register value 5142 // to a general purpose register. 5143 func (c *amd64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error { 5144 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 5145 if err != nil { 5146 return err 5147 } 5148 c.compileMoveConditionalToGeneralPurposeRegister(loc, reg) 5149 return nil 5150 } 5151 5152 func (c *amd64Compiler) compileMoveConditionalToGeneralPurposeRegister(loc *runtimeValueLocation, reg asm.Register) { 5153 // Set the flag bit to the destination. See 5154 // - https://c9x.me/x86/html/file_module_x86_id_288.html 5155 // - https://github.com/golang/go/blob/master/src/cmd/internal/obj/x86/asm6.go#L1453-L1468 5156 // to translate conditionalRegisterState* to amd64.SET* 5157 var inst asm.Instruction 5158 switch loc.conditionalRegister { 5159 case amd64.ConditionalRegisterStateE: 5160 inst = amd64.SETEQ 5161 case amd64.ConditionalRegisterStateNE: 5162 inst = amd64.SETNE 5163 case amd64.ConditionalRegisterStateS: 5164 inst = amd64.SETMI 5165 case amd64.ConditionalRegisterStateNS: 5166 inst = amd64.SETPL 5167 case amd64.ConditionalRegisterStateG: 5168 inst = amd64.SETGT 5169 case amd64.ConditionalRegisterStateGE: 5170 inst = amd64.SETGE 5171 case amd64.ConditionalRegisterStateL: 5172 inst = amd64.SETLT 5173 case amd64.ConditionalRegisterStateLE: 5174 inst = amd64.SETLE 5175 case amd64.ConditionalRegisterStateA: 5176 inst = amd64.SETHI 5177 case amd64.ConditionalRegisterStateAE: 5178 inst = amd64.SETCC 5179 case amd64.ConditionalRegisterStateB: 5180 inst = amd64.SETCS 5181 case amd64.ConditionalRegisterStateBE: 5182 inst = amd64.SETLS 5183 } 5184 5185 c.assembler.CompileNoneToRegister(inst, reg) 5186 5187 // Then we reset the unnecessary bit. 5188 c.assembler.CompileConstToRegister(amd64.ANDQ, 0x1, reg) 5189 5190 // Mark it uses the register. 5191 loc.setRegister(reg) 5192 c.locationStack.markRegisterUsed(reg) 5193 } 5194 5195 // allocateRegister implements compiler.allocateRegister for amd64. 
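// When no register of the requested type is free, it spills ("steals") the
// register backing a live value to that value's stack slot via
// compileReleaseRegisterToStack and hands the register out instead; allocation
// only fails when the location stack reports nothing stealable.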
5196 func (c *amd64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) { 5197 var ok bool 5198 // Try to get the unused register. 5199 reg, ok = c.locationStack.takeFreeRegister(t) 5200 if ok { 5201 return 5202 } 5203 5204 // If not found, we have to steal the register. 5205 stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t) 5206 if !ok { 5207 err = fmt.Errorf("cannot steal register") 5208 return 5209 } 5210 5211 // Release the steal target register value onto stack location. 5212 reg = stealTarget.register 5213 c.compileReleaseRegisterToStack(stealTarget) 5214 return 5215 } 5216 5217 // callFunction adds instructions to call a function whose address equals either addr parameter or the value on indexReg. 5218 // 5219 // Note: this is the counterpart for returnFunction, and see the comments there as well 5220 // to understand how the function calls are achieved. 5221 func (c *amd64Compiler) compileCallFunctionImpl(functionAddressRegister asm.Register, functype *wasm.FunctionType) error { 5222 // Release all the registers as our calling convention requires the caller-save. 5223 if err := c.compileReleaseAllRegistersToStack(); err != nil { 5224 return err 5225 } 5226 5227 c.locationStack.markRegisterUsed(functionAddressRegister) 5228 5229 // Obtain a temporary register to be used in the followings. 5230 tmpRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) 5231 if !found { 5232 // This in theory never happen as all the registers must be free except codeAddressRegister. 5233 return fmt.Errorf("could not find enough free registers") 5234 } 5235 5236 // The stack should look like: 5237 // 5238 // reserved slots for results (if len(results) > len(args)) 5239 // | | 5240 // ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, .... 5241 // | | | 5242 // | callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^} 5243 // | 5244 // nextStackBasePointerOffset 5245 // 5246 // where callFrame is used to return to this currently executed function. 5247 5248 nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64) 5249 5250 callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype) 5251 5252 // Save the current stack base pointer at callFrameStackBasePointerInBytesLoc. 5253 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5254 amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, 5255 tmpRegister) 5256 callFrameStackBasePointerInBytesLoc.setRegister(tmpRegister) 5257 c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc) 5258 5259 // Set callEngine.stackContext.stackBasePointer for the next function. 5260 c.assembler.CompileConstToRegister(amd64.ADDQ, nextStackBasePointerOffset<<3, tmpRegister) 5261 5262 // Write the calculated value to callEngine.stackContext.stackBasePointer. 5263 c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, 5264 amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset) 5265 5266 // Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc. 
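	// Editorial note: that slot is one of the three pushed by pushCallFrame;
	// conceptually they form (illustrative struct, not a type declared in this
	// package):
	//
	//	type callFrame struct {
	//		returnAddress                 uintptr // native code address to resume the caller at
	//		returnStackBasePointerInBytes uint64  // caller's stackContext.stackBasePointerInBytes
	//		function                      uintptr // caller's *function, restored into moduleContext.fn
	//	}
	//
	// matching the ".returnAddress, .returnStackBasePointerInBytes, .function"
	// layout drawn above.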
5267 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5268 amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset, 5269 tmpRegister) 5270 callFrameFunctionLoc.setRegister(tmpRegister) 5271 c.compileReleaseRegisterToStack(callFrameFunctionLoc) 5272 5273 // Set callEngine.moduleContext.fn to the next *function. 5274 c.assembler.CompileRegisterToMemory(amd64.MOVQ, functionAddressRegister, 5275 amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset) 5276 5277 // Write the return address into callFrameReturnAddressLoc. 5278 c.assembler.CompileReadInstructionAddress(tmpRegister, amd64.JMP) 5279 callFrameReturnAddressLoc.setRegister(tmpRegister) 5280 c.compileReleaseRegisterToStack(callFrameReturnAddressLoc) 5281 5282 if amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister == functionAddressRegister { 5283 // This case we must move the value on targetFunctionAddressRegister to another register, otherwise 5284 // the address (jump target below) will be modified and result in segfault. 5285 // See #526. 5286 c.assembler.CompileRegisterToRegister(amd64.MOVQ, functionAddressRegister, tmpRegister) 5287 functionAddressRegister = tmpRegister 5288 } 5289 5290 // Also, we have to put the target function's *wasm.ModuleInstance into amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister. 5291 c.assembler.CompileMemoryToRegister(amd64.MOVQ, functionAddressRegister, functionModuleInstanceOffset, 5292 amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 5293 5294 // And jump into the initial address of the target function. 5295 c.assembler.CompileJumpToMemory(amd64.JMP, functionAddressRegister, functionCodeInitialAddressOffset) 5296 5297 // All the registers used are temporary, so we mark them unused. 5298 c.locationStack.markRegisterUnused(tmpRegister, functionAddressRegister) 5299 5300 // On the function return, we have to initialize the state. 5301 if err := c.compileModuleContextInitialization(); err != nil { 5302 return err 5303 } 5304 5305 // Due to the change to callEngine.stackContext.stackBasePointer. 5306 c.compileReservedStackBasePointerInitialization() 5307 5308 // Due to the change to callEngine.moduleContext.moduleInstance as that might result in 5309 // the memory instance manipulation. 5310 c.compileReservedMemoryPointerInitialization() 5311 5312 // We consumed the function parameters, the call frame stack and reserved slots during the call. 5313 c.locationStack.sp = uint64(nextStackBasePointerOffset) 5314 5315 // Now the function results are pushed by the call. 5316 for _, t := range functype.Results { 5317 loc := c.locationStack.pushRuntimeValueLocationOnStack() 5318 switch t { 5319 case wasm.ValueTypeI32: 5320 loc.valueType = runtimeValueTypeI32 5321 case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref: 5322 loc.valueType = runtimeValueTypeI64 5323 case wasm.ValueTypeF32: 5324 loc.valueType = runtimeValueTypeF32 5325 case wasm.ValueTypeF64: 5326 loc.valueType = runtimeValueTypeF64 5327 case wasm.ValueTypeV128: 5328 loc.valueType = runtimeValueTypeV128Lo 5329 hi := c.locationStack.pushRuntimeValueLocationOnStack() 5330 hi.valueType = runtimeValueTypeV128Hi 5331 default: 5332 panic("BUG: invalid type: " + wasm.ValueTypeName(t)) 5333 } 5334 } 5335 return nil 5336 } 5337 5338 // returnFunction adds instructions to return from the current callframe back to the caller's frame. 5339 // If this is the current one is the origin, we return to the callEngine.execWasmFunction with the Returned status. 
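// (The origin frame is recognized by a zero return address stored in its call
// frame; see the TESTQ on returnAddressRegister below.)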
5340 // Otherwise, we jump into the callers' return address stored in callFrame.returnAddress while setting 5341 // up all the necessary change on the callEngine's state. 5342 // 5343 // Note: this is the counterpart for callFunction, and see the comments there as well 5344 // to understand how the function calls are achieved. 5345 func (c *amd64Compiler) compileReturnFunction() error { 5346 // Release all the registers as our calling convention requires the caller-save. 5347 if err := c.compileReleaseAllRegistersToStack(); err != nil { 5348 return err 5349 } 5350 5351 if c.withListener { 5352 if err := c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerAfter); err != nil { 5353 return err 5354 } 5355 // After return, we re-initialize the stack base pointer as that is used to return to the caller below. 5356 c.compileReservedStackBasePointerInitialization() 5357 } 5358 5359 // amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the module instance's address 5360 // so mark it used so that it won't be used as a free register. 5361 c.locationStack.markRegisterUsed(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 5362 defer c.locationStack.markRegisterUnused(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 5363 5364 // Obtain a temporary register to be used in the following. 5365 returnAddressRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) 5366 if !found { 5367 panic("BUG: all the registers should be free at this point: " + c.locationStack.String()) 5368 } 5369 5370 returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ) 5371 5372 // A zero return address means return from the execution. 5373 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5374 amd64ReservedRegisterForStackBasePointerAddress, int64(returnAddress.stackPointer)*8, 5375 returnAddressRegister, 5376 ) 5377 c.assembler.CompileRegisterToRegister(amd64.TESTQ, returnAddressRegister, returnAddressRegister) 5378 5379 c.compileMaybeExitFromNativeCode(amd64.JNE, nativeCallStatusCodeReturned) 5380 5381 // Alias for readability. 5382 tmpRegister := amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister 5383 5384 // First, restore the stackContext.stackBasePointerInBytesOffset from callerStackBasePointerInBytes. 5385 callerStackBasePointerInBytes.setRegister(tmpRegister) 5386 c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes) 5387 c.assembler.CompileRegisterToMemory(amd64.MOVQ, 5388 tmpRegister, amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset) 5389 5390 // Next, restore moduleContext.fn from callerFunction. 5391 callerFunction.setRegister(tmpRegister) 5392 c.compileLoadValueOnStackToRegister(callerFunction) 5393 c.assembler.CompileRegisterToMemory(amd64.MOVQ, 5394 tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset) 5395 5396 // Also, we have to put the target function's *wasm.ModuleInstance into amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister. 5397 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5398 tmpRegister, functionModuleInstanceOffset, 5399 amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 5400 5401 // Then, jump into the return address! 
5402 c.assembler.CompileJumpToRegister(amd64.JMP, returnAddressRegister) 5403 return nil 5404 } 5405 5406 func (c *amd64Compiler) compileCallGoHostFunction() error { 5407 return c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction) 5408 } 5409 5410 func (c *amd64Compiler) compileCallBuiltinFunction(index wasm.Index) error { 5411 // Set the functionAddress to the callEngine.exitContext functionCallAddress. 5412 c.assembler.CompileConstToMemory(amd64.MOVL, int64(index), amd64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset) 5413 return c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction) 5414 } 5415 5416 func (c *amd64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode) error { 5417 // Release all the registers as our calling convention requires the caller-save. 5418 if err := c.compileReleaseAllRegistersToStack(); err != nil { 5419 return err 5420 } 5421 5422 c.compileExitFromNativeCode(compilerStatus) 5423 return nil 5424 } 5425 5426 // compileReleaseAllRegistersToStack add the instructions to release all the LIVE value 5427 // in the value location stack at this point into the stack memory location. 5428 func (c *amd64Compiler) compileReleaseAllRegistersToStack() (err error) { 5429 for i := uint64(0); i < c.locationStack.sp; i++ { 5430 if loc := &c.locationStack.stack[i]; loc.onRegister() { 5431 c.compileReleaseRegisterToStack(loc) 5432 } else if loc.onConditionalRegister() { 5433 if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil { 5434 return 5435 } 5436 c.compileReleaseRegisterToStack(loc) 5437 } 5438 } 5439 return 5440 } 5441 5442 func (c *amd64Compiler) onValueReleaseRegisterToStack(reg asm.Register) { 5443 for i := uint64(0); i < c.locationStack.sp; i++ { 5444 prevValue := &c.locationStack.stack[i] 5445 if prevValue.register == reg { 5446 c.compileReleaseRegisterToStack(prevValue) 5447 break 5448 } 5449 } 5450 } 5451 5452 // compileReleaseRegisterToStack implements compiler.compileReleaseRegisterToStack for amd64. 5453 func (c *amd64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) { 5454 var inst asm.Instruction 5455 switch loc.valueType { 5456 case runtimeValueTypeV128Lo: 5457 inst = amd64.MOVDQU 5458 case runtimeValueTypeV128Hi: 5459 panic("BUG: V128Hi must be released to the stack along with V128Lo") 5460 case runtimeValueTypeI32, runtimeValueTypeF32: 5461 inst = amd64.MOVL 5462 case runtimeValueTypeI64, runtimeValueTypeF64: 5463 inst = amd64.MOVQ 5464 default: 5465 panic("BUG: unknown runtime value type") 5466 } 5467 5468 c.assembler.CompileRegisterToMemory(inst, loc.register, 5469 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 5470 amd64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) 5471 5472 // Mark the register is free. 5473 c.locationStack.releaseRegister(loc) 5474 5475 if loc.valueType == runtimeValueTypeV128Lo { 5476 // Higher 64-bits are released as well ^^. 5477 hi := &c.locationStack.stack[loc.stackPointer+1] 5478 c.locationStack.releaseRegister(hi) 5479 } 5480 } 5481 5482 func (c *amd64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) { 5483 if target := c.compiledTrapTargets[status]; target != nil { 5484 // We've already compiled this. 5485 // Invert the return condition to jump into the appropriate target. 
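	// Editorial note: the switch below maps each skip condition to its logical
	// complement. An equivalent table-driven sketch, using the same instruction
	// constants:
	//
	//	var inverse = map[asm.Instruction]asm.Instruction{
	//		amd64.JHI: amd64.JLS, amd64.JLS: amd64.JHI, // unsigned above / below-or-equal
	//		amd64.JNE: amd64.JEQ, amd64.JEQ: amd64.JNE, // not-equal / equal
	//		amd64.JCC: amd64.JCS, amd64.JCS: amd64.JCC, // carry clear / carry set
	//		amd64.JPC: amd64.JPS, amd64.JPS: amd64.JPC, // parity clear / parity set
	//		amd64.JPL: amd64.JMI, amd64.JMI: amd64.JPL, // sign clear / sign set
	//	}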
5486 var returnCondition asm.Instruction 5487 switch skipCondition { 5488 case amd64.JHI: 5489 returnCondition = amd64.JLS 5490 case amd64.JLS: 5491 returnCondition = amd64.JHI 5492 case amd64.JNE: 5493 returnCondition = amd64.JEQ 5494 case amd64.JEQ: 5495 returnCondition = amd64.JNE 5496 case amd64.JCC: 5497 returnCondition = amd64.JCS 5498 case amd64.JCS: 5499 returnCondition = amd64.JCC 5500 case amd64.JPC: 5501 returnCondition = amd64.JPS 5502 case amd64.JPS: 5503 returnCondition = amd64.JPC 5504 case amd64.JPL: 5505 returnCondition = amd64.JMI 5506 case amd64.JMI: 5507 returnCondition = amd64.JPL 5508 default: 5509 panic("BUG: couldn't invert condition") 5510 } 5511 c.assembler.CompileJump(returnCondition).AssignJumpTarget(target) 5512 } else { 5513 skip := c.assembler.CompileJump(skipCondition) 5514 c.compileExitFromNativeCode(status) 5515 c.assembler.SetJumpTargetOnNext(skip) 5516 } 5517 } 5518 5519 func (c *amd64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) { 5520 if target := c.compiledTrapTargets[status]; target != nil { 5521 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(target) 5522 return 5523 } 5524 5525 switch status { 5526 case nativeCallStatusCodeReturned: 5527 // Save the target for reuse. 5528 c.compiledTrapTargets[status] = c.compileNOP() 5529 case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction: 5530 // Read the return address, and write it to callEngine.exitContext.returnAddress. 5531 returnAddressReg, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) 5532 if !ok { 5533 panic("BUG: cannot take free register") 5534 } 5535 c.assembler.CompileReadInstructionAddress(returnAddressReg, amd64.RET) 5536 c.assembler.CompileRegisterToMemory(amd64.MOVQ, 5537 returnAddressReg, amd64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset) 5538 default: 5539 if c.ir.IROperationSourceOffsetsInWasmBinary != nil { 5540 // This case, the execution traps and we want the top frame's source position in the stack trace. 5541 // Take RegR15 and store the instruction address onto callEngine.returnAddress. 5542 returnAddressReg := amd64.RegR15 5543 c.assembler.CompileReadInstructionAddress(returnAddressReg, amd64.MOVQ) 5544 c.assembler.CompileRegisterToMemory(amd64.MOVQ, 5545 returnAddressReg, amd64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset) 5546 } else { 5547 // We won't use the source position, so just save the target for reuse. 5548 c.compiledTrapTargets[status] = c.compileNOP() 5549 } 5550 } 5551 5552 // Write the status to callEngine.exitContext.statusCode. 5553 c.assembler.CompileConstToMemory(amd64.MOVB, int64(status), 5554 amd64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) 5555 5556 // Write back the cached SP to the actual eng.stackPointer. 5557 c.assembler.CompileConstToMemory(amd64.MOVQ, int64(c.locationStack.sp), 5558 amd64ReservedRegisterForCallEngine, callEngineStackContextStackPointerOffset) 5559 5560 c.assembler.CompileStandAlone(amd64.RET) 5561 } 5562 5563 func (c *amd64Compiler) compilePreamble() (err error) { 5564 // We assume all function parameters are already pushed onto the stack by 5565 // the caller. 5566 c.locationStack.init(c.typ) 5567 5568 if err := c.compileModuleContextInitialization(); err != nil { 5569 return err 5570 } 5571 5572 // Check if it's necessary to grow the value stack by using max stack pointer. 
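	// Editorial note: roughly, the stack must grow when the room left in the
	// current stack, stackLenInBytes - stackBasePointerInBytes, is smaller than
	// this function's stack pointer ceiling in bytes (the CMPQ constant patched
	// later through assignStackPointerCeilNeeded). Pseudo-Go:
	//
	//	if stackLenInBytes-stackBasePointerInBytes < stackPointerCeilInBytes {
	//		// exit to Go via builtinFunctionIndexGrowStack, then continue below.
	//	}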
5573 if err = c.compileMaybeGrowStack(); err != nil { 5574 return err 5575 } 5576 5577 if c.withListener { 5578 if err = c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerBefore); err != nil { 5579 return err 5580 } 5581 } 5582 5583 c.compileReservedStackBasePointerInitialization() 5584 5585 // Finally, we initialize the reserved memory register based on the module context. 5586 c.compileReservedMemoryPointerInitialization() 5587 return 5588 } 5589 5590 func (c *amd64Compiler) compileReservedStackBasePointerInitialization() { 5591 // First, make reservedRegisterForStackBasePointer point to the beginning of the slice backing array. 5592 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5593 amd64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset, 5594 amd64ReservedRegisterForStackBasePointerAddress) 5595 5596 // next we move the base pointer (callEngine.stackBasePointer) to the tmp register. 5597 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 5598 amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, 5599 amd64ReservedRegisterForStackBasePointerAddress, 5600 ) 5601 } 5602 5603 func (c *amd64Compiler) compileReservedMemoryPointerInitialization() { 5604 if c.ir.HasMemory || c.ir.UsesMemory { 5605 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5606 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset, 5607 amd64ReservedRegisterForMemory, 5608 ) 5609 } 5610 } 5611 5612 // compileMaybeGrowStack adds instructions to check the necessity to grow the value stack, 5613 // and if so, make the builtin function call to do so. These instructions are called in the function's 5614 // preamble. 5615 func (c *amd64Compiler) compileMaybeGrowStack() error { 5616 tmpRegister, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) 5617 if !ok { 5618 panic("BUG: cannot take free register") 5619 } 5620 5621 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 5622 amd64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset, tmpRegister) 5623 c.assembler.CompileMemoryToRegister(amd64.SUBQ, 5624 amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, tmpRegister) 5625 5626 // If stack base pointer + max stack pointer > stackLen, we need to grow the stack. 5627 cmpWithStackPointerCeil := c.assembler.CompileRegisterToConst(amd64.CMPQ, tmpRegister, 0) 5628 c.assignStackPointerCeilNeeded = cmpWithStackPointerCeil 5629 5630 // Jump if we have no need to grow. 5631 jmpIfNoNeedToGrowStack := c.assembler.CompileJump(amd64.JCC) 5632 5633 // Otherwise, we have to make the builtin function call to grow the call stack. 5634 if err := c.compileCallBuiltinFunction(builtinFunctionIndexGrowStack); err != nil { 5635 return err 5636 } 5637 5638 c.assembler.SetJumpTargetOnNext(jmpIfNoNeedToGrowStack) 5639 return nil 5640 } 5641 5642 // compileModuleContextInitialization adds instructions to initialize callEngine.ModuleContext's fields based on 5643 // callEngine.ModuleContext.ModuleInstanceAddress. 5644 // This is called in two cases: in function preamble, and on the return from (non-Go) function calls. 5645 func (c *amd64Compiler) compileModuleContextInitialization() error { 5646 // amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the module instance's address 5647 // so mark it used so that it won't be used as a free register until the module context initialization finishes. 
	c.locationStack.markRegisterUsed(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
	defer c.locationStack.markRegisterUnused(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)

	// Obtain the temporary registers to be used below.
	tmpRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		// This should never happen in theory, as all registers must be free except the module instance address register.
		return fmt.Errorf("could not find enough free registers")
	}
	c.locationStack.markRegisterUsed(tmpRegister)
	tmpRegister2, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		// This should never happen in theory, as all registers must be free except the module instance address register.
		return fmt.Errorf("could not find enough free registers")
	}
	c.locationStack.markRegisterUsed(tmpRegister2)

	// If the module instance address stays the same, we can skip the entire code below.
	// The rationale/idea for this is that, in almost all use cases, users instantiate a single
	// Wasm binary and run the functions from it, rather than doing import/export on multiple
	// binaries. As a result, the cmp and jmp sequence below must be easy for the x64 CPU to
	// branch-predict, since the jump is taken on almost 100% of function calls.
	c.assembler.CompileMemoryToRegister(amd64.CMPQ,
		amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
	jmpIfModuleNotChange := c.assembler.CompileJump(amd64.JEQ)

	// If callEngine.moduleContext.moduleInstance is not equal to the value in amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister,
	// we have to put the new value there.
	c.assembler.CompileRegisterToMemory(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister,
		amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)

	// Also, we have to update the following fields:
	// * callEngine.moduleContext.globalElement0Address
	// * callEngine.moduleContext.tablesElement0Address
	// * callEngine.moduleContext.memoryInstance
	// * callEngine.moduleContext.memoryElement0Address
	// * callEngine.moduleContext.memorySliceLen
	// * callEngine.moduleContext.functionsElement0Address
	// * callEngine.moduleContext.typeIDsElement0Address
	// * callEngine.moduleContext.dataInstancesElement0Address
	// * callEngine.moduleContext.elementInstancesElement0Address

	// Update globalElement0Address.
	//
	// Note: if there's a global.get or global.set instruction in the function, the existence of the globals
	// is ensured by function validation at the module instantiation phase, and that's why it is ok to
	// skip the initialization if the module's globals slice is empty.
	if len(c.ir.Globals) > 0 {
		// Since ModuleInstance.Globals is []*globalInstance, internally
		// the address of the first item in the underlying array lies exactly at the globals offset.
		// See https://go.dev/blog/slices-intro if unfamiliar.
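		// (A slice header is laid out as struct { Data uintptr; Len, Cap int }, so the word at
		// moduleInstanceGlobalsOffset is the Data pointer, i.e. the address of Globals[0].)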
		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset, tmpRegister)

		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset)
	}

	// Update tablesElement0Address.
	//
	// Note: if there's a table instruction in the function, the existence of the table
	// is ensured by function validation at the module instantiation phase, and that's
	// why it is ok to skip the initialization if the module's table doesn't exist.
	if c.ir.HasTable {
		// First, read the address of the first item of ModuleInstance.Tables (i.e. &ModuleInstance.Tables[0])
		// into tmpRegister.
		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceTablesOffset, tmpRegister)

		// Then, store that address into callEngine.moduleContext.tablesElement0Address.
		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
			amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset)

		// Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpRegister)
		c.assembler.CompileRegisterToMemory(amd64.MOVQ,
			tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
	}

	// Update memoryInstance and memoryElement0Address.
	//
	// Note: if there's a memory instruction in the function, the memory instance must be non-nil.
	// That is ensured by function validation at the module instantiation phase, and that's
	// why it is ok to skip the initialization if the module's memory instance is nil.
	if c.ir.HasMemory {
		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
			tmpRegister)

		// Set the memory instance.
		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset)

		// Set the element zero address.
		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, memoryInstanceBufferOffset, tmpRegister2)
		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister2,
			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset)
	}

	// Update moduleContext.functionsElement0Address.
	{
		// "tmpRegister = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
		//
		// Go's interface is laid out in memory as two quad words, struct {tab, data uintptr},
		// where tab points to the interface table, and data points to the actual
		// implementation of the interface. In this case, we extract the "data" pointer as the *moduleEngine.
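		// (An interface value occupies two words; the data pointer is the second word, which is
		// presumably what interfaceDataOffset accounts for on 64-bit targets.)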
		// See the following references for detail:
		// * https://research.swtch.com/interfaces
		// * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210
		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset, tmpRegister)

		// "tmpRegister = [tmpRegister + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, moduleEngineFunctionsOffset, tmpRegister)

		// "callEngine.moduleContext.functionsElement0Address = tmpRegister".
		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, amd64ReservedRegisterForCallEngine,
			callEngineModuleContextFunctionsElement0AddressOffset)
	}

	// Update dataInstancesElement0Address.
	if c.ir.HasDataInstances {
		// "tmpRegister = &moduleInstance.DataInstances[0]"
		c.assembler.CompileMemoryToRegister(
			amd64.MOVQ,
			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
			tmpRegister,
		)
		// "callEngine.moduleContext.dataInstancesElement0Address = tmpRegister".
		c.assembler.CompileRegisterToMemory(
			amd64.MOVQ,
			tmpRegister,
			amd64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
		)
	}

	// Update callEngine.moduleContext.elementInstancesElement0Address.
	if c.ir.HasElementInstances {
		// "tmpRegister = &moduleInstance.ElementInstances[0]"
		c.assembler.CompileMemoryToRegister(
			amd64.MOVQ,
			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
			tmpRegister,
		)
		// "callEngine.moduleContext.elementInstancesElement0Address = tmpRegister".
		c.assembler.CompileRegisterToMemory(
			amd64.MOVQ,
			tmpRegister,
			amd64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
		)
	}

	c.locationStack.markRegisterUnused(tmpRegister, tmpRegister2)

	// Set the jump target towards the next instruction for the case where the module instance address hasn't changed.
	c.assembler.SetJumpTargetOnNext(jmpIfModuleNotChange)
	return nil
}

// compileEnsureOnRegister ensures that the given value is located on a
// register of an appropriate type.
func (c *amd64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
	if loc.onStack() {
		// Allocate the register.
		reg, err := c.allocateRegister(loc.getRegisterType())
		if err != nil {
			return err
		}

		// Mark the value as using the register.
		loc.setRegister(reg)
		c.locationStack.markRegisterUsed(reg)

		c.compileLoadValueOnStackToRegister(loc)
	} else if loc.onConditionalRegister() {
		err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
	}
	return
}

// compileMaybeSwapRegisters swaps two registers if they're not equal.
func (c *amd64Compiler) compileMaybeSwapRegisters(reg1, reg2 asm.Register) {
	if reg1 != reg2 {
		c.assembler.CompileRegisterToRegister(amd64.XCHGQ, reg1, reg2)
	}
}

func (c *amd64Compiler) compileCompareWithMemorySliceLen(addrReg asm.Register) error {
	// Obtain a temporary register to be used below.
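	// (The register is assumed to hold the current memory instance pointer so that addrReg can be
	// compared against the memory buffer's length just below.)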
	tmpRegister, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
		amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
		tmpRegister)

	c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmpRegister, memoryInstanceBufferLenOffset, addrReg)

	c.locationStack.markRegisterUnused(tmpRegister)

	return nil
}

// compilePreventCrossedTargetRegisters swaps registers so that, for each runtimeValueLocation in locs, the
// register at the same index in targets is not occupied by some other runtimeValueLocation from locs. It
// returns a closure that restores the original register placement.
//
// This makes it possible to safely exchange one set of registers with another, where a register might be in both sets.
// After the call, each register corresponds either to itself or to another register not present in its own set.
//
// For example, if we have locs = [AX, BX, CX] and targets = [BX, SI, AX], then it performs two swaps
// to make locs = [BX, CX, AX].
func (c *amd64Compiler) compilePreventCrossedTargetRegisters(locs []*runtimeValueLocation, targets []asm.Register) (restore func()) {
	type swap struct{ srcIndex, dstIndex int }
	var swaps []swap
	for i := range locs {
		targetLocation := -1 // -1 means not found.
		for j := range locs {
			if locs[j].register == targets[i] {
				targetLocation = j
				break
			}
		}
		if targetLocation != -1 && targetLocation != i {
			c.compileMaybeSwapRegisters(locs[i].register, locs[targetLocation].register)
			locs[i].register, locs[targetLocation].register = locs[targetLocation].register, locs[i].register
			swaps = append(swaps, swap{i, targetLocation})
		}
	}
	return func() {
		// Restore in reverse order because a register can be moved multiple times.
		for i := len(swaps) - 1; i >= 0; i -= 1 {
			r1, r2 := swaps[i].srcIndex, swaps[i].dstIndex
			c.compileMaybeSwapRegisters(locs[r1].register, locs[r2].register)
			locs[r1].register, locs[r2].register = locs[r2].register, locs[r1].register
		}
	}
}
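
// The sketch below is illustrative only and is not used by the compiler: it mirrors the swap
// bookkeeping performed by compilePreventCrossedTargetRegisters, using plain strings in place of
// asm.Register values (and no emitted XCHGQ instructions), so the effect of the forward pass and
// the reverse-order restore is easy to follow. The function name and string "registers" are hypothetical.
//
// For instance, preventCrossedTargetsSketch([]string{"AX", "BX", "CX"}, []string{"BX", "SI", "AX"})
// rearranges the slice to [BX CX AX] with two swaps, and the returned closure undoes them.
func preventCrossedTargetsSketch(regs []string, targets []string) (restore func()) {
	type swap struct{ srcIndex, dstIndex int }
	var swaps []swap
	for i := range regs {
		// Find which entry currently holds the register wanted at index i.
		j := -1
		for k := range regs {
			if regs[k] == targets[i] {
				j = k
				break
			}
		}
		if j != -1 && j != i {
			// Swap so that index i now holds targets[i], recording the swap for the restore step.
			regs[i], regs[j] = regs[j], regs[i]
			swaps = append(swaps, swap{i, j})
		}
	}
	return func() {
		// Undo in reverse order because an entry may have been moved more than once.
		for i := len(swaps) - 1; i >= 0; i-- {
			s := swaps[i]
			regs[s.srcIndex], regs[s.dstIndex] = regs[s.dstIndex], regs[s.srcIndex]
		}
	}
}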