wa-lang.org/wazero@v1.0.2/internal/engine/compiler/impl_amd64.go 1 package compiler 2 3 // This file implements the compiler for amd64/x86_64 target. 4 // Please refer to https://www.felixcloutier.com/x86/index.html 5 // if unfamiliar with amd64 instructions used here. 6 7 import ( 8 "bytes" 9 "fmt" 10 "math" 11 "runtime" 12 13 "wa-lang.org/wazero/internal/asm" 14 "wa-lang.org/wazero/internal/asm/amd64" 15 "wa-lang.org/wazero/internal/platform" 16 "wa-lang.org/wazero/internal/u32" 17 "wa-lang.org/wazero/internal/u64" 18 "wa-lang.org/wazero/internal/wasm" 19 "wa-lang.org/wazero/internal/wazeroir" 20 ) 21 22 var ( 23 minimum32BitSignedInt int32 = math.MinInt32 24 maximum32BitSignedInt int32 = math.MaxInt32 25 maximum32BitUnsignedInt uint32 = math.MaxUint32 26 minimum64BitSignedInt int64 = math.MinInt64 27 maximum64BitSignedInt int64 = math.MaxInt64 28 maximum64BitUnsignedInt uint64 = math.MaxUint64 29 float32SignBitMask uint32 = 1 << 31 30 float32RestBitMask = ^float32SignBitMask 31 float64SignBitMask uint64 = 1 << 63 32 float64RestBitMask = ^float64SignBitMask 33 float32ForMinimumSigned32bitInteger = uint32(0xCF00_0000) 34 float64ForMinimumSigned32bitInteger = uint64(0xC1E0_0000_0020_0000) 35 float32ForMinimumSigned64bitInteger = uint32(0xDF00_0000) 36 float64ForMinimumSigned64bitInteger = uint64(0xC3E0_0000_0000_0000) 37 float32ForMaximumSigned32bitIntPlusOne = uint32(0x4F00_0000) 38 float64ForMaximumSigned32bitIntPlusOne = uint64(0x41E0_0000_0000_0000) 39 float32ForMaximumSigned64bitIntPlusOne = uint32(0x5F00_0000) 40 float64ForMaximumSigned64bitIntPlusOne = uint64(0x43E0_0000_0000_0000) 41 ) 42 43 var ( 44 // amd64ReservedRegisterForCallEngine: pointer to callEngine (i.e. *callEngine as uintptr) 45 amd64ReservedRegisterForCallEngine = amd64.RegR13 46 // amd64ReservedRegisterForStackBasePointerAddress: stack base pointer's address (callEngine.stackBasePointer) in the current function call. 47 amd64ReservedRegisterForStackBasePointerAddress = amd64.RegR14 48 // amd64ReservedRegisterForMemory: pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr). 49 amd64ReservedRegisterForMemory = amd64.RegR15 50 ) 51 52 var ( 53 amd64UnreservedVectorRegisters = []asm.Register{ // nolint 54 amd64.RegX0, amd64.RegX1, amd64.RegX2, amd64.RegX3, 55 amd64.RegX4, amd64.RegX5, amd64.RegX6, amd64.RegX7, 56 amd64.RegX8, amd64.RegX9, amd64.RegX10, amd64.RegX11, 57 amd64.RegX12, amd64.RegX13, amd64.RegX14, amd64.RegX15, 58 } 59 // Note that we never invoke the "call" instruction, 60 // so we don't need to care about the calling convention. 61 // TODO: Maybe it is safe to just save rbp, rsp somewhere 62 // in Go-allocated variables, and reuse these registers 63 // in compiled functions and write them back before returning. 64 amd64UnreservedGeneralPurposeRegisters = []asm.Register{ // nolint 65 amd64.RegAX, amd64.RegCX, amd64.RegDX, amd64.RegBX, 66 amd64.RegSI, amd64.RegDI, amd64.RegR8, amd64.RegR9, 67 amd64.RegR10, amd64.RegR11, amd64.RegR12, 68 } 69 ) 70 71 // amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds *wasm.ModuleInstance of the 72 // next executing function instance. The value is set and used when making function calls 73 // or function returns in the ModuleContextInitialization. See compileModuleContextInitialization.
74 var amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister = amd64.RegR12 75 76 func (c *amd64Compiler) String() string { 77 return c.locationStack.String() 78 } 79 80 type amd64Compiler struct { 81 assembler amd64.Assembler 82 ir *wazeroir.CompilationResult 83 // locationStack holds the state of wazeroir virtual stack. 84 // and each item is either placed in register or the actual memory stack. 85 locationStack *runtimeValueLocationStack 86 // labels hold per wazeroir label specific information in this function. 87 labels map[string]*amd64LabelInfo 88 // stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation. 89 stackPointerCeil uint64 90 // currentLabel holds a currently compiled wazeroir label key. For debugging only. 91 currentLabel string 92 // onStackPointerCeilDeterminedCallBack hold a callback which are called when the max stack pointer is determined BEFORE generating native code. 93 onStackPointerCeilDeterminedCallBack func(stackPointerCeil uint64) 94 withListener bool 95 } 96 97 func newAmd64Compiler(ir *wazeroir.CompilationResult, withListener bool) (compiler, error) { 98 c := &amd64Compiler{ 99 assembler: amd64.NewAssembler(), 100 locationStack: newRuntimeValueLocationStack(), 101 currentLabel: wazeroir.EntrypointLabel, 102 ir: ir, 103 labels: map[string]*amd64LabelInfo{}, 104 withListener: withListener, 105 } 106 return c, nil 107 } 108 109 // runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the amd64 architecture. 110 func (c *amd64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack { 111 return c.locationStack 112 } 113 114 // setLocationStack sets the given runtimeValueLocationStack to .locationStack field, 115 // while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks. 116 // This is called when we branch into different block. 117 func (c *amd64Compiler) setLocationStack(newStack *runtimeValueLocationStack) { 118 if c.stackPointerCeil < c.locationStack.stackPointerCeil { 119 c.stackPointerCeil = c.locationStack.stackPointerCeil 120 } 121 c.locationStack = newStack 122 } 123 124 // pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for amd64. 125 func (c *amd64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) { 126 ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt) 127 c.locationStack.markRegisterUsed(reg) 128 return 129 } 130 131 // pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for amd64. 132 func (c *amd64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) { 133 lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo) 134 c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi) 135 c.locationStack.markRegisterUsed(reg) 136 return 137 } 138 139 type amd64LabelInfo struct { 140 // initialInstruction is the initial instruction for this label so other block can jump into it. 141 initialInstruction asm.Node 142 // initialStack is the initial value location stack from which we start compiling this label. 
143 initialStack *runtimeValueLocationStack 144 // labelBeginningCallbacks holds callbacks should to be called with initialInstruction 145 labelBeginningCallbacks []func(asm.Node) 146 } 147 148 func (c *amd64Compiler) label(labelKey string) *amd64LabelInfo { 149 ret, ok := c.labels[labelKey] 150 if ok { 151 return ret 152 } 153 c.labels[labelKey] = &amd64LabelInfo{} 154 return c.labels[labelKey] 155 } 156 157 // compileGoDefinedHostFunction constructs the entire code to enter the host function implementation, 158 // and return to the caller. 159 func (c *amd64Compiler) compileGoDefinedHostFunction() error { 160 // First we must update the location stack to reflect the number of host function inputs. 161 c.locationStack.init(c.ir.Signature) 162 163 if c.withListener { 164 if err := c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerBefore); err != nil { 165 return err 166 } 167 } 168 169 if err := c.compileCallGoHostFunction(); err != nil { 170 return err 171 } 172 173 // Initializes the reserved stack base pointer which is used to retrieve the call frame stack. 174 c.compileReservedStackBasePointerInitialization() 175 return c.compileReturnFunction() 176 } 177 178 // compile implements compiler.compile for the amd64 architecture. 179 func (c *amd64Compiler) compile() (code []byte, stackPointerCeil uint64, err error) { 180 // c.stackPointerCeil tracks the stack pointer ceiling (max seen) value across all runtimeValueLocationStack(s) 181 // used for all labels (via setLocationStack), excluding the current one. 182 // Hence, we check here if the final block's max one exceeds the current c.stackPointerCeil. 183 stackPointerCeil = c.stackPointerCeil 184 if stackPointerCeil < c.locationStack.stackPointerCeil { 185 stackPointerCeil = c.locationStack.stackPointerCeil 186 } 187 188 // Now that the max stack pointer is determined, we are invoking the callback. 189 // Note this MUST be called before Assemble() below. 190 if c.onStackPointerCeilDeterminedCallBack != nil { 191 c.onStackPointerCeilDeterminedCallBack(stackPointerCeil) 192 c.onStackPointerCeilDeterminedCallBack = nil 193 } 194 195 code, err = c.assembler.Assemble() 196 if err != nil { 197 return 198 } 199 200 code, err = platform.MmapCodeSegment(bytes.NewReader(code), len(code)) 201 return 202 } 203 204 // compileUnreachable implements compiler.compileUnreachable for the amd64 architecture. 205 func (c *amd64Compiler) compileUnreachable() error { 206 c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable) 207 return nil 208 } 209 210 // compileSet implements compiler.compileSet for the amd64 architecture. 211 func (c *amd64Compiler) compileSet(o *wazeroir.OperationSet) error { 212 setTargetIndex := int(c.locationStack.sp) - 1 - o.Depth 213 214 if o.IsTargetVector { 215 _ = c.locationStack.pop() // ignore the higher 64-bits. 216 } 217 v := c.locationStack.pop() 218 if err := c.compileEnsureOnRegister(v); err != nil { 219 return err 220 } 221 222 targetLocation := c.locationStack.stack[setTargetIndex] 223 if targetLocation.onRegister() { 224 // We no longer need the register previously used by the target location. 225 c.locationStack.markRegisterUnused(targetLocation.register) 226 } 227 228 reg := v.register 229 targetLocation.setRegister(reg) 230 if o.IsTargetVector { 231 c.locationStack.stack[setTargetIndex+1].setRegister(reg) 232 } 233 return nil 234 } 235 236 // compileGlobalGet implements compiler.compileGlobalGet for the amd64 architecture. 
237 func (c *amd64Compiler) compileGlobalGet(o *wazeroir.OperationGlobalGet) error { 238 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 239 return err 240 } 241 242 intReg, err := c.allocateRegister(registerTypeGeneralPurpose) 243 if err != nil { 244 return err 245 } 246 247 // First, move the pointer to the global slice into the allocated register. 248 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, intReg) 249 250 // Now, move the location of the global instance into the register. 251 c.assembler.CompileMemoryToRegister(amd64.MOVQ, intReg, 8*int64(o.Index), intReg) 252 253 // When an integer, reuse the pointer register for the value. Otherwise, allocate a float register for it. 254 valueReg := intReg 255 var vt runtimeValueType 256 var inst asm.Instruction 257 switch c.ir.Globals[o.Index].ValType { 258 case wasm.ValueTypeI32: 259 inst = amd64.MOVL 260 vt = runtimeValueTypeI32 261 case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref: 262 inst = amd64.MOVQ 263 vt = runtimeValueTypeI64 264 case wasm.ValueTypeF32: 265 inst = amd64.MOVL 266 vt = runtimeValueTypeF32 267 valueReg, err = c.allocateRegister(registerTypeVector) 268 if err != nil { 269 return err 270 } 271 case wasm.ValueTypeF64: 272 inst = amd64.MOVQ 273 vt = runtimeValueTypeF64 274 valueReg, err = c.allocateRegister(registerTypeVector) 275 if err != nil { 276 return err 277 } 278 case wasm.ValueTypeV128: 279 inst = amd64.MOVDQU 280 vt = runtimeValueTypeV128Lo 281 valueReg, err = c.allocateRegister(registerTypeVector) 282 if err != nil { 283 return err 284 } 285 default: 286 panic("BUG: unknown runtime value type") 287 } 288 289 // Using the register holding the pointer to the target instance, move its value into a register. 290 c.assembler.CompileMemoryToRegister(inst, intReg, globalInstanceValueOffset, valueReg) 291 292 // Record that the retrieved global value on the top of the stack is now in a register. 293 if vt == runtimeValueTypeV128Lo { 294 c.pushVectorRuntimeValueLocationOnRegister(valueReg) 295 } else { 296 c.pushRuntimeValueLocationOnRegister(valueReg, vt) 297 } 298 return nil 299 } 300 301 // compileGlobalSet implements compiler.compileGlobalSet for the amd64 architecture. 302 func (c *amd64Compiler) compileGlobalSet(o *wazeroir.OperationGlobalSet) error { 303 wasmValueType := c.ir.Globals[o.Index].ValType 304 isV128 := wasmValueType == wasm.ValueTypeV128 305 306 // First, move the value to set into a temporary register. 307 val := c.locationStack.pop() 308 if isV128 { 309 // The previous val is higher 64-bits, and have to use lower 64-bit's runtimeValueLocation for allocation, etc. 310 val = c.locationStack.pop() 311 } 312 if err := c.compileEnsureOnRegister(val); err != nil { 313 return err 314 } 315 316 // Allocate a register to hold the memory location of the target global instance. 317 intReg, err := c.allocateRegister(registerTypeGeneralPurpose) 318 if err != nil { 319 return err 320 } 321 322 // First, move the pointer to the global slice into the allocated register. 323 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset, intReg) 324 325 // Now, move the location of the global instance into the register. 326 c.assembler.CompileMemoryToRegister(amd64.MOVQ, intReg, 8*int64(o.Index), intReg) 327 328 // Now ready to write the value to the global instance location. 
329 var inst asm.Instruction 330 if isV128 { 331 inst = amd64.MOVDQU 332 } else if wasmValueType == wasm.ValueTypeI32 || wasmValueType == wasm.ValueTypeF32 { 333 inst = amd64.MOVL 334 } else { 335 inst = amd64.MOVQ 336 } 337 c.assembler.CompileRegisterToMemory(inst, val.register, intReg, globalInstanceValueOffset) 338 339 // Since the value is now written to memory, release the value register. 340 c.locationStack.releaseRegister(val) 341 return nil 342 } 343 344 // compileBr implements compiler.compileBr for the amd64 architecture. 345 func (c *amd64Compiler) compileBr(o *wazeroir.OperationBr) error { 346 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 347 return err 348 } 349 return c.branchInto(o.Target) 350 } 351 352 // branchInto adds instruction necessary to jump into the given branch target. 353 func (c *amd64Compiler) branchInto(target *wazeroir.BranchTarget) error { 354 if target.IsReturnTarget() { 355 return c.compileReturnFunction() 356 } else { 357 labelKey := target.String() 358 if c.ir.LabelCallers[labelKey] > 1 { 359 // We can only re-use register state if when there's a single call-site. 360 // Release existing values on registers to the stack if there's multiple ones to have 361 // the consistent value location state at the beginning of label. 362 if err := c.compileReleaseAllRegistersToStack(); err != nil { 363 return err 364 } 365 } 366 // Set the initial stack of the target label, so we can start compiling the label 367 // with the appropriate value locations. Note we clone the stack here as we maybe 368 // manipulate the stack before compiler reaches the label. 369 targetLabel := c.label(labelKey) 370 if targetLabel.initialStack == nil { 371 // It seems unnecessary to clone as branchInto is always the tail of the current block. 372 // TODO: verify ^^. 373 targetLabel.initialStack = c.locationStack.clone() 374 } 375 jmp := c.assembler.CompileJump(amd64.JMP) 376 c.assignJumpTarget(labelKey, jmp) 377 } 378 return nil 379 } 380 381 // compileBrIf implements compiler.compileBrIf for the amd64 architecture. 382 func (c *amd64Compiler) compileBrIf(o *wazeroir.OperationBrIf) error { 383 cond := c.locationStack.pop() 384 var jmpWithCond asm.Node 385 if cond.onConditionalRegister() { 386 var inst asm.Instruction 387 switch cond.conditionalRegister { 388 case amd64.ConditionalRegisterStateE: 389 inst = amd64.JEQ 390 case amd64.ConditionalRegisterStateNE: 391 inst = amd64.JNE 392 case amd64.ConditionalRegisterStateS: 393 inst = amd64.JMI 394 case amd64.ConditionalRegisterStateNS: 395 inst = amd64.JPL 396 case amd64.ConditionalRegisterStateG: 397 inst = amd64.JGT 398 case amd64.ConditionalRegisterStateGE: 399 inst = amd64.JGE 400 case amd64.ConditionalRegisterStateL: 401 inst = amd64.JLT 402 case amd64.ConditionalRegisterStateLE: 403 inst = amd64.JLE 404 case amd64.ConditionalRegisterStateA: 405 inst = amd64.JHI 406 case amd64.ConditionalRegisterStateAE: 407 inst = amd64.JCC 408 case amd64.ConditionalRegisterStateB: 409 inst = amd64.JCS 410 case amd64.ConditionalRegisterStateBE: 411 inst = amd64.JLS 412 } 413 jmpWithCond = c.assembler.CompileJump(inst) 414 } else { 415 // Usually the comparison operand for br_if is on the conditional register, 416 // but in some cases, they are on the stack or register. 417 // For example, the following code 418 // i64.const 1 419 // local.get 1 420 // i64.add 421 // br_if .... 422 // will try to use the result of i64.add, which resides on the (virtual) stack, 423 // as the operand for br_if instruction. 
424 if err := c.compileEnsureOnRegister(cond); err != nil { 425 return err 426 } 427 // Check if the value not equals zero. 428 c.assembler.CompileRegisterToConst(amd64.CMPQ, cond.register, 0) 429 430 // Emit jump instruction which jumps when the value does not equals zero. 431 jmpWithCond = c.assembler.CompileJump(amd64.JNE) 432 c.locationStack.markRegisterUnused(cond.register) 433 } 434 435 // Make sure that the next coming label is the else jump target. 436 thenTarget, elseTarget := o.Then, o.Else 437 438 // Here's the diagram of how we organize the instructions necessarily for brif operation. 439 // 440 // jmp_with_cond -> jmp (.Else) -> Then operations... 441 // |---------(satisfied)------------^^^ 442 // 443 // Note that .Else branch doesn't have ToDrop as .Else is in reality 444 // corresponding to either If's Else block or Br_if's else block in Wasm. 445 446 // Emit for else branches 447 saved := c.locationStack 448 c.setLocationStack(saved.clone()) 449 if elseTarget.Target.IsReturnTarget() { 450 if err := c.compileReturnFunction(); err != nil { 451 return err 452 } 453 } else { 454 elseLabelKey := elseTarget.Target.Label.String() 455 if c.ir.LabelCallers[elseLabelKey] > 1 { 456 // We can only re-use register state if when there's a single call-site. 457 // Release existing values on registers to the stack if there's multiple ones to have 458 // the consistent value location state at the beginning of label. 459 if err := c.compileReleaseAllRegistersToStack(); err != nil { 460 return err 461 } 462 } 463 // Set the initial stack of the target label, so we can start compiling the label 464 // with the appropriate value locations. Note we clone the stack here as we maybe 465 // manipulate the stack before compiler reaches the label. 466 labelInfo := c.label(elseLabelKey) 467 if labelInfo.initialStack == nil { 468 labelInfo.initialStack = c.locationStack 469 } 470 471 elseJmp := c.assembler.CompileJump(amd64.JMP) 472 c.assignJumpTarget(elseLabelKey, elseJmp) 473 } 474 475 // Handle then branch. 476 c.assembler.SetJumpTargetOnNext(jmpWithCond) 477 c.setLocationStack(saved) 478 if err := compileDropRange(c, thenTarget.ToDrop); err != nil { 479 return err 480 } 481 if thenTarget.Target.IsReturnTarget() { 482 return c.compileReturnFunction() 483 } else { 484 thenLabelKey := thenTarget.Target.Label.String() 485 if c.ir.LabelCallers[thenLabelKey] > 1 { 486 // We can only re-use register state if when there's a single call-site. 487 // Release existing values on registers to the stack if there's multiple ones to have 488 // the consistent value location state at the beginning of label. 489 if err := c.compileReleaseAllRegistersToStack(); err != nil { 490 return err 491 } 492 } 493 // Set the initial stack of the target label, so we can start compiling the label 494 // with the appropriate value locations. Note we clone the stack here as we maybe 495 // manipulate the stack before compiler reaches the label. 496 labelInfo := c.label(thenLabelKey) 497 if labelInfo.initialStack == nil { 498 labelInfo.initialStack = c.locationStack 499 } 500 thenJmp := c.assembler.CompileJump(amd64.JMP) 501 c.assignJumpTarget(thenLabelKey, thenJmp) 502 return nil 503 } 504 } 505 506 // compileBrTable implements compiler.compileBrTable for the amd64 architecture. 507 func (c *amd64Compiler) compileBrTable(o *wazeroir.OperationBrTable) error { 508 index := c.locationStack.pop() 509 510 // If the operation only consists of the default target, we branch into it and return early. 
511 if len(o.Targets) == 0 { 512 c.locationStack.releaseRegister(index) 513 if err := compileDropRange(c, o.Default.ToDrop); err != nil { 514 return err 515 } 516 return c.branchInto(o.Default.Target) 517 } 518 519 // Otherwise, we jump into the selected branch. 520 if err := c.compileEnsureOnRegister(index); err != nil { 521 return err 522 } 523 524 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 525 if err != nil { 526 return err 527 } 528 529 // First, we move the length of the target list into the tmp register. 530 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(len(o.Targets)), tmp) 531 532 // Then, we compare the value with the length of targets. 533 c.assembler.CompileRegisterToRegister(amd64.CMPL, tmp, index.register) 534 535 // If the value is larger than the length, 536 // we round the index to the length as the spec states that 537 // if the index is larger than or equal to the length of the list, 538 // we branch into the default branch. 539 c.assembler.CompileRegisterToRegister(amd64.CMOVQCS, tmp, index.register) 540 541 // We prepare the static data which holds the offset of 542 // each target's first instruction (incl. default) 543 // relative to the beginning of label tables. 544 // 545 // For example, if we have targets=[L0, L1] and default=L_DEFAULT, 546 // we emit the code like this at [Emit the code for each targets and default branch] below. 547 // 548 // L0: 549 // 0x123001: XXXX, ... 550 // ..... 551 // L1: 552 // 0x123005: YYY, ... 553 // ..... 554 // L_DEFAULT: 555 // 0x123009: ZZZ, ... 556 // 557 // then offsetData becomes like [0x0, 0x5, 0x8]. 558 // By using this offset list, we could jump into the label for the index by 559 // "jmp offsetData[index]+0x123001" and "0x123001" can be acquired by "LEA" 560 // instruction. 561 // 562 // Note: We store each offset as a 32-bit unsigned integer in 4 consecutive bytes. So more precisely, 563 // the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0]. 564 // 565 // Note: this is similar to how GCC implements Switch statements in C. 566 offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Targets)+1))) 567 568 // Load the offsetData's address into tmp. 569 if err = c.assembler.CompileStaticConstToRegister(amd64.LEAQ, offsetData, tmp); err != nil { 570 return err 571 } 572 573 // Now we have the address of the first byte of offsetData in the tmp register. 574 // So the target offset's first byte is at tmp+index*4 as we store 575 // the offset as 4 bytes for a 32-bit integer. 576 // Here, we store the offset into the index.register. 577 c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVL, tmp, 0, index.register, 4, index.register) 578 579 // Now we read the address of the beginning of the jump table. 580 // In the above example, this corresponds to reading the address of 0x123001. 581 c.assembler.CompileReadInstructionAddress(tmp, amd64.JMP) 582 583 // Now we have the address of L0 in the tmp register, and the offset to the target label in the index.register. 584 // So we could achieve the br_table jump by adding them and jumping into the resulting address. 585 c.assembler.CompileRegisterToRegister(amd64.ADDQ, index.register, tmp) 586 587 c.assembler.CompileJumpToRegister(amd64.JMP, tmp) 588 589 // We no longer need the index's register, so mark it unused.
590 c.locationStack.markRegisterUnused(index.register) 591 592 // [Emit the code for each targets and default branch] 593 labelInitialInstructions := make([]asm.Node, len(o.Targets)+1) 594 saved := c.locationStack 595 for i := range labelInitialInstructions { 596 // Emit the initial instruction of each target. 597 // We use NOP as we don't yet know the next instruction in each label. 598 // Assembler would optimize out this NOP during code generation, so this is harmless. 599 labelInitialInstructions[i] = c.assembler.CompileStandAlone(amd64.NOP) 600 601 var locationStack *runtimeValueLocationStack 602 var target *wazeroir.BranchTargetDrop 603 if i < len(o.Targets) { 604 target = o.Targets[i] 605 // Clone the location stack so the branch-specific code doesn't 606 // affect others. 607 locationStack = saved.clone() 608 } else { 609 target = o.Default 610 // If this is the default branch, we use the original one 611 // as this is the last code in this block. 612 locationStack = saved 613 } 614 c.setLocationStack(locationStack) 615 if err := compileDropRange(c, target.ToDrop); err != nil { 616 return err 617 } 618 if err := c.branchInto(target.Target); err != nil { 619 return err 620 } 621 } 622 623 c.assembler.BuildJumpTable(offsetData, labelInitialInstructions) 624 return nil 625 } 626 627 func (c *amd64Compiler) assignJumpTarget(labelKey string, jmpInstruction asm.Node) { 628 jmpTargetLabel := c.label(labelKey) 629 if jmpTargetLabel.initialInstruction != nil { 630 jmpInstruction.AssignJumpTarget(jmpTargetLabel.initialInstruction) 631 } else { 632 jmpTargetLabel.labelBeginningCallbacks = append(jmpTargetLabel.labelBeginningCallbacks, func(labelInitialInstruction asm.Node) { 633 jmpInstruction.AssignJumpTarget(labelInitialInstruction) 634 }) 635 } 636 } 637 638 // compileLabel implements compiler.compileLabel for the amd64 architecture. 639 func (c *amd64Compiler) compileLabel(o *wazeroir.OperationLabel) (skipLabel bool) { 640 if false { 641 fmt.Printf("[label %s ends]\n\n", c.currentLabel) 642 } 643 644 labelKey := o.Label.String() 645 labelInfo := c.label(labelKey) 646 647 // If initialStack is not set, that means this label has never been reached. 648 if labelInfo.initialStack == nil { 649 skipLabel = true 650 c.currentLabel = "" 651 return 652 } 653 654 // We use NOP as a beginning of instructions in a label. 655 labelBegin := c.assembler.CompileStandAlone(amd64.NOP) 656 657 // Save the instructions so that backward branching 658 // instructions can jump to this label. 659 labelInfo.initialInstruction = labelBegin 660 661 // Set the initial stack. 662 c.setLocationStack(labelInfo.initialStack) 663 664 // Invoke callbacks to notify the forward branching 665 // instructions can properly jump to this label. 666 for _, cb := range labelInfo.labelBeginningCallbacks { 667 cb(labelBegin) 668 } 669 670 // Clear for debugging purpose. See the comment in "len(amd64LabelInfo.labelBeginningCallbacks) > 0" block above. 671 labelInfo.labelBeginningCallbacks = nil 672 673 if false { 674 fmt.Printf("[label %s (num callers=%d)]\n%s\n", labelKey, c.ir.LabelCallers[labelKey], c.locationStack) 675 } 676 c.currentLabel = labelKey 677 return 678 } 679 680 // compileCall implements compiler.compileCall for the amd64 architecture. 
681 func (c *amd64Compiler) compileCall(o *wazeroir.OperationCall) error { 682 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 683 return err 684 } 685 686 target := c.ir.Functions[o.FunctionIndex] 687 targetType := c.ir.Types[target] 688 689 targetAddressRegister, err := c.allocateRegister(registerTypeGeneralPurpose) 690 if err != nil { 691 return err 692 } 693 694 // First, we read the address of the first item of callEngine.functions slice (= &callEngine.functions[0]) 695 // into targetAddressRegister. 696 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, 697 callEngineModuleContextFunctionsElement0AddressOffset, targetAddressRegister) 698 699 // next, read the address of the target function (= &callEngine.codes[offset]) 700 // into targetAddressRegister. 701 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 702 // Note: FunctionIndex is limited up to 2^27 so this offset never exceeds 32-bit integer. 703 // *8 because the size of *code equals 8 bytes. 704 targetAddressRegister, int64(o.FunctionIndex)*8, 705 targetAddressRegister, 706 ) 707 708 if err := c.compileCallFunctionImpl(targetAddressRegister, targetType); err != nil { 709 return err 710 } 711 return nil 712 } 713 714 // compileCallIndirect implements compiler.compileCallIndirect for the amd64 architecture. 715 func (c *amd64Compiler) compileCallIndirect(o *wazeroir.OperationCallIndirect) error { 716 offset := c.locationStack.pop() 717 if err := c.compileEnsureOnRegister(offset); err != nil { 718 return err 719 } 720 721 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 722 if err != nil { 723 return err 724 } 725 c.locationStack.markRegisterUsed(tmp) 726 727 tmp2, err := c.allocateRegister(registerTypeGeneralPurpose) 728 if err != nil { 729 return err 730 } 731 c.locationStack.markRegisterUsed(tmp2) 732 733 // Load the address of the target table: tmp = &module.Tables[0] 734 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 735 // tmp = &module.Tables[0] + Index*8 = &module.Tables[0] + sizeOf(*TableInstance)*index = module.Tables[o.TableIndex]. 736 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(o.TableIndex*8), tmp) 737 738 // Then, we need to check that the offset doesn't exceed the length of the table. 739 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, offset.register) 740 notLengthExceedJump := c.assembler.CompileJump(amd64.JHI) 741 742 // If it exceeds, we return the function with nativeCallStatusCodeInvalidTableAccess. 743 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess) 744 c.assembler.SetJumpTargetOnNext(notLengthExceedJump) 745 746 // next we check if the target's type matches the operation's one. 747 // In order to get the type instance's address, we have to multiply the offset 748 // by 8 as the offset is an index into the table, which is Go's "[]uintptr{}", 749 // and the size of uintptr equals 8 bytes (== 2^3). 750 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, offset.register) 751 752 // Adds the address of wasm.Table[0] stored as callEngine.tableElement0Address to the offset.
753 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 754 tmp, tableInstanceTableOffset, offset.register) 755 756 // "offset = (*offset) (== table[offset] == *code type)" 757 c.assembler.CompileMemoryToRegister(amd64.MOVQ, offset.register, 0, offset.register) 758 759 // At this point offset.register holds the address of *code (as uintptr) at wasm.Table[offset]. 760 // 761 // Check if the value of table[offset] equals zero, meaning that the target is uninitialized. 762 c.assembler.CompileRegisterToConst(amd64.CMPQ, offset.register, 0) 763 764 // Jump if the target is an initialized element. 765 jumpIfInitialized := c.assembler.CompileJump(amd64.JNE) 766 767 // If not initialized, we return the function with nativeCallStatusCodeInvalidTableAccess. 768 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess) 769 770 c.assembler.SetJumpTargetOnNext(jumpIfInitialized) 771 772 // next we need to check the type matches, i.e. table[offset].source.TypeID == targetFunctionType's typeID. 773 // 774 // "tmp = table[offset].source ( == *FunctionInstance type)" 775 c.assembler.CompileMemoryToRegister(amd64.MOVQ, offset.register, functionSourceOffset, tmp) 776 777 // "tmp2 = [&moduleInstance.TypeIDs[0] + index * 4] (== moduleInstance.TypeIDs[index])" 778 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 779 amd64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset, 780 tmp2) 781 c.assembler.CompileMemoryToRegister(amd64.MOVL, tmp2, int64(o.TypeIndex)*4, tmp2) 782 783 // Jump if the type matches. 784 c.assembler.CompileMemoryToRegister(amd64.CMPL, tmp, functionInstanceTypeIDOffset, tmp2) 785 jumpIfTypeMatch := c.assembler.CompileJump(amd64.JEQ) 786 787 // Otherwise, exit with type mismatch status. 788 c.compileExitFromNativeCode(nativeCallStatusCodeTypeMismatchOnIndirectCall) 789 790 c.assembler.SetJumpTargetOnNext(jumpIfTypeMatch) 791 targetFunctionType := c.ir.Types[o.TypeIndex] 792 if err = c.compileCallFunctionImpl(offset.register, targetFunctionType); err != nil { 793 return err 794 } 795 796 // The offset register should be marked as unused as it is consumed by the function call. 797 c.locationStack.markRegisterUnused(offset.register, tmp, tmp2) 798 return nil 799 } 800 801 // compileDrop implements compiler.compileDrop for the amd64 architecture. 802 func (c *amd64Compiler) compileDrop(o *wazeroir.OperationDrop) error { 803 return compileDropRange(c, o.Depth) 804 } 805 806 // compileSelectV128Impl implements compileSelect for vector values. 807 func (c *amd64Compiler) compileSelectV128Impl(selectorReg asm.Register) error { 808 x2 := c.locationStack.popV128() 809 if err := c.compileEnsureOnRegister(x2); err != nil { 810 return err 811 } 812 813 x1 := c.locationStack.popV128() 814 if err := c.compileEnsureOnRegister(x1); err != nil { 815 return err 816 } 817 818 // Compare the conditional value with zero. 819 c.assembler.CompileRegisterToConst(amd64.CMPQ, selectorReg, 0) 820 821 // Set the jump if the top value is not zero. 822 jmpIfNotZero := c.assembler.CompileJump(amd64.JNE) 823 824 // In this branch, we select the value of x2, so we move the value into x1.register so that 825 // we can have the result in x1.register regardless of the selection. 826 c.assembler.CompileRegisterToRegister(amd64.MOVDQU, x2.register, x1.register) 827 828 // Else, we don't need to adjust value, just need to jump to the next instruction. 829 c.assembler.SetJumpTargetOnNext(jmpIfNotZero) 830 831 // As noted, the result exists in x1.register regardless of the selector.
832 c.pushVectorRuntimeValueLocationOnRegister(x1.register) 833 // Plus, x2.register is no longer used. 834 c.locationStack.markRegisterUnused(x2.register) 835 c.locationStack.markRegisterUnused(selectorReg) 836 return nil 837 } 838 839 // compileSelect implements compiler.compileSelect for the amd64 architecture. 840 // 841 // The emitted native code depends on whether the values are on 842 // the physical registers or memory stack, or maybe conditional register. 843 func (c *amd64Compiler) compileSelect(o *wazeroir.OperationSelect) error { 844 cv := c.locationStack.pop() 845 if err := c.compileEnsureOnRegister(cv); err != nil { 846 return err 847 } 848 849 if o.IsTargetVector { 850 return c.compileSelectV128Impl(cv.register) 851 } 852 853 x2 := c.locationStack.pop() 854 // We do not consume x1 here, but modify the value according to 855 // the conditional value "c" above. 856 peekedX1 := c.locationStack.peek() 857 858 // Compare the conditional value with zero. 859 c.assembler.CompileRegisterToConst(amd64.CMPQ, cv.register, 0) 860 861 // Now we can use c.register as temporary location. 862 // We alias it here for readability. 863 tmpRegister := cv.register 864 865 // Set the jump if the top value is not zero. 866 jmpIfNotZero := c.assembler.CompileJump(amd64.JNE) 867 868 // If the value is zero, we must place the value of x2 onto the stack position of x1. 869 870 // First we copy the value of x2 to the temporary register if x2 is not currently on a register. 871 if x2.onStack() { 872 x2.register = tmpRegister 873 c.compileLoadValueOnStackToRegister(x2) 874 } 875 876 // 877 // At this point x2's value is always on a register. 878 // 879 880 // Then release the value in the x2's register to the x1's stack position. 881 if peekedX1.onRegister() { 882 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x2.register, peekedX1.register) 883 } else { 884 peekedX1.register = x2.register 885 c.compileReleaseRegisterToStack(peekedX1) // Note inside we mark the register unused! 886 } 887 888 // Else, we don't need to adjust value, just need to jump to the next instruction. 889 c.assembler.SetJumpTargetOnNext(jmpIfNotZero) 890 891 // In any case, we don't need x2 and c anymore! 892 c.locationStack.releaseRegister(x2) 893 c.locationStack.releaseRegister(cv) 894 return nil 895 } 896 897 // compilePick implements compiler.compilePick for the amd64 architecture. 898 func (c *amd64Compiler) compilePick(o *wazeroir.OperationPick) error { 899 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 900 return err 901 } 902 903 // TODO: if we track the type of values on the stack, 904 // we could optimize the instruction according to the bit size of the value. 905 // For now, we just move the entire register i.e. as a quad word (8 bytes). 906 pickTarget := c.locationStack.stack[c.locationStack.sp-1-uint64(o.Depth)] 907 reg, err := c.allocateRegister(pickTarget.getRegisterType()) 908 if err != nil { 909 return err 910 } 911 912 if pickTarget.onRegister() { 913 var inst asm.Instruction 914 if o.IsTargetVector { 915 inst = amd64.MOVDQU 916 } else if pickTarget.valueType == runtimeValueTypeI32 { // amd64 cannot copy single-precisions between registers. 917 inst = amd64.MOVL 918 } else { 919 inst = amd64.MOVQ 920 } 921 c.assembler.CompileRegisterToRegister(inst, pickTarget.register, reg) 922 } else if pickTarget.onStack() { 923 // Copy the value from the stack. 
924 var inst asm.Instruction 925 if o.IsTargetVector { 926 inst = amd64.MOVDQU 927 } else if pickTarget.valueType == runtimeValueTypeI32 || pickTarget.valueType == runtimeValueTypeF32 { 928 inst = amd64.MOVL 929 } else { 930 inst = amd64.MOVQ 931 } 932 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 933 c.assembler.CompileMemoryToRegister(inst, amd64ReservedRegisterForStackBasePointerAddress, 934 int64(pickTarget.stackPointer)*8, reg) 935 } 936 // Now we already placed the picked value on the register, 937 // so push the location onto the stack. 938 if o.IsTargetVector { 939 c.pushVectorRuntimeValueLocationOnRegister(reg) 940 } else { 941 c.pushRuntimeValueLocationOnRegister(reg, pickTarget.valueType) 942 } 943 return nil 944 } 945 946 // compileAdd implements compiler.compileAdd for the amd64 architecture. 947 func (c *amd64Compiler) compileAdd(o *wazeroir.OperationAdd) error { 948 // TODO: if the previous instruction is const, then 949 // this can be optimized. Same goes for other arithmetic instructions. 950 951 var instruction asm.Instruction 952 switch o.Type { 953 case wazeroir.UnsignedTypeI32: 954 instruction = amd64.ADDL 955 case wazeroir.UnsignedTypeI64: 956 instruction = amd64.ADDQ 957 case wazeroir.UnsignedTypeF32: 958 instruction = amd64.ADDSS 959 case wazeroir.UnsignedTypeF64: 960 instruction = amd64.ADDSD 961 } 962 963 x2 := c.locationStack.pop() 964 if err := c.compileEnsureOnRegister(x2); err != nil { 965 return err 966 } 967 968 x1 := c.locationStack.peek() // Note this is peek, not pop! 969 if err := c.compileEnsureOnRegister(x1); err != nil { 970 return err 971 } 972 973 // x1 += x2. 974 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 975 976 // We no longer need the x2 register after the ADD operation here, 977 // so we release it. 978 c.locationStack.releaseRegister(x2) 979 return nil 980 } 981 982 // compileSub implements compiler.compileSub for the amd64 architecture. 983 func (c *amd64Compiler) compileSub(o *wazeroir.OperationSub) error { 984 // TODO: if the previous instruction is const, then 985 // this can be optimized. Same goes for other arithmetic instructions. 986 987 var instruction asm.Instruction 988 switch o.Type { 989 case wazeroir.UnsignedTypeI32: 990 instruction = amd64.SUBL 991 case wazeroir.UnsignedTypeI64: 992 instruction = amd64.SUBQ 993 case wazeroir.UnsignedTypeF32: 994 instruction = amd64.SUBSS 995 case wazeroir.UnsignedTypeF64: 996 instruction = amd64.SUBSD 997 } 998 999 x2 := c.locationStack.pop() 1000 if err := c.compileEnsureOnRegister(x2); err != nil { 1001 return err 1002 } 1003 1004 x1 := c.locationStack.peek() // Note this is peek, not pop! 1005 if err := c.compileEnsureOnRegister(x1); err != nil { 1006 return err 1007 } 1008 1009 // x1 -= x2. 1010 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 1011 1012 // We no longer need the x2 register after the SUB operation here, 1013 // so we release it. 1014 c.locationStack.releaseRegister(x2) 1015 return nil 1016 } 1017 1018 // compileMul implements compiler.compileMul for the amd64 architecture.
1019 func (c *amd64Compiler) compileMul(o *wazeroir.OperationMul) (err error) { 1020 switch o.Type { 1021 case wazeroir.UnsignedTypeI32: 1022 err = c.compileMulForInts(true, amd64.MULL) 1023 case wazeroir.UnsignedTypeI64: 1024 err = c.compileMulForInts(false, amd64.MULQ) 1025 case wazeroir.UnsignedTypeF32: 1026 err = c.compileMulForFloats(amd64.MULSS) 1027 case wazeroir.UnsignedTypeF64: 1028 err = c.compileMulForFloats(amd64.MULSD) 1029 } 1030 return 1031 } 1032 1033 // compileMulForInts emits instructions to perform integer multiplication for 1034 // the top two values on the stack. If unfamiliar with the convention for integer 1035 // multiplication on x86, see https://www.felixcloutier.com/x86/mul. 1036 // 1037 // In summary, one of the values must be on the AX register, 1038 // and the mul instruction stores the overflow info in the DX register, which we don't use. 1039 // Here, "the overflow info" means the upper half of the result, e.g. the 65th bit and higher in the 64-bit case. 1040 // 1041 // So, we have to ensure that 1042 // 1. Any value previously located on DX is saved to the memory stack, because 1043 // the existing value will be overwritten by the mul execution. 1044 // 2. One of the operands (x1 or x2) is on the AX register. 1045 // 1046 // See https://www.felixcloutier.com/x86/mul#description for the detailed semantics. 1047 func (c *amd64Compiler) compileMulForInts(is32Bit bool, mulInstruction asm.Instruction) error { 1048 const ( 1049 resultRegister = amd64.RegAX 1050 reservedRegister = amd64.RegDX 1051 ) 1052 1053 x2 := c.locationStack.pop() 1054 x1 := c.locationStack.pop() 1055 1056 var valueOnAX *runtimeValueLocation 1057 if x1.register == resultRegister { 1058 valueOnAX = x1 1059 } else if x2.register == resultRegister { 1060 valueOnAX = x2 1061 } else { 1062 valueOnAX = x2 1063 // In this case, we move x2 to the AX register. 1064 c.onValueReleaseRegisterToStack(resultRegister) 1065 if x2.onConditionalRegister() { 1066 c.compileMoveConditionalToGeneralPurposeRegister(x2, resultRegister) 1067 } else if x2.onStack() { 1068 x2.setRegister(resultRegister) 1069 c.compileLoadValueOnStackToRegister(x2) 1070 c.locationStack.markRegisterUsed(resultRegister) 1071 } else { 1072 var inst asm.Instruction 1073 if is32Bit { 1074 inst = amd64.MOVL 1075 } else { 1076 inst = amd64.MOVQ 1077 } 1078 c.assembler.CompileRegisterToRegister(inst, x2.register, resultRegister) 1079 1080 // We no longer use the previous register of x2. 1081 c.locationStack.releaseRegister(x2) 1082 x2.setRegister(resultRegister) 1083 c.locationStack.markRegisterUsed(resultRegister) 1084 } 1085 } 1086 1087 // We have to make sure that at this point the operands are on registers. 1088 if err := c.compileEnsureOnRegister(x2); err != nil { 1089 return err 1090 } 1091 if err := c.compileEnsureOnRegister(x1); err != nil { 1092 return err 1093 } 1094 1095 // We have to save the existing value on DX. 1096 // If the DX register is used by either x1 or x2, we don't need to 1097 // save the value because it is consumed by mul anyway. 1098 if x1.register != reservedRegister && x2.register != reservedRegister { 1099 c.onValueReleaseRegisterToStack(reservedRegister) 1100 } 1101 1102 // Now ready to emit the mul instruction.
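// Note: the single-operand form of MUL emitted here (MULL/MULQ) multiplies the given register by the
// value implicitly held in AX, and writes the full-width product across DX:AX (low half in AX, high
// half in DX). Wasm's i32.mul/i64.mul keep only the low half, which is why we pass just the non-AX
// operand below and then record AX as the result register.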
1103 if x1 == valueOnAX { 1104 c.assembler.CompileRegisterToNone(mulInstruction, x2.register) 1105 } else { 1106 c.assembler.CompileRegisterToNone(mulInstruction, x1.register) 1107 } 1108 1109 c.locationStack.markRegisterUnused(x2.register) 1110 c.locationStack.markRegisterUnused(x1.register) 1111 1112 // Now we have the result in the AX register, 1113 // so we record it. 1114 c.pushRuntimeValueLocationOnRegister(resultRegister, x1.valueType) 1115 return nil 1116 } 1117 1118 func (c *amd64Compiler) compileMulForFloats(instruction asm.Instruction) error { 1119 x2 := c.locationStack.pop() 1120 if err := c.compileEnsureOnRegister(x2); err != nil { 1121 return err 1122 } 1123 1124 x1 := c.locationStack.peek() // Note this is peek! 1125 if err := c.compileEnsureOnRegister(x1); err != nil { 1126 return err 1127 } 1128 1129 // x1 *= x2. 1130 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 1131 1132 // We no longer need x2 register after MUL operation here, 1133 // so we release it. 1134 c.locationStack.releaseRegister(x2) 1135 return nil 1136 } 1137 1138 // compileClz implements compiler.compileClz for the amd64 architecture. 1139 func (c *amd64Compiler) compileClz(o *wazeroir.OperationClz) error { 1140 target := c.locationStack.pop() 1141 if err := c.compileEnsureOnRegister(target); err != nil { 1142 return err 1143 } 1144 1145 if runtime.GOOS != "darwin" && runtime.GOOS != "freebsd" { 1146 if o.Type == wazeroir.UnsignedInt32 { 1147 c.assembler.CompileRegisterToRegister(amd64.LZCNTL, target.register, target.register) 1148 } else { 1149 c.assembler.CompileRegisterToRegister(amd64.LZCNTQ, target.register, target.register) 1150 } 1151 } else { 1152 // On x86 mac, we cannot use LZCNT as it always results in zero. 1153 // Instead we combine BSR (calculating most significant set bit) 1154 // with XOR. This logic is described in 1155 // "Replace Raw Assembly Code with Builtin Intrinsics" section in: 1156 // https://developer.apple.com/documentation/apple-silicon/addressing-architectural-differences-in-your-macos-code. 1157 1158 // First, we have to check if the target is non-zero as BSR is undefined 1159 // on zero. See https://www.felixcloutier.com/x86/bsr. 1160 c.assembler.CompileRegisterToConst(amd64.CMPQ, target.register, 0) 1161 jmpIfNonZero := c.assembler.CompileJump(amd64.JNE) 1162 1163 // If the value is zero, we just push the const value. 1164 if o.Type == wazeroir.UnsignedInt32 { 1165 c.assembler.CompileConstToRegister(amd64.MOVL, int64(32), target.register) 1166 } else { 1167 c.assembler.CompileConstToRegister(amd64.MOVL, int64(64), target.register) 1168 } 1169 1170 // Emit the jmp instruction to jump to the position right after 1171 // the non-zero case. 1172 jmpAtEndOfZero := c.assembler.CompileJump(amd64.JMP) 1173 1174 // Start emitting non-zero case. 1175 c.assembler.SetJumpTargetOnNext(jmpIfNonZero) 1176 // First, we calculate the most significant set bit. 1177 if o.Type == wazeroir.UnsignedInt32 { 1178 c.assembler.CompileRegisterToRegister(amd64.BSRL, target.register, target.register) 1179 } else { 1180 c.assembler.CompileRegisterToRegister(amd64.BSRQ, target.register, target.register) 1181 } 1182 1183 // Now we XOR the value with the bit length minus one. 
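// Note: for a non-zero input, the leading zero count equals (bit width - 1) - BSR(value).
// Since BSR's result is always in [0, 31] (resp. [0, 63]), XOR-ing it with 31 (resp. 63)
// is the same as computing 31 - BSR (resp. 63 - BSR), which yields CLZ without a SUB.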
1184 if o.Type == wazeroir.UnsignedInt32 { 1185 c.assembler.CompileConstToRegister(amd64.XORL, 31, target.register) 1186 } else { 1187 c.assembler.CompileConstToRegister(amd64.XORQ, 63, target.register) 1188 } 1189 1190 // Finally the end jump instruction of zero case must target towards 1191 // the next instruction. 1192 c.assembler.SetJumpTargetOnNext(jmpAtEndOfZero) 1193 } 1194 1195 // We reused the same register of target for the result. 1196 c.locationStack.markRegisterUnused(target.register) 1197 c.pushRuntimeValueLocationOnRegister(target.register, target.valueType) 1198 return nil 1199 } 1200 1201 // compileCtz implements compiler.compileCtz for the amd64 architecture. 1202 func (c *amd64Compiler) compileCtz(o *wazeroir.OperationCtz) error { 1203 target := c.locationStack.pop() 1204 if err := c.compileEnsureOnRegister(target); err != nil { 1205 return err 1206 } 1207 1208 if runtime.GOOS != "darwin" && runtime.GOOS != "freebsd" { 1209 if o.Type == wazeroir.UnsignedInt32 { 1210 c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register) 1211 } else { 1212 c.assembler.CompileRegisterToRegister(amd64.TZCNTQ, target.register, target.register) 1213 } 1214 } else { 1215 // Somehow, if the target value is zero, TZCNT always returns zero: this is wrong. 1216 // Meanwhile, we need branches for non-zero and zero cases on macos. 1217 // TODO: find the reference to this behavior and put the link here. 1218 1219 // First we compare the target with zero. 1220 c.assembler.CompileRegisterToConst(amd64.CMPQ, target.register, 0) 1221 jmpIfNonZero := c.assembler.CompileJump(amd64.JNE) 1222 1223 // If the value is zero, we just push the const value. 1224 if o.Type == wazeroir.UnsignedInt32 { 1225 c.assembler.CompileConstToRegister(amd64.MOVL, int64(32), target.register) 1226 } else { 1227 c.assembler.CompileConstToRegister(amd64.MOVL, int64(64), target.register) 1228 } 1229 1230 // Emit the jmp instruction to jump to the position right after 1231 // the non-zero case. 1232 jmpAtEndOfZero := c.assembler.CompileJump(amd64.JMP) 1233 1234 // Otherwise, emit the TZCNT. 1235 c.assembler.SetJumpTargetOnNext(jmpIfNonZero) 1236 if o.Type == wazeroir.UnsignedInt32 { 1237 c.assembler.CompileRegisterToRegister(amd64.TZCNTL, target.register, target.register) 1238 } else { 1239 c.assembler.CompileRegisterToRegister(amd64.TZCNTQ, target.register, target.register) 1240 } 1241 1242 // Finally the end jump instruction of zero case must target towards 1243 // the next instruction. 1244 c.assembler.SetJumpTargetOnNext(jmpAtEndOfZero) 1245 } 1246 1247 // We reused the same register of target for the result. 1248 c.locationStack.markRegisterUnused(target.register) 1249 c.pushRuntimeValueLocationOnRegister(target.register, target.valueType) 1250 return nil 1251 } 1252 1253 // compilePopcnt implements compiler.compilePopcnt for the amd64 architecture. 1254 func (c *amd64Compiler) compilePopcnt(o *wazeroir.OperationPopcnt) error { 1255 target := c.locationStack.pop() 1256 if err := c.compileEnsureOnRegister(target); err != nil { 1257 return err 1258 } 1259 1260 if o.Type == wazeroir.UnsignedInt32 { 1261 c.assembler.CompileRegisterToRegister(amd64.POPCNTL, target.register, target.register) 1262 } else { 1263 c.assembler.CompileRegisterToRegister(amd64.POPCNTQ, target.register, target.register) 1264 } 1265 1266 // We reused the same register of target for the result. 
1267 c.locationStack.markRegisterUnused(target.register) 1268 c.pushRuntimeValueLocationOnRegister(target.register, target.valueType) 1269 return nil 1270 } 1271 1272 // compileDiv implements compiler.compileDiv for the amd64 architecture. 1273 func (c *amd64Compiler) compileDiv(o *wazeroir.OperationDiv) (err error) { 1274 switch o.Type { 1275 case wazeroir.SignedTypeUint32: 1276 err = c.compileDivForInts(true, false) 1277 case wazeroir.SignedTypeUint64: 1278 err = c.compileDivForInts(false, false) 1279 case wazeroir.SignedTypeInt32: 1280 err = c.compileDivForInts(true, true) 1281 case wazeroir.SignedTypeInt64: 1282 err = c.compileDivForInts(false, true) 1283 case wazeroir.SignedTypeFloat32: 1284 err = c.compileDivForFloats(true) 1285 case wazeroir.SignedTypeFloat64: 1286 err = c.compileDivForFloats(false) 1287 } 1288 return 1289 } 1290 1291 // compileDivForInts emits the instructions to perform division on the top 1292 // two values of integer type on the stack and puts the quotient of the result 1293 // onto the stack. For example, stack [..., 10, 3] results in [..., 3] where 1294 // the remainder is discarded. 1295 func (c *amd64Compiler) compileDivForInts(is32Bit bool, signed bool) error { 1296 if err := c.performDivisionOnInts(false, is32Bit, signed); err != nil { 1297 return err 1298 } 1299 // Now we have the quotient of the division result in the AX register, 1300 // so we record it. 1301 if is32Bit { 1302 c.pushRuntimeValueLocationOnRegister(amd64.RegAX, runtimeValueTypeI32) 1303 } else { 1304 c.pushRuntimeValueLocationOnRegister(amd64.RegAX, runtimeValueTypeI64) 1305 } 1306 return nil 1307 } 1308 1309 // compileRem implements compiler.compileRem for the amd64 architecture. 1310 func (c *amd64Compiler) compileRem(o *wazeroir.OperationRem) (err error) { 1311 var vt runtimeValueType 1312 switch o.Type { 1313 case wazeroir.SignedInt32: 1314 err = c.performDivisionOnInts(true, true, true) 1315 vt = runtimeValueTypeI32 1316 case wazeroir.SignedInt64: 1317 err = c.performDivisionOnInts(true, false, true) 1318 vt = runtimeValueTypeI64 1319 case wazeroir.SignedUint32: 1320 err = c.performDivisionOnInts(true, true, false) 1321 vt = runtimeValueTypeI32 1322 case wazeroir.SignedUint64: 1323 err = c.performDivisionOnInts(true, false, false) 1324 vt = runtimeValueTypeI64 1325 } 1326 if err != nil { 1327 return err 1328 } 1329 1330 // Now we have the remainder of the division result in the DX register, 1331 // so we record it. 1332 c.pushRuntimeValueLocationOnRegister(amd64.RegDX, vt) 1333 return 1334 } 1335 1336 // performDivisionOnInts emits the instructions to do divisions on top two integers on the stack 1337 // via DIV (unsigned div) and IDIV (signed div) instructions. 1338 // See the following explanation of these instructions' semantics from https://www.lri.fr/~filliatr/ens/compil/x86-64.pdf 1339 // 1340 // >> Division requires special arrangements: idiv (signed) and div (unsigned) operate on a 2n-byte dividend and 1341 // >> an n-byte divisor to produce an n-byte quotient and n-byte remainder. The dividend always lives in a fixed pair of 1342 // >> registers (%edx and %eax for the 32-bit case; %rdx and %rax for the 64-bit case); the divisor is specified as the 1343 // >> source operand in the instruction. The quotient goes in %eax (resp. %rax); the remainder in %edx (resp. %rdx). For 1344 // >> signed division, the cltd (resp. ctqo) instruction is used to prepare %edx (resp. %rdx) with the sign extension of 1345 // >> %eax (resp. %rax). 
For example, if a,b, c are memory locations holding quad words, then we could set c = a/b 1346 // >> using the sequence: movq a(%rip), %rax; ctqo; idivq b(%rip); movq %rax, c(%rip). 1347 // 1348 // tl;dr is that the division result is placed in AX and DX registers after instructions emitted by this function 1349 // where AX holds the quotient while DX the remainder of the division result. 1350 func (c *amd64Compiler) performDivisionOnInts(isRem, is32Bit, signed bool) error { 1351 const ( 1352 quotientRegister = amd64.RegAX 1353 remainderRegister = amd64.RegDX 1354 ) 1355 1356 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 1357 return err 1358 } 1359 1360 // Ensures that previous values on these registers are saved to memory. 1361 c.onValueReleaseRegisterToStack(quotientRegister) 1362 c.onValueReleaseRegisterToStack(remainderRegister) 1363 1364 // In order to ensure x2 is placed on a temporary register for x2 value other than AX and DX, 1365 // we mark them as used here. 1366 c.locationStack.markRegisterUsed(quotientRegister) 1367 c.locationStack.markRegisterUsed(remainderRegister) 1368 1369 // Ensure that x2 is placed on a register which is not either AX or DX. 1370 x2 := c.locationStack.pop() 1371 if err := c.compileEnsureOnRegister(x2); err != nil { 1372 return err 1373 } 1374 1375 // Now we successfully place x2 on a temp register, so we no longer need to 1376 // mark these registers used. 1377 c.locationStack.markRegisterUnused(quotientRegister) 1378 c.locationStack.markRegisterUnused(remainderRegister) 1379 1380 // Check if the x2 equals zero. 1381 if is32Bit { 1382 c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, 0) 1383 } else { 1384 c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, 0) 1385 } 1386 1387 // Jump if the divisor is not zero. 1388 jmpIfNotZero := c.assembler.CompileJump(amd64.JNE) 1389 1390 // Otherwise, we return with nativeCallStatusIntegerDivisionByZero status. 1391 c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero) 1392 1393 c.assembler.SetJumpTargetOnNext(jmpIfNotZero) 1394 1395 // next, we ensure that x1 is placed on AX. 1396 x1 := c.locationStack.pop() 1397 if x1.onRegister() && x1.register != quotientRegister { 1398 // Move x1 to quotientRegister. 1399 if is32Bit { 1400 c.assembler.CompileRegisterToRegister(amd64.MOVL, x1.register, quotientRegister) 1401 } else { 1402 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x1.register, quotientRegister) 1403 } 1404 c.locationStack.markRegisterUnused(x1.register) 1405 x1.setRegister(quotientRegister) 1406 } else if x1.onStack() { 1407 x1.setRegister(quotientRegister) 1408 c.compileLoadValueOnStackToRegister(x1) 1409 } 1410 1411 // Note: at this point, x1 is placed on AX, x2 is on a register which is not AX or DX. 1412 1413 isSignedRem := isRem && signed 1414 isSignedDiv := !isRem && signed 1415 var signedRemMinusOneDivisorJmp asm.Node 1416 if isSignedRem { 1417 // If this is for getting remainder of signed division, 1418 // we have to treat the special case where the divisor equals -1. 1419 // For example, if this is 32-bit case, the result of (-2^31) / -1 equals (quotient=2^31, remainder=0) 1420 // where quotient doesn't fit in the 32-bit range whose maximum is 2^31-1. 1421 // x86 in this case cause floating point exception, but according to the Wasm spec 1422 // if the divisor equals -1, the result must be zero (not undefined!) as opposed to be "undefined" 1423 // for divisions on (-2^31) / -1 where we do not need to emit the special branches. 
1424 // For detail, please refer to https://stackoverflow.com/questions/56303282/why-idiv-with-1-causes-floating-point-exception
1425
1426 // First we compare the divisor with -1.
1427 if is32Bit {
1428 c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, -1)
1429 } else {
1430 c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, -1)
1431 }
1432
1433 // If it doesn't equal minus one, we jump to the normal case.
1434 okJmp := c.assembler.CompileJump(amd64.JNE)
1435
1436 // Otherwise, we store zero into the remainder result register (DX).
1437 if is32Bit {
1438 c.assembler.CompileRegisterToRegister(amd64.XORL, remainderRegister, remainderRegister)
1439 } else {
1440 c.assembler.CompileRegisterToRegister(amd64.XORQ, remainderRegister, remainderRegister)
1441 }
1442
1443 // Emit the exit jump instruction for the divisor -1 case so
1444 // we skip the normal case.
1445 signedRemMinusOneDivisorJmp = c.assembler.CompileJump(amd64.JMP)
1446
1447 // Set the normal case's jump target.
1448 c.assembler.SetJumpTargetOnNext(okJmp)
1449 } else if isSignedDiv {
1450 // For signed division, we need branches for the "math.MinInt{32,64} / -1"
1451 // case, which raises a floating point exception (division error) as
1452 // the resulting value exceeds the maximum of the signed int.
1453
1454 // First we compare the divisor with -1.
1455 if is32Bit {
1456 c.assembler.CompileRegisterToConst(amd64.CMPL, x2.register, -1)
1457 } else {
1458 c.assembler.CompileRegisterToConst(amd64.CMPQ, x2.register, -1)
1459 }
1460
1461 // If it doesn't equal minus one, we jump to the normal case.
1462 nonMinusOneDivisorJmp := c.assembler.CompileJump(amd64.JNE)
1463
1464 // Next we check if the dividend is the most negative value for the signed integer,
1465 // i.e. whether we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1) respectively.
1466 if is32Bit {
1467 if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPL, x1.register,
1468 asm.NewStaticConst(u32.LeBytes(uint32(minimum32BitSignedInt)))); err != nil {
1469 return err
1470 }
1471 } else {
1472 if err := c.assembler.CompileRegisterToStaticConst(amd64.CMPQ, x1.register,
1473 asm.NewStaticConst(u64.LeBytes(uint64(minimum64BitSignedInt)))); err != nil {
1474 return err
1475 }
1476 }
1477
1478 // If it doesn't equal the minimum, we jump to the normal case.
1479 jmpOK := c.assembler.CompileJump(amd64.JNE)
1480
1481 // Otherwise, we are trying to do (math.MinInt32 / -1) or (math.MinInt64 / -1),
1482 // and that is an overflow in division as the result becomes 2^31 (or 2^63), which is larger than
1483 // the maximum of the signed 32-bit (or 64-bit) int.
1484 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
1485
1486 // Set the normal case's jump target.
1487 c.assembler.SetJumpTargetOnNext(nonMinusOneDivisorJmp, jmpOK)
1488 }
1489
1490 // Now we are ready to emit the div instruction.
1491 // Since the div instruction takes a 2n-byte dividend placed in the DX:AX registers...
1492 // * signed case - we need to sign-extend the dividend into the DX register via CDQ (32 bit) or CQO (64 bit).
1493 // * unsigned case - we need to zero the DX register via "XOR DX DX"
1494 if is32Bit && signed {
1495 // Emit sign-extension to have 64 bit dividend over DX and AX registers.
1496 c.assembler.CompileStandAlone(amd64.CDQ)
1497 c.assembler.CompileRegisterToNone(amd64.IDIVL, x2.register)
1498 } else if is32Bit && !signed {
1499 // Zeros DX register to have 64 bit dividend over DX and AX registers.
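// (XOR-ing a register with itself is the idiomatic way to zero it; with DX cleared,
// the unsigned DIV sees the dividend zero-extended across the DX and AX registers.)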
1500 c.assembler.CompileRegisterToRegister(amd64.XORQ, amd64.RegDX, amd64.RegDX) 1501 c.assembler.CompileRegisterToNone(amd64.DIVL, x2.register) 1502 } else if !is32Bit && signed { 1503 // Emits sign-extension to have 128 bit dividend over DX and AX registers. 1504 c.assembler.CompileStandAlone(amd64.CQO) 1505 c.assembler.CompileRegisterToNone(amd64.IDIVQ, x2.register) 1506 } else if !is32Bit && !signed { 1507 // Zeros DX register to have 128 bit dividend over DX and AX registers. 1508 c.assembler.CompileRegisterToRegister(amd64.XORQ, amd64.RegDX, amd64.RegDX) 1509 c.assembler.CompileRegisterToNone(amd64.DIVQ, x2.register) 1510 } 1511 1512 // If this is signed rem instruction, we must set the jump target of 1513 // the exit jump from division -1 case towards the next instruction. 1514 if signedRemMinusOneDivisorJmp != nil { 1515 c.assembler.SetJumpTargetOnNext(signedRemMinusOneDivisorJmp) 1516 } 1517 1518 // We mark them as unused so that we can push one of them onto the location stack at call sites. 1519 c.locationStack.markRegisterUnused(remainderRegister) 1520 c.locationStack.markRegisterUnused(quotientRegister) 1521 c.locationStack.markRegisterUnused(x2.register) 1522 return nil 1523 } 1524 1525 // compileDivForFloats emits the instructions to perform division 1526 // on the top two values of float type on the stack, placing the result back onto the stack. 1527 // For example, stack [..., 1.0, 4.0] results in [..., 0.25]. 1528 func (c *amd64Compiler) compileDivForFloats(is32Bit bool) error { 1529 if is32Bit { 1530 return c.compileSimpleBinaryOp(amd64.DIVSS) 1531 } else { 1532 return c.compileSimpleBinaryOp(amd64.DIVSD) 1533 } 1534 } 1535 1536 // compileAnd implements compiler.compileAnd for the amd64 architecture. 1537 func (c *amd64Compiler) compileAnd(o *wazeroir.OperationAnd) (err error) { 1538 switch o.Type { 1539 case wazeroir.UnsignedInt32: 1540 err = c.compileSimpleBinaryOp(amd64.ANDL) 1541 case wazeroir.UnsignedInt64: 1542 err = c.compileSimpleBinaryOp(amd64.ANDQ) 1543 } 1544 return 1545 } 1546 1547 // compileOr implements compiler.compileOr for the amd64 architecture. 1548 func (c *amd64Compiler) compileOr(o *wazeroir.OperationOr) (err error) { 1549 switch o.Type { 1550 case wazeroir.UnsignedInt32: 1551 err = c.compileSimpleBinaryOp(amd64.ORL) 1552 case wazeroir.UnsignedInt64: 1553 err = c.compileSimpleBinaryOp(amd64.ORQ) 1554 } 1555 return 1556 } 1557 1558 // compileXor implements compiler.compileXor for the amd64 architecture. 1559 func (c *amd64Compiler) compileXor(o *wazeroir.OperationXor) (err error) { 1560 switch o.Type { 1561 case wazeroir.UnsignedInt32: 1562 err = c.compileSimpleBinaryOp(amd64.XORL) 1563 case wazeroir.UnsignedInt64: 1564 err = c.compileSimpleBinaryOp(amd64.XORQ) 1565 } 1566 return 1567 } 1568 1569 // compileSimpleBinaryOp emits instructions to pop two values from the stack 1570 // and perform the given instruction on these two values and push the result 1571 // onto the stack. 1572 func (c *amd64Compiler) compileSimpleBinaryOp(instruction asm.Instruction) error { 1573 x2 := c.locationStack.pop() 1574 if err := c.compileEnsureOnRegister(x2); err != nil { 1575 return err 1576 } 1577 1578 x1 := c.locationStack.pop() 1579 if err := c.compileEnsureOnRegister(x1); err != nil { 1580 return err 1581 } 1582 1583 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 1584 1585 // We consumed x2 register after the operation here, 1586 // so we release it. 
1587 c.locationStack.releaseRegister(x2) 1588 1589 // We already stored the result in the register used by x1 1590 // so we record it. 1591 c.locationStack.markRegisterUnused(x1.register) 1592 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1593 return nil 1594 } 1595 1596 // compileShl implements compiler.compileShl for the amd64 architecture. 1597 func (c *amd64Compiler) compileShl(o *wazeroir.OperationShl) (err error) { 1598 switch o.Type { 1599 case wazeroir.UnsignedInt32: 1600 err = c.compileShiftOp(amd64.SHLL, false) 1601 case wazeroir.UnsignedInt64: 1602 err = c.compileShiftOp(amd64.SHLQ, true) 1603 } 1604 return 1605 } 1606 1607 // compileShr implements compiler.compileShr for the amd64 architecture. 1608 func (c *amd64Compiler) compileShr(o *wazeroir.OperationShr) (err error) { 1609 switch o.Type { 1610 case wazeroir.SignedInt32: 1611 err = c.compileShiftOp(amd64.SARL, true) 1612 case wazeroir.SignedInt64: 1613 err = c.compileShiftOp(amd64.SARQ, false) 1614 case wazeroir.SignedUint32: 1615 err = c.compileShiftOp(amd64.SHRL, true) 1616 case wazeroir.SignedUint64: 1617 err = c.compileShiftOp(amd64.SHRQ, false) 1618 } 1619 return 1620 } 1621 1622 // compileRotl implements compiler.compileRotl for the amd64 architecture. 1623 func (c *amd64Compiler) compileRotl(o *wazeroir.OperationRotl) (err error) { 1624 switch o.Type { 1625 case wazeroir.UnsignedInt32: 1626 err = c.compileShiftOp(amd64.ROLL, true) 1627 case wazeroir.UnsignedInt64: 1628 err = c.compileShiftOp(amd64.ROLQ, false) 1629 } 1630 return 1631 } 1632 1633 // compileRotr implements compiler.compileRotr for the amd64 architecture. 1634 func (c *amd64Compiler) compileRotr(o *wazeroir.OperationRotr) (err error) { 1635 switch o.Type { 1636 case wazeroir.UnsignedInt32: 1637 err = c.compileShiftOp(amd64.RORL, true) 1638 case wazeroir.UnsignedInt64: 1639 err = c.compileShiftOp(amd64.RORQ, false) 1640 } 1641 return 1642 } 1643 1644 // compileShiftOp adds instructions for shift operations (SHR, SHL, ROTR, ROTL) 1645 // where we have to place the second value (shift counts) on the CX register. 1646 func (c *amd64Compiler) compileShiftOp(instruction asm.Instruction, is32Bit bool) error { 1647 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 1648 return err 1649 } 1650 1651 x2 := c.locationStack.pop() 1652 1653 // Ensures that x2 (holding shift counts) is placed on the CX register. 1654 const shiftCountRegister = amd64.RegCX 1655 if (x2.onRegister() && x2.register != shiftCountRegister) || x2.onStack() { 1656 // If another value lives on the CX register, we release it to the stack. 1657 c.onValueReleaseRegisterToStack(shiftCountRegister) 1658 1659 if x2.onRegister() { 1660 // If x2 lives on a register, we move the value to CX. 1661 if is32Bit { 1662 c.assembler.CompileRegisterToRegister(amd64.MOVL, x2.register, shiftCountRegister) 1663 } else { 1664 c.assembler.CompileRegisterToRegister(amd64.MOVQ, x2.register, shiftCountRegister) 1665 } 1666 // We no longer place any value on the original register, so we record it. 1667 c.locationStack.markRegisterUnused(x2.register) 1668 // Instead, we've already placed the value on the CX register. 1669 x2.setRegister(shiftCountRegister) 1670 } else { 1671 // If it is on stack, we just move the memory allocated value to the CX register. 1672 x2.setRegister(shiftCountRegister) 1673 c.compileLoadValueOnStackToRegister(x2) 1674 } 1675 c.locationStack.markRegisterUsed(shiftCountRegister) 1676 } 1677 1678 x1 := c.locationStack.peek() // Note this is peek! 
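// x1 is only peeked (not popped) because the shift writes its result in place,
// either into x1's register or directly into its stack slot below, so its entry
// on the value stack stays valid as-is.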
1679 1680 if x1.onRegister() { 1681 c.assembler.CompileRegisterToRegister(instruction, x2.register, x1.register) 1682 } else { 1683 // Shift target can be placed on a memory location. 1684 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 1685 c.assembler.CompileRegisterToMemory(instruction, x2.register, amd64ReservedRegisterForStackBasePointerAddress, int64(x1.stackPointer)*8) 1686 } 1687 1688 // We consumed x2 register after the operation here, 1689 // so we release it. 1690 c.locationStack.releaseRegister(x2) 1691 return nil 1692 } 1693 1694 // compileAbs implements compiler.compileAbs for the amd64 architecture. 1695 // 1696 // See the following discussions for how we could take the abs of floats on x86 assembly. 1697 // https://stackoverflow.com/questions/32408665/fastest-way-to-compute-absolute-value-using-sse/32422471#32422471 1698 // https://stackoverflow.com/questions/44630015/how-would-fabsdouble-be-implemented-on-x86-is-it-an-expensive-operation 1699 func (c *amd64Compiler) compileAbs(o *wazeroir.OperationAbs) (err error) { 1700 target := c.locationStack.peek() // Note this is peek! 1701 if err = c.compileEnsureOnRegister(target); err != nil { 1702 return err 1703 } 1704 1705 // First shift left by one to clear the sign bit, and then shift right by one. 1706 if o.Type == wazeroir.Float32 { 1707 c.assembler.CompileConstToRegister(amd64.PSLLD, 1, target.register) 1708 c.assembler.CompileConstToRegister(amd64.PSRLD, 1, target.register) 1709 } else { 1710 c.assembler.CompileConstToRegister(amd64.PSLLQ, 1, target.register) 1711 c.assembler.CompileConstToRegister(amd64.PSRLQ, 1, target.register) 1712 } 1713 return nil 1714 } 1715 1716 // compileNeg implements compiler.compileNeg for the amd64 architecture. 1717 func (c *amd64Compiler) compileNeg(o *wazeroir.OperationNeg) (err error) { 1718 target := c.locationStack.peek() // Note this is peek! 1719 if err := c.compileEnsureOnRegister(target); err != nil { 1720 return err 1721 } 1722 1723 tmpReg, err := c.allocateRegister(registerTypeVector) 1724 if err != nil { 1725 return err 1726 } 1727 1728 // First we move the sign-bit mask (placed in memory) to the tmp register, 1729 // since we cannot take XOR directly with float reg and const. 1730 // And then negate the value by XOR it with the sign-bit mask. 1731 if o.Type == wazeroir.Float32 { 1732 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, asm.NewStaticConst(u32.LeBytes(float32SignBitMask)), tmpReg) 1733 if err != nil { 1734 return err 1735 } 1736 c.assembler.CompileRegisterToRegister(amd64.XORPS, tmpReg, target.register) 1737 } else { 1738 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, asm.NewStaticConst(u64.LeBytes(float64SignBitMask)), tmpReg) 1739 if err != nil { 1740 return err 1741 } 1742 c.assembler.CompileRegisterToRegister(amd64.XORPD, tmpReg, target.register) 1743 } 1744 return nil 1745 } 1746 1747 // compileCeil implements compiler.compileCeil for the amd64 architecture. 1748 func (c *amd64Compiler) compileCeil(o *wazeroir.OperationCeil) (err error) { 1749 // Internally, ceil can be performed via ROUND instruction with 0x02 mode. 1750 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/ceilf.S for example. 1751 return c.compileRoundInstruction(o.Type == wazeroir.Float32, 0x02) 1752 } 1753 1754 // compileFloor implements compiler.compileFloor for the amd64 architecture. 
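// For example, floor(1.7) == 1.0 and floor(-1.3) == -2.0, i.e. rounding toward negative infinity.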
1755 func (c *amd64Compiler) compileFloor(o *wazeroir.OperationFloor) (err error) {
1756 // Internally, floor can be performed via ROUND instruction with 0x01 mode.
1757 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/floorf.S for example.
1758 return c.compileRoundInstruction(o.Type == wazeroir.Float32, 0x01)
1759 }
1760
1761 // compileTrunc implements compiler.compileTrunc for the amd64 architecture.
1762 func (c *amd64Compiler) compileTrunc(o *wazeroir.OperationTrunc) error {
1763 // Internally, trunc can be performed via ROUND instruction with 0x03 mode.
1764 // See https://android.googlesource.com/platform/bionic/+/882b8af/libm/x86_64/truncf.S for example.
1765 return c.compileRoundInstruction(o.Type == wazeroir.Float32, 0x03)
1766 }
1767
1768 // compileNearest implements compiler.compileNearest for the amd64 architecture.
1769 func (c *amd64Compiler) compileNearest(o *wazeroir.OperationNearest) error {
1770 // Nearest can be performed via ROUND instruction with 0x00 mode.
1771 return c.compileRoundInstruction(o.Type == wazeroir.Float32, 0x00)
1772 }
1773
1774 func (c *amd64Compiler) compileRoundInstruction(is32Bit bool, mode int64) error {
1775 target := c.locationStack.peek() // Note this is peek!
1776 if err := c.compileEnsureOnRegister(target); err != nil {
1777 return err
1778 }
1779
1780 if is32Bit {
1781 c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDSS, target.register, target.register, byte(mode))
1782 } else {
1783 c.assembler.CompileRegisterToRegisterWithArg(amd64.ROUNDSD, target.register, target.register, byte(mode))
1784 }
1785 return nil
1786 }
1787
1788 // compileMin implements compiler.compileMin for the amd64 architecture.
1789 func (c *amd64Compiler) compileMin(o *wazeroir.OperationMin) error {
1790 is32Bit := o.Type == wazeroir.Float32
1791 if is32Bit {
1792 return c.compileMinOrMax(is32Bit, true, amd64.MINSS)
1793 } else {
1794 return c.compileMinOrMax(is32Bit, true, amd64.MINSD)
1795 }
1796 }
1797
1798 // compileMax implements compiler.compileMax for the amd64 architecture.
1799 func (c *amd64Compiler) compileMax(o *wazeroir.OperationMax) error {
1800 is32Bit := o.Type == wazeroir.Float32
1801 if is32Bit {
1802 return c.compileMinOrMax(is32Bit, false, amd64.MAXSS)
1803 } else {
1804 return c.compileMinOrMax(is32Bit, false, amd64.MAXSD)
1805 }
1806 }
1807
1808 // compileMinOrMax adds instructions to pop two values from the stack, and push back either the minimum or
1809 // the maximum of these two values onto the stack according to the minOrMaxInstruction argument.
1810 // minOrMaxInstruction must be one of MAXSS, MAXSD, MINSS or MINSD.
1811 // Note: These native min/max instructions are almost compatible with min/max in the Wasm specification,
1812 // but they differ slightly with respect to NaN handling.
1813 // When one of the operands is NaN, the native min/max instructions simply return the second (source)
1814 // operand, so for example native_{min,max}(5.0, NaN) may return 5.0 rather than NaN.
1815 // However, WebAssembly specifies that min/max must always return NaN if one of the values is NaN.
1816 // Therefore, in this function, we have to add conditional jumps to check if one of the values is NaN before
1817 // the native min/max, which is why we cannot simply emit a native min/max instruction here.
1818 //
1819 // For the semantics, see wazeroir.Min and wazeroir.Max for detail.
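// For example, per the Wasm spec min(+0.0, -0.0) must be -0.0 and min(1.0, NaN) must be NaN,
// which is why the equal and NaN cases get dedicated branches below: ORPS/ORPD propagates the
// negative zero's sign for min, ANDPS/ANDPD keeps the positive zero for max, and ADDSS/ADDSD
// produces a NaN whenever either operand is NaN.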
1820 func (c *amd64Compiler) compileMinOrMax(is32Bit, isMin bool, minOrMaxInstruction asm.Instruction) error { 1821 x2 := c.locationStack.pop() 1822 if err := c.compileEnsureOnRegister(x2); err != nil { 1823 return err 1824 } 1825 x1 := c.locationStack.pop() 1826 if err := c.compileEnsureOnRegister(x1); err != nil { 1827 return err 1828 } 1829 1830 // Check if this is (either x1 or x2 is NaN) or (x1 equals x2) case 1831 if is32Bit { 1832 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, x2.register, x1.register) 1833 } else { 1834 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, x2.register, x1.register) 1835 } 1836 1837 // At this point, we have the three cases of conditional flags below 1838 // (See https://www.felixcloutier.com/x86/ucomiss#operation for detail.) 1839 // 1840 // 1) Two values are NaN-free and different: All flags are cleared. 1841 // 2) Two values are NaN-free and equal: Only ZF flags is set. 1842 // 3) One of Two values is NaN: ZF, PF and CF flags are set. 1843 1844 // Jump instruction to handle 1) case by checking the ZF flag 1845 // as ZF is only set for 2) and 3) cases. 1846 nanFreeOrDiffJump := c.assembler.CompileJump(amd64.JNE) 1847 1848 // Start handling 2) and 3). 1849 1850 // Jump if one of two values is NaN by checking the parity flag (PF). 1851 includeNaNJmp := c.assembler.CompileJump(amd64.JPS) 1852 1853 // Start handling 2). 1854 1855 // Before we exit this case, we have to ensure that positive zero (or negative zero for min instruction) is 1856 // returned if two values are positive and negative zeros. 1857 var inst asm.Instruction 1858 switch { 1859 case is32Bit && isMin: 1860 inst = amd64.ORPS 1861 case !is32Bit && isMin: 1862 inst = amd64.ORPD 1863 case is32Bit && !isMin: 1864 inst = amd64.ANDPS 1865 case !is32Bit && !isMin: 1866 inst = amd64.ANDPD 1867 } 1868 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register) 1869 1870 sameExitJmp := c.assembler.CompileJump(amd64.JMP) 1871 1872 // Start handling 3). 1873 c.assembler.SetJumpTargetOnNext(includeNaNJmp) 1874 1875 // We emit the ADD instruction to produce the NaN in x1. 1876 if is32Bit { 1877 c.assembler.CompileRegisterToRegister(amd64.ADDSS, x2.register, x1.register) 1878 } else { 1879 c.assembler.CompileRegisterToRegister(amd64.ADDSD, x2.register, x1.register) 1880 } 1881 1882 // Exit from the NaN case branch. 1883 nanExitJmp := c.assembler.CompileJump(amd64.JMP) 1884 1885 // Start handling 1). 1886 c.assembler.SetJumpTargetOnNext(nanFreeOrDiffJump) 1887 1888 // Now handle the NaN-free and different values case. 1889 c.assembler.CompileRegisterToRegister(minOrMaxInstruction, x2.register, x1.register) 1890 1891 // Set the jump target of 1) and 2) cases to the next instruction after 3) case. 1892 c.assembler.SetJumpTargetOnNext(nanExitJmp, sameExitJmp) 1893 1894 // Record that we consumed the x2 and placed the minOrMax result in the x1's register. 1895 c.locationStack.markRegisterUnused(x2.register) 1896 c.locationStack.markRegisterUnused(x1.register) 1897 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1898 return nil 1899 } 1900 1901 // compileCopysign implements compiler.compileCopysign for the amd64 architecture. 
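// For example, copysign(-3.0, 1.0) == 3.0: the magnitude comes from x1 and the sign from x2.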
1902 func (c *amd64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error { 1903 is32Bit := o.Type == wazeroir.Float32 1904 1905 x2 := c.locationStack.pop() 1906 if err := c.compileEnsureOnRegister(x2); err != nil { 1907 return err 1908 } 1909 x1 := c.locationStack.pop() 1910 if err := c.compileEnsureOnRegister(x1); err != nil { 1911 return err 1912 } 1913 tmpReg, err := c.allocateRegister(registerTypeVector) 1914 if err != nil { 1915 return err 1916 } 1917 1918 // Move the rest bit mask to the temp register. 1919 if is32Bit { 1920 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, asm.NewStaticConst(u32.LeBytes(float32RestBitMask)), tmpReg) 1921 } else { 1922 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, asm.NewStaticConst(u64.LeBytes(float64RestBitMask)), tmpReg) 1923 } 1924 if err != nil { 1925 return err 1926 } 1927 1928 // Clear the sign bit of x1 via AND with the mask. 1929 if is32Bit { 1930 c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmpReg, x1.register) 1931 } else { 1932 c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmpReg, x1.register) 1933 } 1934 1935 // Move the sign bit mask to the temp register. 1936 if is32Bit { 1937 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, asm.NewStaticConst(u32.LeBytes(float32SignBitMask)), tmpReg) 1938 } else { 1939 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, asm.NewStaticConst(u64.LeBytes(float64SignBitMask)), tmpReg) 1940 } 1941 if err != nil { 1942 return err 1943 } 1944 1945 // Clear the non-sign bits of x2 via AND with the mask. 1946 if is32Bit { 1947 c.assembler.CompileRegisterToRegister(amd64.ANDPS, tmpReg, x2.register) 1948 } else { 1949 c.assembler.CompileRegisterToRegister(amd64.ANDPD, tmpReg, x2.register) 1950 } 1951 1952 // Finally, copy the sign bit of x2 to x1. 1953 if is32Bit { 1954 c.assembler.CompileRegisterToRegister(amd64.ORPS, x2.register, x1.register) 1955 } else { 1956 c.assembler.CompileRegisterToRegister(amd64.ORPD, x2.register, x1.register) 1957 } 1958 1959 // Record that we consumed the x2 and placed the copysign result in the x1's register. 1960 c.locationStack.markRegisterUnused(x2.register) 1961 c.locationStack.markRegisterUnused(x1.register) 1962 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1963 return nil 1964 } 1965 1966 // compileSqrt implements compiler.compileSqrt for the amd64 architecture. 1967 func (c *amd64Compiler) compileSqrt(o *wazeroir.OperationSqrt) error { 1968 target := c.locationStack.peek() // Note this is peek! 1969 if err := c.compileEnsureOnRegister(target); err != nil { 1970 return err 1971 } 1972 if o.Type == wazeroir.Float32 { 1973 c.assembler.CompileRegisterToRegister(amd64.SQRTSS, target.register, target.register) 1974 } else { 1975 c.assembler.CompileRegisterToRegister(amd64.SQRTSD, target.register, target.register) 1976 } 1977 return nil 1978 } 1979 1980 // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the amd64 architecture. 1981 func (c *amd64Compiler) compileI32WrapFromI64() error { 1982 target := c.locationStack.peek() // Note this is peek! 1983 if err := c.compileEnsureOnRegister(target); err != nil { 1984 return err 1985 } 1986 c.assembler.CompileRegisterToRegister(amd64.MOVL, target.register, target.register) 1987 target.valueType = runtimeValueTypeI32 1988 return nil 1989 } 1990 1991 // compileITruncFromF implements compiler.compileITruncFromF for the amd64 architecture. 
1992 //
1993 // Note: in the following implementation, we use CVTTSS2SI and CVTTSD2SI to convert floats to signed integers.
1994 // According to the Intel manual ([1],[2]), if the source float value is either +-Inf or NaN, or it exceeds the representable range
1995 // of the target signed integer, then the instruction returns the "masked" response, i.e. the bit pattern float32SignBitMask (or float64SignBitMask for the 64-bit case).
1996 // [1] Chapter 11.5.2, SIMD Floating-Point Exception Conditions in "Vol 1, Intel® 64 and IA-32 Architectures Manual"
1997 //
1998 // https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-1-manual.html
1999 //
2000 // [2] https://xem.github.io/minix86/manual/intel-x86-and-64-manual-vol1/o_7281d5ea06a5b67a-268.html
2001 func (c *amd64Compiler) compileITruncFromF(o *wazeroir.OperationITruncFromF) (err error) {
2002 if o.InputType == wazeroir.Float32 && o.OutputType == wazeroir.SignedInt32 {
2003 err = c.emitSignedI32TruncFromFloat(true, o.NonTrapping)
2004 } else if o.InputType == wazeroir.Float32 && o.OutputType == wazeroir.SignedInt64 {
2005 err = c.emitSignedI64TruncFromFloat(true, o.NonTrapping)
2006 } else if o.InputType == wazeroir.Float64 && o.OutputType == wazeroir.SignedInt32 {
2007 err = c.emitSignedI32TruncFromFloat(false, o.NonTrapping)
2008 } else if o.InputType == wazeroir.Float64 && o.OutputType == wazeroir.SignedInt64 {
2009 err = c.emitSignedI64TruncFromFloat(false, o.NonTrapping)
2010 } else if o.InputType == wazeroir.Float32 && o.OutputType == wazeroir.SignedUint32 {
2011 err = c.emitUnsignedI32TruncFromFloat(true, o.NonTrapping)
2012 } else if o.InputType == wazeroir.Float32 && o.OutputType == wazeroir.SignedUint64 {
2013 err = c.emitUnsignedI64TruncFromFloat(true, o.NonTrapping)
2014 } else if o.InputType == wazeroir.Float64 && o.OutputType == wazeroir.SignedUint32 {
2015 err = c.emitUnsignedI32TruncFromFloat(false, o.NonTrapping)
2016 } else if o.InputType == wazeroir.Float64 && o.OutputType == wazeroir.SignedUint64 {
2017 err = c.emitUnsignedI64TruncFromFloat(false, o.NonTrapping)
2018 }
2019 return
2020 }
2021
2022 // emitUnsignedI32TruncFromFloat implements compileITruncFromF when the destination type is a 32-bit unsigned integer.
2023 func (c *amd64Compiler) emitUnsignedI32TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2024 source := c.locationStack.pop()
2025 if err := c.compileEnsureOnRegister(source); err != nil {
2026 return err
2027 }
2028
2029 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2030 if err != nil {
2031 return err
2032 }
2033
2034 // First, we check whether the source float value is above or equal to math.MaxInt32+1.
2035 if isFloat32Bit {
2036 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS,
2037 asm.NewStaticConst(u32.LeBytes(float32ForMaximumSigned32bitIntPlusOne)), source.register)
2038 } else {
2039 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD,
2040 asm.NewStaticConst(u64.LeBytes(float64ForMaximumSigned32bitIntPlusOne)), source.register)
2041 }
2042 if err != nil {
2043 return err
2044 }
2045
2046 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2047 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2048
2049 var nonTrappingNaNJump asm.Node
2050 if !nonTrapping {
2051 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
2052 } else {
2053 // In the non-trapping case, NaN is cast to zero.
2054 // Zero out the result register by XORing it with itself.
2055 c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2056 nonTrappingNaNJump = c.assembler.CompileJump(amd64.JMP)
2057 }
2058
2059 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2060
2061 // Jump if the source float value is above or equal to math.MaxInt32+1.
2062 jmpAboveOrEqualMaxInt32PlusOne := c.assembler.CompileJump(amd64.JCC)
2063
2064 // Next, we convert the value as a signed integer.
2065 if isFloat32Bit {
2066 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2067 } else {
2068 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2069 }
2070
2071 // Then, if the result is negative, the conversion was from a negative float (incl. -Inf), which is invalid for an unsigned target.
2072 c.assembler.CompileRegisterToRegister(amd64.TESTL, result, result)
2073 jmpIfNotMinusOrMinusInf := c.assembler.CompileJump(amd64.JPL)
2074
2075 var nonTrappingMinusJump asm.Node
2076 if !nonTrapping {
2077 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
2078 } else {
2079 // In the non-trapping case, the negative value is cast to zero.
2080 // Zero out the result register by XORing it with itself.
2081 c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2082 nonTrappingMinusJump = c.assembler.CompileJump(amd64.JMP)
2083 }
2084
2085 c.assembler.SetJumpTargetOnNext(jmpIfNotMinusOrMinusInf)
2086
2087 // Otherwise, the value is valid.
2088 okJmpForLessThanMaxInt32PlusOne := c.assembler.CompileJump(amd64.JMP)
2089
2090 // Now, start handling the case where the original float value is above or equal to math.MaxInt32+1.
2091 //
2092 // First, we subtract math.MaxInt32+1 from the original value so it can fit in a signed 32-bit integer.
2093 c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxInt32PlusOne)
2094 if isFloat32Bit {
2095 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS,
2096 asm.NewStaticConst(u32.LeBytes(float32ForMaximumSigned32bitIntPlusOne)), source.register)
2097 } else {
2098 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD,
2099 asm.NewStaticConst(u64.LeBytes(float64ForMaximumSigned32bitIntPlusOne)), source.register)
2100 }
2101 if err != nil {
2102 return err
2103 }
2104
2105 // Then, convert the subtracted value as a signed 32-bit integer.
2106 if isFloat32Bit {
2107 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2108 } else {
2109 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2110 }
2111
2112 // Next, we have to check if the value came from NaN or +Inf.
2113 // NaN or +Inf cases result in 0x8000_0000 according to the conversion semantics,
2114 // so we check whether the resulting int value is negative or not.
2115 c.assembler.CompileRegisterToRegister(amd64.TESTL, result, result)
2116
2117 // If the result is negative, the conversion is invalid (from NaN or +Inf).
2118 jmpIfPlusInf := c.assembler.CompileJump(amd64.JMI)
2119
2120 // Otherwise, we successfully converted (source float - (math.MaxInt32+1)) to int.
2121 // So, we recover the original magnitude by adding back 2^31, i.e. the sign bit mask.
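// For example, for a source of 3000000000.0: after subtracting 2147483648.0 the truncating
// conversion yields 852516352, and adding back 0x8000_0000 (2147483648) produces the
// expected unsigned result 3000000000.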
2122 if err = c.assembler.CompileStaticConstToRegister(amd64.ADDL,
2123 asm.NewStaticConst(u32.LeBytes(float32SignBitMask)), result); err != nil {
2124 return err
2125 }
2126
2127 okJmpForAboveOrEqualMaxInt32PlusOne := c.assembler.CompileJump(amd64.JMP)
2128
2129 c.assembler.SetJumpTargetOnNext(jmpIfPlusInf)
2130 if !nonTrapping {
2131 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
2132 } else {
2133 err = c.assembler.CompileStaticConstToRegister(amd64.MOVL,
2134 asm.NewStaticConst(u32.LeBytes(maximum32BitUnsignedInt)), result)
2135 if err != nil {
2136 return err
2137 }
2138 }
2139
2140 // We jump to the next instructions for valid cases.
2141 c.assembler.SetJumpTargetOnNext(okJmpForLessThanMaxInt32PlusOne, okJmpForAboveOrEqualMaxInt32PlusOne)
2142 if nonTrapping {
2143 c.assembler.SetJumpTargetOnNext(nonTrappingMinusJump, nonTrappingNaNJump)
2144 }
2145
2146 // We consumed the source's register and placed the conversion result
2147 // in the result register.
2148 c.locationStack.markRegisterUnused(source.register)
2149 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
2150 return nil
2151 }
2152
2153 // emitUnsignedI64TruncFromFloat implements compileITruncFromF when the destination type is a 64-bit unsigned integer.
2154 func (c *amd64Compiler) emitUnsignedI64TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2155 source := c.locationStack.pop()
2156 if err := c.compileEnsureOnRegister(source); err != nil {
2157 return err
2158 }
2159
2160 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2161 if err != nil {
2162 return err
2163 }
2164
2165 // First, we check whether the source float value is above or equal to math.MaxInt64+1.
2166 if isFloat32Bit {
2167 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS,
2168 asm.NewStaticConst(u32.LeBytes(float32ForMaximumSigned64bitIntPlusOne)), source.register)
2169 } else {
2170 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD,
2171 asm.NewStaticConst(u64.LeBytes(float64ForMaximumSigned64bitIntPlusOne)), source.register)
2172 }
2173 if err != nil {
2174 return err
2175 }
2176
2177 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2178 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2179
2180 var nonTrappingNaNJump asm.Node
2181 if !nonTrapping {
2182 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
2183 } else {
2184 // In the non-trapping case, NaN is cast to zero.
2185 // Zero out the result register by XORing it with itself.
2186 c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2187 nonTrappingNaNJump = c.assembler.CompileJump(amd64.JMP)
2188 }
2189
2190 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2191
2192 // Jump if the source float value is above or equal to math.MaxInt64+1.
2193 jmpAboveOrEqualMaxInt64PlusOne := c.assembler.CompileJump(amd64.JCC)
2194
2195 // Next, we convert the value as a signed integer.
2196 if isFloat32Bit {
2197 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2198 } else {
2199 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2200 }
2201
2202 // Then, if the result is negative, the conversion was from a negative float (incl. -Inf), which is invalid for an unsigned target.
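// TESTQ sets the sign flag from the result, so the JPL below (jump if plus) skips the
// trap/zeroing path whenever the converted value is non-negative.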
2203 c.assembler.CompileRegisterToRegister(amd64.TESTQ, result, result)
2204 jmpIfNotMinusOrMinusInf := c.assembler.CompileJump(amd64.JPL)
2205
2206 var nonTrappingMinusJump asm.Node
2207 if !nonTrapping {
2208 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
2209 } else {
2210 // In the non-trapping case, the negative value is cast to zero.
2211 // Zero out the result register by XORing it with itself.
2212 c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2213 nonTrappingMinusJump = c.assembler.CompileJump(amd64.JMP)
2214 }
2215
2216 c.assembler.SetJumpTargetOnNext(jmpIfNotMinusOrMinusInf)
2217
2218 // Otherwise, the value is valid.
2219 okJmpForLessThanMaxInt64PlusOne := c.assembler.CompileJump(amd64.JMP)
2220
2221 // Now, start handling the case where the original float value is above or equal to math.MaxInt64+1.
2222 //
2223 // First, we subtract math.MaxInt64+1 from the original value so it can fit in a signed 64-bit integer.
2224 c.assembler.SetJumpTargetOnNext(jmpAboveOrEqualMaxInt64PlusOne)
2225 if isFloat32Bit {
2226 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSS,
2227 asm.NewStaticConst(u32.LeBytes(float32ForMaximumSigned64bitIntPlusOne)), source.register)
2228 } else {
2229 err = c.assembler.CompileStaticConstToRegister(amd64.SUBSD,
2230 asm.NewStaticConst(u64.LeBytes(float64ForMaximumSigned64bitIntPlusOne)), source.register)
2231 }
2232 if err != nil {
2233 return err
2234 }
2235
2236 // Then, convert the subtracted value as a signed 64-bit integer.
2237 if isFloat32Bit {
2238 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2239 } else {
2240 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2241 }
2242
2243 // Next, we have to check if the value came from NaN or +Inf.
2244 // NaN or +Inf cases result in 0x8000_0000_0000_0000 according to the conversion semantics,
2245 // so we check whether the resulting int value is negative or not.
2246 c.assembler.CompileRegisterToRegister(amd64.TESTQ, result, result)
2247
2248 // If the result is negative, the conversion is invalid (from NaN or +Inf).
2249 jmpIfPlusInf := c.assembler.CompileJump(amd64.JMI)
2250
2251 // Otherwise, we successfully converted (source float - (math.MaxInt64+1)) to int.
2252 // So, we recover the original magnitude by adding back 2^63, i.e. the sign bit mask.
2253 if err = c.assembler.CompileStaticConstToRegister(amd64.ADDQ,
2254 asm.NewStaticConst(u64.LeBytes(float64SignBitMask)), result); err != nil {
2255 return err
2256 }
2257
2258 okJmpForAboveOrEqualMaxInt64PlusOne := c.assembler.CompileJump(amd64.JMP)
2259
2260 c.assembler.SetJumpTargetOnNext(jmpIfPlusInf)
2261 if !nonTrapping {
2262 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)
2263 } else {
2264 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ,
2265 asm.NewStaticConst(u64.LeBytes(maximum64BitUnsignedInt)), result)
2266 if err != nil {
2267 return err
2268 }
2269 }
2270
2271 // We jump to the next instructions for valid cases.
2272 c.assembler.SetJumpTargetOnNext(okJmpForLessThanMaxInt64PlusOne, okJmpForAboveOrEqualMaxInt64PlusOne)
2273 if nonTrapping {
2274 c.assembler.SetJumpTargetOnNext(nonTrappingMinusJump, nonTrappingNaNJump)
2275 }
2276
2277 // We consumed the source's register and placed the conversion result
2278 // in the result register.
2279 c.locationStack.markRegisterUnused(source.register)
2280 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
2281 return nil
2282 }
2283
2284 // emitSignedI32TruncFromFloat implements compileITruncFromF when the destination type is a 32-bit signed integer.
2285 func (c *amd64Compiler) emitSignedI32TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2286 source := c.locationStack.pop()
2287 if err := c.compileEnsureOnRegister(source); err != nil {
2288 return err
2289 }
2290
2291 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2292 if err != nil {
2293 return err
2294 }
2295
2296 // First we unconditionally convert the source to an integer via CVTTSS2SI (CVTTSD2SI for a 64-bit float).
2297 if isFloat32Bit {
2298 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SL, source.register, result)
2299 } else {
2300 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SL, source.register, result)
2301 }
2302
2303 // We compare the conversion result with the sign bit mask to check whether either
2304 // 1) the source float value is +-Inf or NaN, or it exceeds the representable range of the 32-bit signed integer, or
2305 // 2) the source equals the minimum signed 32-bit int (=-2147483648.000000), whose bit pattern is float32ForMinimumSigned32bitInteger for the 32-bit float source
2306 // or float64ForMinimumSigned32bitInteger for the 64-bit float source.
2307 err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, asm.NewStaticConst(u32.LeBytes(float32SignBitMask)), result)
2308 if err != nil {
2309 return err
2310 }
2311
2312 // Otherwise, jump to exit as the result is valid.
2313 okJmp := c.assembler.CompileJump(amd64.JNE)
2314
2315 // Start handling the case of 1) and 2).
2316 // First, check if the value is NaN.
2317 if isFloat32Bit {
2318 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, source.register, source.register)
2319 } else {
2320 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, source.register, source.register)
2321 }
2322
2323 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2324 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2325
2326 var nontrappingNanJump asm.Node
2327 if !nonTrapping {
2328 // If the value is NaN, we return the function with nativeCallStatusCodeInvalidFloatToIntConversion.
2329 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
2330 } else {
2331 // In the non-trapping case, NaN is cast to zero.
2332 // Zero out the result register by XORing it with itself.
2333 c.assembler.CompileRegisterToRegister(amd64.XORL, result, result)
2334 nontrappingNanJump = c.assembler.CompileJump(amd64.JMP)
2335 }
2336
2337 // Check whether the value is smaller than the minimum 32-bit signed integer value,
2338 // i.e. whether it exceeds the lower bound of the 32-bit signed integer range.
2339 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2340 if isFloat32Bit {
2341 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS,
2342 asm.NewStaticConst(u32.LeBytes(float32ForMinimumSigned32bitInteger)), source.register)
2343 } else {
2344 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD,
2345 asm.NewStaticConst(u64.LeBytes(float64ForMinimumSigned32bitInteger)), source.register)
2346 }
2347 if err != nil {
2348 return err
2349 }
2350
2351 if !nonTrapping {
2352 // Jump if the value exceeds the lower bound.
2353 var jmpIfExceedsLowerBound asm.Node 2354 if isFloat32Bit { 2355 jmpIfExceedsLowerBound = c.assembler.CompileJump(amd64.JCS) 2356 } else { 2357 jmpIfExceedsLowerBound = c.assembler.CompileJump(amd64.JLS) 2358 } 2359 2360 // At this point, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum. 2361 // So, check if the value equals the minimum signed 32-bit int. 2362 if isFloat32Bit { 2363 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, 2364 asm.NewStaticConst([]byte{0, 0, 0, 0}), source.register) 2365 } else { 2366 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, 2367 asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0}), source.register) 2368 } 2369 if err != nil { 2370 return err 2371 } 2372 2373 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 32-bit int). 2374 2375 c.assembler.SetJumpTargetOnNext(jmpIfExceedsLowerBound) 2376 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow) 2377 2378 // We jump to the next instructions for valid cases. 2379 c.assembler.SetJumpTargetOnNext(okJmp, jmpIfMinimumSignedInt) 2380 } else { 2381 // Jump if the value does not exceed the lower bound. 2382 var jmpIfNotExceedsLowerBound asm.Node 2383 if isFloat32Bit { 2384 jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JCC) 2385 } else { 2386 jmpIfNotExceedsLowerBound = c.assembler.CompileJump(amd64.JHI) 2387 } 2388 2389 // If the value exceeds the lower bound, we "saturate" it to the minimum. 2390 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, 2391 asm.NewStaticConst(u32.LeBytes(uint32(minimum32BitSignedInt))), result); err != nil { 2392 return err 2393 } 2394 nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) 2395 2396 // Otherwise, the value is the minimum signed 32-bit int (=-2147483648.000000) or larger than 32-bit maximum. 2397 c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) 2398 if isFloat32Bit { 2399 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, 2400 asm.NewStaticConst([]byte{0, 0, 0, 0}), source.register) 2401 } else { 2402 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, 2403 asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0}), source.register) 2404 } 2405 if err != nil { 2406 return err 2407 } 2408 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 32-bit int). 2409 2410 // If the value exceeds signed 32-bit maximum, we saturate it to the maximum. 2411 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVL, 2412 asm.NewStaticConst(u32.LeBytes(uint32(maximum32BitSignedInt))), result); err != nil { 2413 return err 2414 } 2415 2416 c.assembler.SetJumpTargetOnNext(okJmp, nontrappingNanJump, nonTrappingSaturatedMinimumJump, jmpIfMinimumSignedInt) 2417 } 2418 2419 // We consumed the source's register and placed the conversion result 2420 // in the result register. 2421 c.locationStack.markRegisterUnused(source.register) 2422 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32) 2423 return nil 2424 } 2425 2426 // emitSignedI64TruncFromFloat implements compileITruncFromF when the destination type is a 64-bit signed integer. 
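// The flow mirrors emitSignedI32TruncFromFloat: the truncating conversion yields the bit pattern
// 0x8000_0000_0000_0000 both for invalid inputs (NaN, +-Inf, out of range) and for the one valid
// input exactly equal to math.MinInt64, so those cases are disambiguated with UCOMISS/UCOMISD checks afterwards.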
2427 func (c *amd64Compiler) emitSignedI64TruncFromFloat(isFloat32Bit, nonTrapping bool) error {
2428 source := c.locationStack.pop()
2429 if err := c.compileEnsureOnRegister(source); err != nil {
2430 return err
2431 }
2432
2433 result, err := c.allocateRegister(registerTypeGeneralPurpose)
2434 if err != nil {
2435 return err
2436 }
2437
2438 // First we unconditionally convert the source to an integer via CVTTSS2SI (CVTTSD2SI for a 64-bit float).
2439 if isFloat32Bit {
2440 c.assembler.CompileRegisterToRegister(amd64.CVTTSS2SQ, source.register, result)
2441 } else {
2442 c.assembler.CompileRegisterToRegister(amd64.CVTTSD2SQ, source.register, result)
2443 }
2444
2445 // We compare the conversion result with the sign bit mask to check whether either
2446 // 1) the source float value is +-Inf or NaN, or it exceeds the representable range of the 64-bit signed integer, or
2447 // 2) the source equals the minimum signed 64-bit int (=-9223372036854775808.0), whose bit pattern is float32ForMinimumSigned64bitInteger for the 32-bit float source
2448 // or float64ForMinimumSigned64bitInteger for the 64-bit float source.
2449 err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ,
2450 asm.NewStaticConst(u64.LeBytes(float64SignBitMask)), result)
2451 if err != nil {
2452 return err
2453 }
2454
2455 // Otherwise, we simply jump to exit as the result is valid.
2456 okJmp := c.assembler.CompileJump(amd64.JNE)
2457
2458 // Start handling the case of 1) and 2).
2459 // First, check if the value is NaN.
2460 if isFloat32Bit {
2461 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, source.register, source.register)
2462 } else {
2463 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, source.register, source.register)
2464 }
2465
2466 // Check the parity flag (set when the value is NaN), and if it is set, we should raise an exception.
2467 jmpIfNotNaN := c.assembler.CompileJump(amd64.JPC) // jump if parity is not set.
2468
2469 var nontrappingNanJump asm.Node
2470 if !nonTrapping {
2471 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
2472 } else {
2473 // In the non-trapping case, NaN is cast to zero.
2474 // Zero out the result register by XORing it with itself.
2475 c.assembler.CompileRegisterToRegister(amd64.XORQ, result, result)
2476 nontrappingNanJump = c.assembler.CompileJump(amd64.JMP)
2477 }
2478
2479 // Check whether the value is smaller than the minimum 64-bit signed integer value,
2480 // i.e. whether it exceeds the lower bound of the 64-bit signed integer range.
2481 c.assembler.SetJumpTargetOnNext(jmpIfNotNaN)
2482 if isFloat32Bit {
2483 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS,
2484 asm.NewStaticConst(u32.LeBytes(float32ForMinimumSigned64bitInteger)), source.register)
2485 } else {
2486 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD,
2487 asm.NewStaticConst(u64.LeBytes(float64ForMinimumSigned64bitInteger)), source.register)
2488 }
2489 if err != nil {
2490 return err
2491 }
2492
2493 if !nonTrapping {
2494 // Jump if the value exceeds the lower bound (e.g. -Inf).
2495 jmpIfExceedsLowerBound := c.assembler.CompileJump(amd64.JCS)
2496
2497 // At this point, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum.
2498 // So, check if the value equals the minimum signed 64-bit int.
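// At this point the remaining candidates are either exactly -2^63 (negative) or values of
// 2^63 and above (positive), so comparing the source against 0.0 and branching on the carry
// flag below distinguishes the two.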
2499 if isFloat32Bit { 2500 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, 2501 asm.NewStaticConst([]byte{0, 0, 0, 0}), source.register) 2502 } else { 2503 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, 2504 asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0}), source.register) 2505 } 2506 if err != nil { 2507 return err 2508 } 2509 2510 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 64-bit int). 2511 2512 c.assembler.SetJumpTargetOnNext(jmpIfExceedsLowerBound) 2513 c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow) 2514 2515 // We jump to the next instructions for valid cases. 2516 c.assembler.SetJumpTargetOnNext(okJmp, jmpIfMinimumSignedInt) 2517 } else { 2518 // Jump if the value is not -Inf. 2519 jmpIfNotExceedsLowerBound := c.assembler.CompileJump(amd64.JCC) 2520 2521 // If the value exceeds the lower bound, we "saturate" it to the minimum. 2522 err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, 2523 asm.NewStaticConst(u64.LeBytes(uint64(minimum64BitSignedInt))), result) 2524 if err != nil { 2525 return err 2526 } 2527 2528 nonTrappingSaturatedMinimumJump := c.assembler.CompileJump(amd64.JMP) 2529 2530 // Otherwise, the value is the minimum signed 64-bit int (=-9223372036854775808.0) or larger than 64-bit maximum. 2531 // So, check if the value equals the minimum signed 64-bit int. 2532 c.assembler.SetJumpTargetOnNext(jmpIfNotExceedsLowerBound) 2533 if isFloat32Bit { 2534 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISS, asm.NewStaticConst([]byte{0, 0, 0, 0}), source.register) 2535 } else { 2536 err = c.assembler.CompileStaticConstToRegister(amd64.UCOMISD, asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0}), source.register) 2537 } 2538 if err != nil { 2539 return err 2540 } 2541 2542 jmpIfMinimumSignedInt := c.assembler.CompileJump(amd64.JCS) // jump if the value is minus (= the minimum signed 64-bit int). 2543 2544 // If the value exceeds signed 64-bit maximum, we saturate it to the maximum. 2545 if err = c.assembler.CompileStaticConstToRegister(amd64.MOVQ, asm.NewStaticConst(u64.LeBytes(uint64(maximum64BitSignedInt))), result); err != nil { 2546 return err 2547 } 2548 2549 c.assembler.SetJumpTargetOnNext(okJmp, jmpIfMinimumSignedInt, nonTrappingSaturatedMinimumJump, nontrappingNanJump) 2550 } 2551 2552 // We consumed the source's register and placed the conversion result 2553 // in the result register. 2554 c.locationStack.markRegisterUnused(source.register) 2555 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64) 2556 return nil 2557 } 2558 2559 // compileFConvertFromI implements compiler.compileFConvertFromI for the amd64 architecture. 
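// Signed sources map directly onto CVTSI2SS/CVTSI2SD, while unsigned sources need the widening
// (for 32-bit) and halve-then-double (for 64-bit) tricks described below.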
2560 func (c *amd64Compiler) compileFConvertFromI(o *wazeroir.OperationFConvertFromI) (err error) { 2561 if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedInt32 { 2562 err = c.compileSimpleConversion(amd64.CVTSL2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 32bit int 2563 } else if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedInt64 { 2564 err = c.compileSimpleConversion(amd64.CVTSQ2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 64bit int 2565 } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedInt32 { 2566 err = c.compileSimpleConversion(amd64.CVTSL2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 32bit int 2567 } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedInt64 { 2568 err = c.compileSimpleConversion(amd64.CVTSQ2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 64bit int 2569 } else if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedUint32 { 2570 // See the following link for why we use 64bit conversion for unsigned 32bit integer sources: 2571 // https://stackoverflow.com/questions/41495498/fpu-operations-generated-by-gcc-during-casting-integer-to-float. 2572 // 2573 // Here's the summary: 2574 // >> CVTSI2SS is indeed designed for converting a signed integer to a scalar single-precision float, 2575 // >> not an unsigned integer like you have here. So what gives? Well, a 64-bit processor has 64-bit wide 2576 // >> registers available, so the unsigned 32-bit input values can be stored as signed 64-bit intermediate values, 2577 // >> which allows CVTSI2SS to be used after all. 2578 err = c.compileSimpleConversion(amd64.CVTSQ2SS, registerTypeVector, runtimeValueTypeF32) // = CVTSI2SS for 64bit int. 2579 } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedUint32 { 2580 // For the same reason above, we use 64bit conversion for unsigned 32bit. 2581 err = c.compileSimpleConversion(amd64.CVTSQ2SD, registerTypeVector, runtimeValueTypeF64) // = CVTSI2SD for 64bit int. 2582 } else if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedUint64 { 2583 err = c.emitUnsignedInt64ToFloatConversion(true) 2584 } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedUint64 { 2585 err = c.emitUnsignedInt64ToFloatConversion(false) 2586 } 2587 return 2588 } 2589 2590 // emitUnsignedInt64ToFloatConversion is handling the case of unsigned 64-bit integer 2591 // in compileFConvertFromI. 2592 func (c *amd64Compiler) emitUnsignedInt64ToFloatConversion(isFloat32bit bool) error { 2593 // The logic here is exactly the same as GCC emits for the following code: 2594 // 2595 // float convert(int num) { 2596 // float foo; 2597 // uint64_t ptr1 = 100; 2598 // foo = (float)(ptr1); 2599 // return foo; 2600 // } 2601 // 2602 // which is compiled by GCC as 2603 // 2604 // convert: 2605 // push rbp 2606 // mov rbp, rsp 2607 // mov DWORD PTR [rbp-20], edi 2608 // mov DWORD PTR [rbp-4], 100 2609 // mov eax, DWORD PTR [rbp-4] 2610 // test rax, rax 2611 // js .handle_sign_bit_case 2612 // cvtsi2ss xmm0, rax 2613 // jmp .exit 2614 // .handle_sign_bit_case: 2615 // mov rdx, rax 2616 // shr rdx 2617 // and eax, 1 2618 // or rdx, rax 2619 // cvtsi2ss xmm0, rdx 2620 // addsd xmm0, xmm0 2621 // .exit: ... 2622 // 2623 // tl;dr is that we have a branch depending on whether or not sign bit is set. 
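// (When the sign bit is set the value cannot be converted directly as a signed 64-bit integer,
// so it is first halved: shifted right by one with the dropped low bit OR-ed back in so that
// rounding stays correct, then converted, and finally doubled with an add.)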
2624 2625 origin := c.locationStack.pop() 2626 if err := c.compileEnsureOnRegister(origin); err != nil { 2627 return err 2628 } 2629 2630 dest, err := c.allocateRegister(registerTypeVector) 2631 if err != nil { 2632 return err 2633 } 2634 2635 c.locationStack.markRegisterUsed(dest) 2636 2637 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2638 if err != nil { 2639 return err 2640 } 2641 2642 // Check if the most significant bit (sign bit) is set. 2643 c.assembler.CompileRegisterToRegister(amd64.TESTQ, origin.register, origin.register) 2644 2645 // Jump if the sign bit is set. 2646 jmpIfSignbitSet := c.assembler.CompileJump(amd64.JMI) 2647 2648 // Otherwise, we could fit the unsigned int into float32. 2649 // So, we convert it to float32 and emit jump instruction to exit from this branch. 2650 if isFloat32bit { 2651 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SS, origin.register, dest) 2652 } else { 2653 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SD, origin.register, dest) 2654 } 2655 exitFromSignbitUnSet := c.assembler.CompileJump(amd64.JMP) 2656 2657 // Now handling the case where sign-bit is set. 2658 // We emit the following sequences: 2659 // mov tmpReg, origin 2660 // shr tmpReg, 1 2661 // and origin, 1 2662 // or tmpReg, origin 2663 // cvtsi2ss xmm0, tmpReg 2664 // addsd xmm0, xmm0 2665 2666 c.assembler.SetJumpTargetOnNext(jmpIfSignbitSet) 2667 c.assembler.CompileRegisterToRegister(amd64.MOVQ, origin.register, tmpReg) 2668 c.assembler.CompileConstToRegister(amd64.SHRQ, 1, tmpReg) 2669 c.assembler.CompileConstToRegister(amd64.ANDQ, 1, origin.register) 2670 c.assembler.CompileRegisterToRegister(amd64.ORQ, origin.register, tmpReg) 2671 if isFloat32bit { 2672 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SS, tmpReg, dest) 2673 } else { 2674 c.assembler.CompileRegisterToRegister(amd64.CVTSQ2SD, tmpReg, dest) 2675 } 2676 if isFloat32bit { 2677 c.assembler.CompileRegisterToRegister(amd64.ADDSS, dest, dest) 2678 } else { 2679 c.assembler.CompileRegisterToRegister(amd64.ADDSD, dest, dest) 2680 } 2681 2682 // Now, we finished the sign-bit set branch. 2683 // We have to make the exit jump target of sign-bit unset branch 2684 // towards the next instruction. 2685 c.assembler.SetJumpTargetOnNext(exitFromSignbitUnSet) 2686 2687 // We consumed the origin's register and placed the conversion result 2688 // in the dest register. 2689 c.locationStack.markRegisterUnused(origin.register) 2690 if isFloat32bit { 2691 c.pushRuntimeValueLocationOnRegister(dest, runtimeValueTypeF32) 2692 } else { 2693 c.pushRuntimeValueLocationOnRegister(dest, runtimeValueTypeF64) 2694 } 2695 return nil 2696 } 2697 2698 // compileSimpleConversion pops a value type from the stack, and applies the 2699 // given instruction on it, and push the result onto a register of the given type. 
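// For example, compileSimpleConversion(amd64.CVTSL2SS, registerTypeVector, runtimeValueTypeF32)
// pops an i32, emits CVTSI2SS into a freshly allocated XMM register, and pushes the result as an f32.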
2700 func (c *amd64Compiler) compileSimpleConversion(convInstruction asm.Instruction, 2701 destinationRegisterType registerType, destinationValueType runtimeValueType, 2702 ) error { 2703 origin := c.locationStack.pop() 2704 if err := c.compileEnsureOnRegister(origin); err != nil { 2705 return err 2706 } 2707 2708 dest, err := c.allocateRegister(destinationRegisterType) 2709 if err != nil { 2710 return err 2711 } 2712 2713 c.assembler.CompileRegisterToRegister(convInstruction, origin.register, dest) 2714 2715 c.locationStack.markRegisterUnused(origin.register) 2716 c.pushRuntimeValueLocationOnRegister(dest, destinationValueType) 2717 return nil 2718 } 2719 2720 // compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the amd64 architecture. 2721 func (c *amd64Compiler) compileF32DemoteFromF64() error { 2722 target := c.locationStack.peek() // Note this is peek! 2723 if err := c.compileEnsureOnRegister(target); err != nil { 2724 return err 2725 } 2726 2727 c.assembler.CompileRegisterToRegister(amd64.CVTSD2SS, target.register, target.register) 2728 target.valueType = runtimeValueTypeF32 2729 return nil 2730 } 2731 2732 // compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the amd64 architecture. 2733 func (c *amd64Compiler) compileF64PromoteFromF32() error { 2734 target := c.locationStack.peek() // Note this is peek! 2735 if err := c.compileEnsureOnRegister(target); err != nil { 2736 return err 2737 } 2738 2739 c.assembler.CompileRegisterToRegister(amd64.CVTSS2SD, target.register, target.register) 2740 target.valueType = runtimeValueTypeF64 2741 return nil 2742 } 2743 2744 // compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the amd64 architecture. 2745 func (c *amd64Compiler) compileI32ReinterpretFromF32() error { 2746 if peek := c.locationStack.peek(); peek.onStack() { 2747 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2748 peek.valueType = runtimeValueTypeI32 2749 return nil 2750 } 2751 return c.compileSimpleConversion(amd64.MOVL, registerTypeGeneralPurpose, runtimeValueTypeI32) 2752 } 2753 2754 // compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the amd64 architecture. 2755 func (c *amd64Compiler) compileI64ReinterpretFromF64() error { 2756 if peek := c.locationStack.peek(); peek.onStack() { 2757 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2758 peek.valueType = runtimeValueTypeI64 2759 return nil 2760 } 2761 return c.compileSimpleConversion(amd64.MOVQ, registerTypeGeneralPurpose, runtimeValueTypeI64) 2762 } 2763 2764 // compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the amd64 architecture. 2765 func (c *amd64Compiler) compileF32ReinterpretFromI32() error { 2766 if peek := c.locationStack.peek(); peek.onStack() { 2767 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2768 peek.valueType = runtimeValueTypeF32 2769 return nil 2770 } 2771 return c.compileSimpleConversion(amd64.MOVL, registerTypeVector, runtimeValueTypeF32) 2772 } 2773 2774 // compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the amd64 architecture. 2775 func (c *amd64Compiler) compileF64ReinterpretFromI64() error { 2776 if peek := c.locationStack.peek(); peek.onStack() { 2777 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 
2778 peek.valueType = runtimeValueTypeF64 2779 return nil 2780 } 2781 return c.compileSimpleConversion(amd64.MOVQ, registerTypeVector, runtimeValueTypeF64) 2782 } 2783 2784 // compileExtend implements compiler.compileExtend for the amd64 architecture. 2785 func (c *amd64Compiler) compileExtend(o *wazeroir.OperationExtend) error { 2786 var inst asm.Instruction 2787 if o.Signed { 2788 inst = amd64.MOVLQSX // = MOVSXD https://www.felixcloutier.com/x86/movsx:movsxd 2789 } else { 2790 inst = amd64.MOVL 2791 } 2792 return c.compileExtendImpl(inst, runtimeValueTypeI64) 2793 } 2794 2795 // compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the amd64 architecture. 2796 func (c *amd64Compiler) compileSignExtend32From8() error { 2797 return c.compileExtendImpl(amd64.MOVBLSX, runtimeValueTypeI32) 2798 } 2799 2800 // compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the amd64 architecture. 2801 func (c *amd64Compiler) compileSignExtend32From16() error { 2802 return c.compileExtendImpl(amd64.MOVWLSX, runtimeValueTypeI32) 2803 } 2804 2805 // compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the amd64 architecture. 2806 func (c *amd64Compiler) compileSignExtend64From8() error { 2807 return c.compileExtendImpl(amd64.MOVBQSX, runtimeValueTypeI64) 2808 } 2809 2810 // compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the amd64 architecture. 2811 func (c *amd64Compiler) compileSignExtend64From16() error { 2812 return c.compileExtendImpl(amd64.MOVWQSX, runtimeValueTypeI64) 2813 } 2814 2815 // compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the amd64 architecture. 2816 func (c *amd64Compiler) compileSignExtend64From32() error { 2817 return c.compileExtendImpl(amd64.MOVLQSX, runtimeValueTypeI64) 2818 } 2819 2820 func (c *amd64Compiler) compileExtendImpl(inst asm.Instruction, destinationType runtimeValueType) error { 2821 target := c.locationStack.peek() // Note this is peek! 2822 if err := c.compileEnsureOnRegister(target); err != nil { 2823 return err 2824 } 2825 2826 c.assembler.CompileRegisterToRegister(inst, target.register, target.register) 2827 target.valueType = destinationType 2828 return nil 2829 } 2830 2831 // compileEq implements compiler.compileEq for the amd64 architecture. 2832 func (c *amd64Compiler) compileEq(o *wazeroir.OperationEq) error { 2833 return c.compileEqOrNe(o.Type, true) 2834 } 2835 2836 // compileNe implements compiler.compileNe for the amd64 architecture. 
2837 func (c *amd64Compiler) compileNe(o *wazeroir.OperationNe) error { 2838 return c.compileEqOrNe(o.Type, false) 2839 } 2840 2841 func (c *amd64Compiler) compileEqOrNe(t wazeroir.UnsignedType, shouldEqual bool) (err error) { 2842 x2 := c.locationStack.pop() 2843 if err := c.compileEnsureOnRegister(x2); err != nil { 2844 return err 2845 } 2846 2847 x1 := c.locationStack.pop() 2848 if err := c.compileEnsureOnRegister(x1); err != nil { 2849 return err 2850 } 2851 2852 switch t { 2853 case wazeroir.UnsignedTypeI32: 2854 err = c.compileEqOrNeForInts(x1.register, x2.register, amd64.CMPL, shouldEqual) 2855 case wazeroir.UnsignedTypeI64: 2856 err = c.compileEqOrNeForInts(x1.register, x2.register, amd64.CMPQ, shouldEqual) 2857 case wazeroir.UnsignedTypeF32: 2858 err = c.compileEqOrNeForFloats(x1.register, x2.register, amd64.UCOMISS, shouldEqual) 2859 case wazeroir.UnsignedTypeF64: 2860 err = c.compileEqOrNeForFloats(x1.register, x2.register, amd64.UCOMISD, shouldEqual) 2861 } 2862 if err != nil { 2863 return 2864 } 2865 2866 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 2867 c.locationStack.releaseRegister(x1) 2868 c.locationStack.releaseRegister(x2) 2869 return 2870 } 2871 2872 func (c *amd64Compiler) compileEqOrNeForInts(x1Reg, x2Reg asm.Register, cmpInstruction asm.Instruction, 2873 shouldEqual bool, 2874 ) error { 2875 c.assembler.CompileRegisterToRegister(cmpInstruction, x2Reg, x1Reg) 2876 2877 // Record that the result is on the conditional register. 2878 var condReg asm.ConditionalRegisterState 2879 if shouldEqual { 2880 condReg = amd64.ConditionalRegisterStateE 2881 } else { 2882 condReg = amd64.ConditionalRegisterStateNE 2883 } 2884 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(condReg) 2885 loc.valueType = runtimeValueTypeI32 2886 return nil 2887 } 2888 2889 // For float EQ and NE, we have to take NaN values into account. 2890 // Notably, Wasm specification states that if one of targets is NaN, 2891 // the result must be zero for EQ or one for NE. 2892 func (c *amd64Compiler) compileEqOrNeForFloats(x1Reg, x2Reg asm.Register, cmpInstruction asm.Instruction, shouldEqual bool) error { 2893 // Before we allocate the result, we have to reserve two int registers. 2894 nanFragReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2895 if err != nil { 2896 return err 2897 } 2898 c.locationStack.markRegisterUsed(nanFragReg) 2899 cmpResultReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2900 if err != nil { 2901 return err 2902 } 2903 2904 // Then, execute the comparison. 2905 c.assembler.CompileRegisterToRegister(cmpInstruction, x2Reg, x1Reg) 2906 2907 // First, we get the parity flag which indicates whether one of values was NaN. 2908 if shouldEqual { 2909 // Set 1 if two values are NOT NaN. 2910 c.assembler.CompileNoneToRegister(amd64.SETPC, nanFragReg) 2911 } else { 2912 // Set 1 if one of values is NaN. 2913 c.assembler.CompileNoneToRegister(amd64.SETPS, nanFragReg) 2914 } 2915 2916 // next, we get the usual comparison flag. 2917 if shouldEqual { 2918 // Set 1 if equal. 2919 c.assembler.CompileNoneToRegister(amd64.SETEQ, cmpResultReg) 2920 } else { 2921 // Set 1 if not equal. 2922 c.assembler.CompileNoneToRegister(amd64.SETNE, cmpResultReg) 2923 } 2924 2925 // Do "and" or "or" operations on these two flags to get the actual result. 
2926 if shouldEqual { 2927 c.assembler.CompileRegisterToRegister(amd64.ANDL, nanFragReg, cmpResultReg) 2928 } else { 2929 c.assembler.CompileRegisterToRegister(amd64.ORL, nanFragReg, cmpResultReg) 2930 } 2931 2932 // Clear the unnecessary bits by zero extending the first byte. 2933 // This is necessary the upper bits (5 to 32 bits) of SET* instruction result is undefined. 2934 c.assembler.CompileRegisterToRegister(amd64.MOVBLZX, cmpResultReg, cmpResultReg) 2935 2936 // Now we have the result in cmpResultReg register, so we record it. 2937 c.pushRuntimeValueLocationOnRegister(cmpResultReg, runtimeValueTypeI32) 2938 // Also, we no longer need nanFragRegister. 2939 c.locationStack.markRegisterUnused(nanFragReg) 2940 return nil 2941 } 2942 2943 // compileEqz implements compiler.compileEqz for the amd64 architecture. 2944 func (c *amd64Compiler) compileEqz(o *wazeroir.OperationEqz) (err error) { 2945 v := c.locationStack.pop() 2946 if err = c.compileEnsureOnRegister(v); err != nil { 2947 return err 2948 } 2949 2950 switch o.Type { 2951 case wazeroir.UnsignedInt32: 2952 err = c.assembler.CompileStaticConstToRegister(amd64.CMPL, asm.NewStaticConst([]byte{0, 0, 0, 0}), v.register) 2953 case wazeroir.UnsignedInt64: 2954 err = c.assembler.CompileStaticConstToRegister(amd64.CMPQ, asm.NewStaticConst([]byte{0, 0, 0, 0, 0, 0, 0, 0}), v.register) 2955 } 2956 if err != nil { 2957 return err 2958 } 2959 2960 // v is consumed by the cmp operation so release it. 2961 c.locationStack.releaseRegister(v) 2962 2963 // Finally, record that the result is on the conditional register. 2964 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(amd64.ConditionalRegisterStateE) 2965 loc.valueType = runtimeValueTypeI32 2966 return nil 2967 } 2968 2969 // compileLt implements compiler.compileLt for the amd64 architecture. 2970 func (c *amd64Compiler) compileLt(o *wazeroir.OperationLt) error { 2971 x2 := c.locationStack.pop() 2972 if err := c.compileEnsureOnRegister(x2); err != nil { 2973 return err 2974 } 2975 2976 x1 := c.locationStack.pop() 2977 if err := c.compileEnsureOnRegister(x1); err != nil { 2978 return err 2979 } 2980 2981 // Emit the compare instruction. 2982 var resultConditionState asm.ConditionalRegisterState 2983 var inst asm.Instruction 2984 switch o.Type { 2985 case wazeroir.SignedTypeInt32: 2986 resultConditionState = amd64.ConditionalRegisterStateL 2987 inst = amd64.CMPL 2988 case wazeroir.SignedTypeUint32: 2989 resultConditionState = amd64.ConditionalRegisterStateB 2990 inst = amd64.CMPL 2991 case wazeroir.SignedTypeInt64: 2992 inst = amd64.CMPQ 2993 resultConditionState = amd64.ConditionalRegisterStateL 2994 case wazeroir.SignedTypeUint64: 2995 resultConditionState = amd64.ConditionalRegisterStateB 2996 inst = amd64.CMPQ 2997 case wazeroir.SignedTypeFloat32: 2998 resultConditionState = amd64.ConditionalRegisterStateA 2999 inst = amd64.COMISS 3000 case wazeroir.SignedTypeFloat64: 3001 resultConditionState = amd64.ConditionalRegisterStateA 3002 inst = amd64.COMISD 3003 } 3004 c.assembler.CompileRegisterToRegister(inst, x1.register, x2.register) 3005 3006 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3007 c.locationStack.releaseRegister(x1) 3008 c.locationStack.releaseRegister(x2) 3009 3010 // Finally, record that the result is on the conditional register. 
3011 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3012 loc.valueType = runtimeValueTypeI32 3013 return nil 3014 } 3015 3016 // compileGt implements compiler.compileGt for the amd64 architecture. 3017 func (c *amd64Compiler) compileGt(o *wazeroir.OperationGt) error { 3018 x2 := c.locationStack.pop() 3019 if err := c.compileEnsureOnRegister(x2); err != nil { 3020 return err 3021 } 3022 3023 x1 := c.locationStack.pop() 3024 if err := c.compileEnsureOnRegister(x1); err != nil { 3025 return err 3026 } 3027 3028 // Emit the compare instruction. 3029 var resultConditionState asm.ConditionalRegisterState 3030 switch o.Type { 3031 case wazeroir.SignedTypeInt32: 3032 resultConditionState = amd64.ConditionalRegisterStateG 3033 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3034 case wazeroir.SignedTypeUint32: 3035 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3036 resultConditionState = amd64.ConditionalRegisterStateA 3037 case wazeroir.SignedTypeInt64: 3038 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3039 resultConditionState = amd64.ConditionalRegisterStateG 3040 case wazeroir.SignedTypeUint64: 3041 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3042 resultConditionState = amd64.ConditionalRegisterStateA 3043 case wazeroir.SignedTypeFloat32: 3044 c.assembler.CompileRegisterToRegister(amd64.UCOMISS, x2.register, x1.register) 3045 resultConditionState = amd64.ConditionalRegisterStateA 3046 case wazeroir.SignedTypeFloat64: 3047 c.assembler.CompileRegisterToRegister(amd64.UCOMISD, x2.register, x1.register) 3048 resultConditionState = amd64.ConditionalRegisterStateA 3049 } 3050 3051 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3052 c.locationStack.releaseRegister(x1) 3053 c.locationStack.releaseRegister(x2) 3054 3055 // Finally, record that the result is on the conditional register. 3056 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3057 loc.valueType = runtimeValueTypeI32 3058 return nil 3059 } 3060 3061 // compileLe implements compiler.compileLe for the amd64 architecture. 3062 func (c *amd64Compiler) compileLe(o *wazeroir.OperationLe) error { 3063 x2 := c.locationStack.pop() 3064 if err := c.compileEnsureOnRegister(x2); err != nil { 3065 return err 3066 } 3067 3068 x1 := c.locationStack.pop() 3069 if err := c.compileEnsureOnRegister(x1); err != nil { 3070 return err 3071 } 3072 3073 // Emit the compare instruction. 
3074 var inst asm.Instruction 3075 var resultConditionState asm.ConditionalRegisterState 3076 switch o.Type { 3077 case wazeroir.SignedTypeInt32: 3078 resultConditionState = amd64.ConditionalRegisterStateLE 3079 inst = amd64.CMPL 3080 case wazeroir.SignedTypeUint32: 3081 resultConditionState = amd64.ConditionalRegisterStateBE 3082 inst = amd64.CMPL 3083 case wazeroir.SignedTypeInt64: 3084 resultConditionState = amd64.ConditionalRegisterStateLE 3085 inst = amd64.CMPQ 3086 case wazeroir.SignedTypeUint64: 3087 resultConditionState = amd64.ConditionalRegisterStateBE 3088 inst = amd64.CMPQ 3089 case wazeroir.SignedTypeFloat32: 3090 resultConditionState = amd64.ConditionalRegisterStateAE 3091 inst = amd64.UCOMISS 3092 case wazeroir.SignedTypeFloat64: 3093 resultConditionState = amd64.ConditionalRegisterStateAE 3094 inst = amd64.UCOMISD 3095 } 3096 c.assembler.CompileRegisterToRegister(inst, x1.register, x2.register) 3097 3098 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3099 c.locationStack.releaseRegister(x1) 3100 c.locationStack.releaseRegister(x2) 3101 3102 // Finally, record that the result is on the conditional register. 3103 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3104 loc.valueType = runtimeValueTypeI32 3105 return nil 3106 } 3107 3108 // compileGe implements compiler.compileGe for the amd64 architecture. 3109 func (c *amd64Compiler) compileGe(o *wazeroir.OperationGe) error { 3110 x2 := c.locationStack.pop() 3111 if err := c.compileEnsureOnRegister(x2); err != nil { 3112 return err 3113 } 3114 3115 x1 := c.locationStack.pop() 3116 if err := c.compileEnsureOnRegister(x1); err != nil { 3117 return err 3118 } 3119 3120 // Emit the compare instruction. 3121 var resultConditionState asm.ConditionalRegisterState 3122 switch o.Type { 3123 case wazeroir.SignedTypeInt32: 3124 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3125 resultConditionState = amd64.ConditionalRegisterStateGE 3126 case wazeroir.SignedTypeUint32: 3127 c.assembler.CompileRegisterToRegister(amd64.CMPL, x1.register, x2.register) 3128 resultConditionState = amd64.ConditionalRegisterStateAE 3129 case wazeroir.SignedTypeInt64: 3130 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3131 resultConditionState = amd64.ConditionalRegisterStateGE 3132 case wazeroir.SignedTypeUint64: 3133 c.assembler.CompileRegisterToRegister(amd64.CMPQ, x1.register, x2.register) 3134 resultConditionState = amd64.ConditionalRegisterStateAE 3135 case wazeroir.SignedTypeFloat32: 3136 c.assembler.CompileRegisterToRegister(amd64.COMISS, x2.register, x1.register) 3137 resultConditionState = amd64.ConditionalRegisterStateAE 3138 case wazeroir.SignedTypeFloat64: 3139 c.assembler.CompileRegisterToRegister(amd64.COMISD, x2.register, x1.register) 3140 resultConditionState = amd64.ConditionalRegisterStateAE 3141 } 3142 3143 // x1 and x2 are temporary registers only used for the cmp operation. Release them. 3144 c.locationStack.releaseRegister(x1) 3145 c.locationStack.releaseRegister(x2) 3146 3147 // Finally, record that the result is on the conditional register. 3148 loc := c.locationStack.pushRuntimeValueLocationOnConditionalRegister(resultConditionState) 3149 loc.valueType = runtimeValueTypeI32 3150 return nil 3151 } 3152 3153 // compileLoad implements compiler.compileLoad for the amd64 architecture. 
3154 func (c *amd64Compiler) compileLoad(o *wazeroir.OperationLoad) error { 3155 var ( 3156 isIntType bool 3157 movInst asm.Instruction 3158 targetSizeInBytes int64 3159 vt runtimeValueType 3160 ) 3161 switch o.Type { 3162 case wazeroir.UnsignedTypeI32: 3163 isIntType = true 3164 movInst = amd64.MOVL 3165 targetSizeInBytes = 32 / 8 3166 vt = runtimeValueTypeI32 3167 case wazeroir.UnsignedTypeI64: 3168 isIntType = true 3169 movInst = amd64.MOVQ 3170 targetSizeInBytes = 64 / 8 3171 vt = runtimeValueTypeI64 3172 case wazeroir.UnsignedTypeF32: 3173 isIntType = false 3174 movInst = amd64.MOVL 3175 targetSizeInBytes = 32 / 8 3176 vt = runtimeValueTypeF32 3177 case wazeroir.UnsignedTypeF64: 3178 isIntType = false 3179 movInst = amd64.MOVQ 3180 targetSizeInBytes = 64 / 8 3181 vt = runtimeValueTypeF64 3182 } 3183 3184 reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, targetSizeInBytes) 3185 if err != nil { 3186 return err 3187 } 3188 3189 if isIntType { 3190 // For integer types, read the corresponding bytes from the offset to the memory 3191 // and store the value to the int register. 3192 c.assembler.CompileMemoryWithIndexToRegister(movInst, 3193 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3194 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3195 reg) 3196 c.pushRuntimeValueLocationOnRegister(reg, vt) 3197 } else { 3198 // For float types, we read the value to the float register. 3199 floatReg, err := c.allocateRegister(registerTypeVector) 3200 if err != nil { 3201 return err 3202 } 3203 c.assembler.CompileMemoryWithIndexToRegister(movInst, 3204 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3205 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3206 floatReg) 3207 c.pushRuntimeValueLocationOnRegister(floatReg, vt) 3208 // We no longer need the int register so mark it unused. 3209 c.locationStack.markRegisterUnused(reg) 3210 } 3211 return nil 3212 } 3213 3214 // compileLoad8 implements compiler.compileLoad8 for the amd64 architecture. 3215 func (c *amd64Compiler) compileLoad8(o *wazeroir.OperationLoad8) error { 3216 const targetSizeInBytes = 1 3217 reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, targetSizeInBytes) 3218 if err != nil { 3219 return err 3220 } 3221 3222 // Then move a byte at the offset to the register. 3223 // Note that Load8 is only for integer types. 3224 var inst asm.Instruction 3225 var vt runtimeValueType 3226 switch o.Type { 3227 case wazeroir.SignedInt32: 3228 inst = amd64.MOVBLSX 3229 vt = runtimeValueTypeI32 3230 case wazeroir.SignedUint32: 3231 inst = amd64.MOVBLZX 3232 vt = runtimeValueTypeI32 3233 case wazeroir.SignedInt64: 3234 inst = amd64.MOVBQSX 3235 vt = runtimeValueTypeI64 3236 case wazeroir.SignedUint64: 3237 inst = amd64.MOVBQZX 3238 vt = runtimeValueTypeI64 3239 } 3240 3241 c.assembler.CompileMemoryWithIndexToRegister(inst, 3242 // we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil]. 3243 amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1, 3244 reg) 3245 3246 c.pushRuntimeValueLocationOnRegister(reg, vt) 3247 return nil 3248 } 3249 3250 // compileLoad16 implements compiler.compileLoad16 for the amd64 architecture. 3251 func (c *amd64Compiler) compileLoad16(o *wazeroir.OperationLoad16) error { 3252 const targetSizeInBytes = 16 / 8 3253 reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, targetSizeInBytes) 3254 if err != nil { 3255 return err 3256 } 3257 3258 // Then move 2 bytes at the offset to the register. 
3259 	// Note that Load16 is only for integer types.
3260 	var inst asm.Instruction
3261 	var vt runtimeValueType
3262 	switch o.Type {
3263 	case wazeroir.SignedInt32:
3264 		inst = amd64.MOVWLSX
3265 		vt = runtimeValueTypeI32
3266 	case wazeroir.SignedInt64:
3267 		inst = amd64.MOVWQSX
3268 		vt = runtimeValueTypeI64
3269 	case wazeroir.SignedUint32:
3270 		inst = amd64.MOVWLZX
3271 		vt = runtimeValueTypeI32
3272 	case wazeroir.SignedUint64:
3273 		inst = amd64.MOVWQZX
3274 		vt = runtimeValueTypeI64
3275 	}
3276 
3277 	c.assembler.CompileMemoryWithIndexToRegister(inst,
3278 		// we access memory as memory.Buffer[ceil-targetSizeInBytes: ceil].
3279 		amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1,
3280 		reg)
3281 
3282 	c.pushRuntimeValueLocationOnRegister(reg, vt)
3283 	return nil
3284 }
3285 
3286 // compileLoad32 implements compiler.compileLoad32 for the amd64 architecture.
3287 func (c *amd64Compiler) compileLoad32(o *wazeroir.OperationLoad32) error {
3288 	const targetSizeInBytes = 32 / 8
3289 	reg, err := c.compileMemoryAccessCeilSetup(o.Arg.Offset, targetSizeInBytes)
3290 	if err != nil {
3291 		return err
3292 	}
3293 
3294 	// Then move 4 bytes at the offset to the register.
3295 	var inst asm.Instruction
3296 	if o.Signed {
3297 		inst = amd64.MOVLQSX
3298 	} else {
3299 		inst = amd64.MOVLQZX
3300 	}
3301 	c.assembler.CompileMemoryWithIndexToRegister(inst,
3302 		// We access memory as memory.Buffer[ceil-targetSizeInBytes: ceil].
3303 		amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1,
3304 		reg)
3305 	c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64)
3306 	return nil
3307 }
3308 
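// Editor's note: the following is an illustrative sketch added for this edit, not part of
// the original wazero source. The MOVB/MOVW/MOVL "SX" and "ZX" variants chosen by the load
// compilers above differ only in how the loaded bytes are widened to the destination
// register. In pure Go terms (the function name is hypothetical):
func exampleLoadExtend8(b byte) (signExtended, zeroExtended int64) {
	signExtended = int64(int8(b))  // e.g. MOVBQSX: 0xFF becomes -1.
	zeroExtended = int64(uint8(b)) // e.g. MOVBQZX: 0xFF becomes 255.
	return
}
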
3309 // compileMemoryAccessCeilSetup pops the top value from the stack (called "base"), stores "base + offsetArg + targetSizeInBytes"
3310 // into a register, and returns the stored register. We call the result "ceil" because we access the memory
3311 // as memory.Buffer[ceil-targetSizeInBytes: ceil].
3312 //
3313 // Note: this also emits the instructions to check the out-of-bounds memory access.
3314 // In other words, if the ceil exceeds the memory size, the code exits with nativeCallStatusCodeMemoryOutOfBounds status.
3315 func (c *amd64Compiler) compileMemoryAccessCeilSetup(offsetArg uint32, targetSizeInBytes int64) (asm.Register, error) {
3316 	base := c.locationStack.pop()
3317 	if err := c.compileEnsureOnRegister(base); err != nil {
3318 		return asm.NilRegister, err
3319 	}
3320 
3321 	result := base.register
3322 	if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxInt32 {
3323 		c.assembler.CompileConstToRegister(amd64.ADDQ, offsetConst, result)
3324 	} else if offsetConst <= math.MaxUint32 {
3325 		// Note: in practice this branch rarely happens, as it means the wasm binary knows that
3326 		// the memory has more than 1 GiB, or at least tries to access a region above 1 GiB.
3327 		//
3328 		// In this case, we cannot directly add the offset to a register with the ADDQ(const) instruction,
3329 		// because the imm32 const is sign-extended to 64-bit in ADDQ(const), and we would end up
3330 		// treating offsetConst as a negative number, which is wrong.
3331 		tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3332 		if err != nil {
3333 			return asm.NilRegister, err
3334 		}
3335 		c.assembler.CompileConstToRegister(amd64.MOVL, int64(uint32(offsetConst)), tmp)
3336 		c.assembler.CompileRegisterToRegister(amd64.ADDQ, tmp, result)
3337 	} else {
3338 		// If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
3339 		c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
3340 		return result, nil
3341 	}
3342 
3343 	// Now we compare the value with the memory length which is held by callEngine.
3344 	c.assembler.CompileMemoryToRegister(amd64.CMPQ,
3345 		amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, result)
3346 
3347 	// Jump if the value is within the memory length.
3348 	okJmp := c.assembler.CompileJump(amd64.JCC)
3349 
3350 	// Otherwise, we exit the function with out-of-bounds status code.
3351 	c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
3352 
3353 	c.assembler.SetJumpTargetOnNext(okJmp)
3354 
3355 	c.locationStack.markRegisterUnused(result)
3356 	return result, nil
3357 }
3358 
3359 // compileStore implements compiler.compileStore for the amd64 architecture.
3360 func (c *amd64Compiler) compileStore(o *wazeroir.OperationStore) error {
3361 	var movInst asm.Instruction
3362 	var targetSizeInByte int64
3363 	switch o.Type {
3364 	case wazeroir.UnsignedTypeI32, wazeroir.UnsignedTypeF32:
3365 		movInst = amd64.MOVL
3366 		targetSizeInByte = 32 / 8
3367 	case wazeroir.UnsignedTypeI64, wazeroir.UnsignedTypeF64:
3368 		movInst = amd64.MOVQ
3369 		targetSizeInByte = 64 / 8
3370 	}
3371 	return c.compileStoreImpl(o.Arg.Offset, movInst, targetSizeInByte)
3372 }
3373 
3374 // compileStore8 implements compiler.compileStore8 for the amd64 architecture.
3375 func (c *amd64Compiler) compileStore8(o *wazeroir.OperationStore8) error {
3376 	return c.compileStoreImpl(o.Arg.Offset, amd64.MOVB, 1)
3377 }
3378 
3379 // compileStore16 implements compiler.compileStore16 for the amd64 architecture.
3380 func (c *amd64Compiler) compileStore16(o *wazeroir.OperationStore16) error {
3381 	return c.compileStoreImpl(o.Arg.Offset, amd64.MOVW, 16/8)
3382 }
3383 
3384 // compileStore32 implements compiler.compileStore32 for the amd64 architecture.
3385 func (c *amd64Compiler) compileStore32(o *wazeroir.OperationStore32) error {
3386 	return c.compileStoreImpl(o.Arg.Offset, amd64.MOVL, 32/8)
3387 }
3388 
3389 func (c *amd64Compiler) compileStoreImpl(offsetConst uint32, inst asm.Instruction, targetSizeInBytes int64) error {
3390 	val := c.locationStack.pop()
3391 	if err := c.compileEnsureOnRegister(val); err != nil {
3392 		return err
3393 	}
3394 
3395 	reg, err := c.compileMemoryAccessCeilSetup(offsetConst, targetSizeInBytes)
3396 	if err != nil {
3397 		return err
3398 	}
3399 
3400 	c.assembler.CompileRegisterToMemoryWithIndex(
3401 		inst, val.register,
3402 		amd64ReservedRegisterForMemory, -targetSizeInBytes, reg, 1,
3403 	)
3404 
3405 	// We no longer need both the value and base registers.
3406 	c.locationStack.releaseRegister(val)
3407 	c.locationStack.markRegisterUnused(reg)
3408 	return nil
3409 }
3410 
3411 // compileMemoryGrow implements compiler.compileMemoryGrow for the amd64 architecture.
3412 func (c *amd64Compiler) compileMemoryGrow() error {
3413 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3414 		return err
3415 	}
3416 
3417 	if err := c.compileCallBuiltinFunction(builtinFunctionIndexMemoryGrow); err != nil {
3418 		return err
3419 	}
3420 
3421 	// After the function call, we have to re-initialize the reserved stack base pointer and memory registers.
3422 	c.compileReservedStackBasePointerInitialization()
3423 	c.compileReservedMemoryPointerInitialization()
3424 	return nil
3425 }
3426 
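// Editor's note: the following is an illustrative sketch added for this edit, not part of
// the original wazero source. It restates, in pure Go, the "ceil" bounds check performed by
// compileMemoryAccessCeilSetup and relied on by the load/store code above: the access is
// valid only if base+offset+size does not exceed the memory length, and the bytes are then
// read from memory.Buffer[ceil-size : ceil]. The function name is hypothetical.
func exampleMemoryLoad32(mem []byte, base, offsetArg uint32) (uint32, bool) {
	const targetSizeInBytes = 4
	ceil := uint64(base) + uint64(offsetArg) + targetSizeInBytes
	if ceil > uint64(len(mem)) {
		return 0, false // the generated code exits with nativeCallStatusCodeMemoryOutOfBounds.
	}
	buf := mem[ceil-targetSizeInBytes : ceil]
	// WebAssembly memory is little-endian.
	return uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24, true
}
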
3427 // compileMemorySize implements compiler.compileMemorySize for the amd64 architecture.
3428 func (c *amd64Compiler) compileMemorySize() error {
3429 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3430 		return err
3431 	}
3432 
3433 	reg, err := c.allocateRegister(registerTypeGeneralPurpose)
3434 	if err != nil {
3435 		return err
3436 	}
3437 	loc := c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
3438 
3439 	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, loc.register)
3440 
3441 	// WebAssembly's memory.size returns the current size of the memory in pages (a page is 65536 bytes).
3442 	// That is equivalent to dividing the length of the memory slice by 65536, which
3443 	// can be computed as a right shift by 16 bits since 65536 = 2^16.
3444 	c.assembler.CompileConstToRegister(amd64.SHRQ, wasm.MemoryPageSizeInBits, loc.register)
3445 	return nil
3446 }
3447 
3448 // compileMemoryInit implements compiler.compileMemoryInit for the amd64 architecture.
3449 func (c *amd64Compiler) compileMemoryInit(o *wazeroir.OperationMemoryInit) error {
3450 	return c.compileInitImpl(false, o.DataIndex, 0)
3451 }
3452 
3453 // compileInitImpl implements compileTableInit and compileMemoryInit.
3454 //
3455 // TODO: the compiled code in this function should be reused and compiled only once, as
3456 // the code is independent of any module.
3457 func (c *amd64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
3458 	outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
3459 	if isTable {
3460 		outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
3461 	}
3462 
3463 	copySize := c.locationStack.pop()
3464 	if err := c.compileEnsureOnRegister(copySize); err != nil {
3465 		return err
3466 	}
3467 
3468 	sourceOffset := c.locationStack.pop()
3469 	if err := c.compileEnsureOnRegister(sourceOffset); err != nil {
3470 		return err
3471 	}
3472 
3473 	destinationOffset := c.locationStack.pop()
3474 	if err := c.compileEnsureOnRegister(destinationOffset); err != nil {
3475 		return err
3476 	}
3477 
3478 	instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
3479 	if err != nil {
3480 		return err
3481 	}
3482 	c.locationStack.markRegisterUsed(instanceAddr)
3483 	if isTable {
3484 		c.compileLoadElemInstanceAddress(index, instanceAddr)
3485 	} else {
3486 		c.compileLoadDataInstanceAddress(index, instanceAddr)
3487 	}
3488 
3489 	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3490 	if err != nil {
3491 		return err
3492 	}
3493 	c.locationStack.markRegisterUsed(tmp)
3494 
3495 	// sourceOffset += size.
3496 	c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register)
3497 	// destinationOffset += size.
3498 	c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register)
3499 
3500 	// Check the source bounds: if sourceOffset exceeds the instance length, exit with an out-of-bounds error.
3501 	c.assembler.CompileMemoryToRegister(amd64.CMPQ,
3502 		instanceAddr, 8, // Both DataInstance and ElementInstance store their length at offset 8.
3503 		sourceOffset.register)
3504 	sourceBoundOKJump := c.assembler.CompileJump(amd64.JCC)
3505 	c.compileExitFromNativeCode(outOfBoundsErrorStatus)
3506 	c.assembler.SetJumpTargetOnNext(sourceBoundOKJump)
3507 
3508 	// Check the destination bounds: if destinationOffset exceeds the length, exit with an out-of-bounds error.
3509 	if isTable {
3510 		// Load the target table's address.
3511 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 3512 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex*8), tmp) 3513 // Compare length. 3514 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, destinationOffset.register) 3515 } else { 3516 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3517 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, 3518 destinationOffset.register) 3519 } 3520 3521 destinationBoundOKJump := c.assembler.CompileJump(amd64.JCC) 3522 c.compileExitFromNativeCode(outOfBoundsErrorStatus) 3523 c.assembler.SetJumpTargetOnNext(destinationBoundOKJump) 3524 3525 // Otherwise, ready to copy the value from source to destination. 3526 // 3527 // If the copy size equal zero, we skip the entire instructions below. 3528 c.assembler.CompileRegisterToConst(amd64.CMPQ, copySize.register, 0) 3529 skipJump := c.assembler.CompileJump(amd64.JEQ) 3530 3531 var scale int16 3532 var memToReg, regToMem asm.Instruction 3533 if isTable { 3534 // Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2. 3535 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, sourceOffset.register) 3536 c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register) 3537 // destinationOffset += table buffer's absolute address. 3538 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3539 tmp, tableInstanceTableOffset, destinationOffset.register) 3540 // sourceOffset += data buffer's absolute address. 3541 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3542 instanceAddr, 0, sourceOffset.register) 3543 3544 // For tables, we move 8 bytes at once. 3545 memToReg = amd64.MOVQ 3546 regToMem = memToReg 3547 scale = 8 3548 } else { 3549 // destinationOffset += memory buffer's absolute address. 3550 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3551 3552 // sourceOffset += data buffer's absolute address. 3553 c.assembler.CompileMemoryToRegister(amd64.ADDQ, instanceAddr, 0, sourceOffset.register) 3554 3555 // Move one byte at once. 3556 memToReg = amd64.MOVBQZX 3557 regToMem = amd64.MOVB 3558 scale = 1 3559 } 3560 3561 // Negate the counter. 3562 c.assembler.CompileNoneToRegister(amd64.NEGQ, copySize.register) 3563 3564 beginCopyLoop := c.assembler.CompileStandAlone(amd64.NOP) 3565 3566 c.assembler.CompileMemoryWithIndexToRegister(memToReg, 3567 sourceOffset.register, 0, copySize.register, scale, 3568 tmp) 3569 // [destinationOffset + (size.register)] = tmp. 3570 c.assembler.CompileRegisterToMemoryWithIndex(regToMem, 3571 tmp, 3572 destinationOffset.register, 0, copySize.register, scale, 3573 ) 3574 3575 // size += 1 3576 c.assembler.CompileNoneToRegister(amd64.INCQ, copySize.register) 3577 c.assembler.CompileJump(amd64.JMI).AssignJumpTarget(beginCopyLoop) 3578 3579 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 3580 destinationOffset.register, instanceAddr, tmp) 3581 c.assembler.SetJumpTargetOnNext(skipJump) 3582 return nil 3583 } 3584 3585 // compileDataDrop implements compiler.compileDataDrop for the amd64 architecture. 
3586 func (c *amd64Compiler) compileDataDrop(o *wazeroir.OperationDataDrop) error { 3587 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 3588 return err 3589 } 3590 3591 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3592 if err != nil { 3593 return err 3594 } 3595 3596 c.compileLoadDataInstanceAddress(o.DataIndex, tmp) 3597 3598 // Clears the content of DataInstance[o.DataIndex] (== []byte type). 3599 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 0) 3600 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 8) 3601 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 16) 3602 return nil 3603 } 3604 3605 func (c *amd64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) { 3606 // dst = dataIndex * dataInstanceStructSize. 3607 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(dataIndex)*dataInstanceStructSize, dst) 3608 3609 // dst = &moduleInstance.DataInstances[0] + dst 3610 // = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize 3611 // = &moduleInstance.DataInstances[dataIndex] 3612 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 3613 amd64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset, 3614 dst, 3615 ) 3616 } 3617 3618 // compileCopyLoopImpl implements a REP MOVSQ memory copy for the given range with support for both directions. 3619 func (c *amd64Compiler) compileCopyLoopImpl(destinationOffset, sourceOffset, copySize *runtimeValueLocation, backwards bool, bwOffset uint8) { 3620 // skip if nothing to copy 3621 c.assembler.CompileRegisterToConst(amd64.CMPQ, copySize.register, 0) 3622 emptyEightGroupsJump := c.assembler.CompileJump(amd64.JEQ) 3623 3624 // Prepare registers for swaps. There will never be more than 3 XCHGs in total. 3625 restoreCrossing := c.compilePreventCrossedTargetRegisters( 3626 []*runtimeValueLocation{destinationOffset, sourceOffset, copySize}, 3627 []asm.Register{amd64.RegDI, amd64.RegSI, amd64.RegCX}) 3628 3629 // Prepare registers for REP MOVSQ: copy from rsi to rdi, rcx times. 3630 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3631 c.compileMaybeSwapRegisters(sourceOffset.register, amd64.RegSI) 3632 c.compileMaybeSwapRegisters(copySize.register, amd64.RegCX) 3633 3634 // Point on first byte of first quadword to copy. 3635 if backwards { 3636 c.assembler.CompileConstToRegister(amd64.ADDQ, -int64(bwOffset), amd64.RegDI) 3637 c.assembler.CompileConstToRegister(amd64.ADDQ, -int64(bwOffset), amd64.RegSI) 3638 // Set REP prefix direction backwards. 3639 c.assembler.CompileStandAlone(amd64.STD) 3640 } 3641 3642 c.assembler.CompileStandAlone(amd64.REPMOVSQ) 3643 3644 if backwards { 3645 // Reset direction. 3646 c.assembler.CompileStandAlone(amd64.CLD) 3647 } 3648 3649 // Restore registers. 3650 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3651 c.compileMaybeSwapRegisters(sourceOffset.register, amd64.RegSI) 3652 c.compileMaybeSwapRegisters(copySize.register, amd64.RegCX) 3653 restoreCrossing() 3654 3655 c.assembler.SetJumpTargetOnNext(emptyEightGroupsJump) 3656 c.assembler.CompileStandAlone(amd64.NOP) 3657 } 3658 3659 // compileMemoryCopyLoopImpl is used for directly copying after bounds/direction check. 3660 func (c *amd64Compiler) compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize *runtimeValueLocation, tmp asm.Register, backwards bool) { 3661 // Point on first byte to be copied depending on direction. 
3662 if backwards { 3663 c.assembler.CompileNoneToRegister(amd64.DECQ, sourceOffset.register) 3664 c.assembler.CompileNoneToRegister(amd64.DECQ, destinationOffset.register) 3665 } else { 3666 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, sourceOffset.register) 3667 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register) 3668 } 3669 3670 // destinationOffset += memory buffer's absolute address. 3671 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register) 3672 // sourceOffset += memory buffer's absolute address. 3673 c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, sourceOffset.register) 3674 3675 // Copy copySize % 8 bytes in loop to allow copying in 8 byte groups afterward. 3676 beginLoop := c.assembler.CompileStandAlone(amd64.NOP) 3677 3678 // Check copySize % 8 == 0. 3679 c.assembler.CompileConstToRegister(amd64.TESTQ, 7, copySize.register) 3680 breakLoop := c.assembler.CompileJump(amd64.JEQ) 3681 3682 c.assembler.CompileMemoryToRegister(amd64.MOVBQZX, sourceOffset.register, 0, tmp) 3683 c.assembler.CompileRegisterToMemory(amd64.MOVB, tmp, destinationOffset.register, 0) 3684 3685 if backwards { 3686 c.assembler.CompileNoneToRegister(amd64.DECQ, sourceOffset.register) 3687 c.assembler.CompileNoneToRegister(amd64.DECQ, destinationOffset.register) 3688 } else { 3689 c.assembler.CompileNoneToRegister(amd64.INCQ, sourceOffset.register) 3690 c.assembler.CompileNoneToRegister(amd64.INCQ, destinationOffset.register) 3691 } 3692 3693 c.assembler.CompileNoneToRegister(amd64.DECQ, copySize.register) 3694 c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(beginLoop) 3695 c.assembler.SetJumpTargetOnNext(breakLoop) 3696 3697 // compileCopyLoopImpl counts in groups of 8 bytes, so we have to divide the copySize by 8. 3698 c.assembler.CompileConstToRegister(amd64.SHRQ, 3, copySize.register) 3699 3700 c.compileCopyLoopImpl(destinationOffset, sourceOffset, copySize, backwards, 7) 3701 } 3702 3703 // compileMemoryCopy implements compiler.compileMemoryCopy for the amd64 architecture. 3704 // 3705 // This uses efficient `REP MOVSQ` instructions to copy in quadword (8 bytes) batches. The remaining bytes 3706 // are copied with a simple `MOV` loop. It uses backward copying for overlapped segments. 3707 func (c *amd64Compiler) compileMemoryCopy() error { 3708 copySize := c.locationStack.pop() 3709 if err := c.compileEnsureOnRegister(copySize); err != nil { 3710 return err 3711 } 3712 3713 sourceOffset := c.locationStack.pop() 3714 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 3715 return err 3716 } 3717 3718 destinationOffset := c.locationStack.pop() 3719 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3720 return err 3721 } 3722 3723 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3724 if err != nil { 3725 return err 3726 } 3727 c.locationStack.markRegisterUsed(tmp) 3728 3729 // sourceOffset += size. 3730 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 3731 // destinationOffset += size. 3732 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3733 3734 // Check source bounds and if exceeds the length, exit with out of bounds error. 
3735 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3736 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, sourceOffset.register) 3737 sourceBoundOKJump := c.assembler.CompileJump(amd64.JCC) 3738 c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds) 3739 c.assembler.SetJumpTargetOnNext(sourceBoundOKJump) 3740 3741 // Check destination bounds and if exceeds the length, exit with out of bounds error. 3742 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 3743 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset, destinationOffset.register) 3744 destinationBoundOKJump := c.assembler.CompileJump(amd64.JCC) 3745 c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds) 3746 c.assembler.SetJumpTargetOnNext(destinationBoundOKJump) 3747 3748 // Skip zero size. 3749 c.assembler.CompileRegisterToConst(amd64.CMPQ, copySize.register, 0) 3750 skipJump := c.assembler.CompileJump(amd64.JEQ) 3751 3752 // If dest < source, we can copy forwards 3753 c.assembler.CompileRegisterToRegister(amd64.CMPQ, destinationOffset.register, sourceOffset.register) 3754 destLowerThanSourceJump := c.assembler.CompileJump(amd64.JLS) 3755 3756 // If source + size < dest, we can copy forwards 3757 c.assembler.CompileRegisterToRegister(amd64.MOVQ, destinationOffset.register, tmp) 3758 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, tmp) 3759 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, tmp) 3760 sourceBoundLowerThanDestJump := c.assembler.CompileJump(amd64.JLS) 3761 3762 // Copy backwards. 3763 c.compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize, tmp, true) 3764 endJump := c.assembler.CompileJump(amd64.JMP) 3765 3766 // Copy forwards. 3767 c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump, sourceBoundLowerThanDestJump) 3768 c.compileMemoryCopyLoopImpl(destinationOffset, sourceOffset, copySize, tmp, false) 3769 3770 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 3771 destinationOffset.register, tmp) 3772 c.assembler.SetJumpTargetOnNext(skipJump, endJump) 3773 3774 return nil 3775 } 3776 3777 // compileFillLoopImpl implements a REP STOSQ fill loop. 3778 func (c *amd64Compiler) compileFillLoopImpl(destinationOffset, value, fillSize *runtimeValueLocation, tmp asm.Register, replicateByte bool) { 3779 // Skip if nothing to fill. 3780 c.assembler.CompileRegisterToConst(amd64.CMPQ, fillSize.register, 0) 3781 emptyEightGroupsJump := c.assembler.CompileJump(amd64.JEQ) 3782 3783 if replicateByte { 3784 // Replicate single byte onto full 8-byte register. 3785 c.assembler.CompileConstToRegister(amd64.MOVQ, 0x0101010101010101, tmp) 3786 c.assembler.CompileRegisterToRegister(amd64.IMULQ, tmp, value.register) 3787 } 3788 3789 // Prepare registers for swaps. There will never be more than 3 XCHGs in total. 3790 restoreCrossing := c.compilePreventCrossedTargetRegisters( 3791 []*runtimeValueLocation{destinationOffset, value, fillSize}, 3792 []asm.Register{amd64.RegDI, amd64.RegAX, amd64.RegCX}) 3793 3794 // Prepare registers for REP STOSQ: fill at [rdi] with rax, rcx times. 3795 c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI) 3796 c.compileMaybeSwapRegisters(value.register, amd64.RegAX) 3797 c.compileMaybeSwapRegisters(fillSize.register, amd64.RegCX) 3798 3799 c.assembler.CompileStandAlone(amd64.REPSTOSQ) 3800 3801 // Restore registers. 
3802 	c.compileMaybeSwapRegisters(destinationOffset.register, amd64.RegDI)
3803 	c.compileMaybeSwapRegisters(value.register, amd64.RegAX)
3804 	c.compileMaybeSwapRegisters(fillSize.register, amd64.RegCX)
3805 	restoreCrossing()
3806 
3807 	c.assembler.SetJumpTargetOnNext(emptyEightGroupsJump)
3808 }
3809 
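// Editor's note: the following is an illustrative sketch added for this edit, not part of
// the original wazero source. For memory.fill, compileFillLoopImpl replicates the single
// fill byte across all eight bytes of a register (MOVQ of 0x0101010101010101 followed by
// IMULQ) so that REP STOSQ can store it quadword by quadword. In pure Go terms (the
// function name is hypothetical):
func exampleReplicateFillByte(b byte) uint64 {
	// Since b < 256, the multiplication produces no carries between bytes,
	// e.g. 0xAB becomes 0xABABABABABABABAB.
	return uint64(b) * 0x0101010101010101
}
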
3810 // compileFillImpl implements compileMemoryFill and compileTableFill for the amd64 architecture.
3811 //
3812 // This function uses efficient `REP STOSQ` instructions to fill in quadword (8 bytes) batches
3813 // if the size is above 15 bytes. For smaller sizes, a simple MOVB fill loop is the best
3814 // option.
3815 //
3816 // TODO: the compiled code in this function should be reused and compiled only once, as
3817 // the code is independent of any module.
3818 func (c *amd64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error {
3819 	copySize := c.locationStack.pop()
3820 	if err := c.compileEnsureOnRegister(copySize); err != nil {
3821 		return err
3822 	}
3823 
3824 	value := c.locationStack.pop()
3825 	if err := c.compileEnsureOnRegister(value); err != nil {
3826 		return err
3827 	}
3828 
3829 	destinationOffset := c.locationStack.pop()
3830 	if err := c.compileEnsureOnRegister(destinationOffset); err != nil {
3831 		return err
3832 	}
3833 
3834 	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3835 	if err != nil {
3836 		return err
3837 	}
3838 	c.locationStack.markRegisterUsed(tmp)
3839 
3840 	// destinationOffset += size.
3841 	c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register)
3842 
3843 	// Check the destination bounds: if destinationOffset exceeds the length, exit with an out-of-bounds error.
3844 	if isTable {
3845 		// tmp = &tables[0]
3846 		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
3847 			amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3848 			tmp)
3849 
3850 		// tmp = [tmp + TableIndex*8]
3851 		//     = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3852 		//     = [&tables[TableIndex]] = tables[TableIndex].
3853 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(tableIndex)*8, tmp)
3854 
3855 		c.assembler.CompileMemoryToRegister(amd64.CMPQ,
3856 			tmp, tableInstanceTableLenOffset,
3857 			destinationOffset.register)
3858 	} else {
3859 		c.assembler.CompileMemoryToRegister(amd64.CMPQ,
3860 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
3861 			destinationOffset.register)
3862 	}
3863 	destinationBoundOKJump := c.assembler.CompileJump(amd64.JCC)
3864 	if isTable {
3865 		c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess)
3866 	} else {
3867 		c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
3868 	}
3869 	c.assembler.SetJumpTargetOnNext(destinationBoundOKJump)
3870 
3871 	// Otherwise, we are ready to fill the destination region with the value.
3872 	//
3873 	// If the copy size equals zero, we skip the instructions below entirely.
3874 	c.assembler.CompileRegisterToConst(amd64.CMPQ, copySize.register, 0)
3875 	skipJump := c.assembler.CompileJump(amd64.JEQ)
3876 
3877 	// destinationOffset -= size.
3878 	c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register)
3879 
3880 	if isTable {
3881 		// Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2.
3882 		c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register)
3883 		// destinationOffset += table buffer's absolute address.
3884 		c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, destinationOffset.register)
3885 
3886 	} else {
3887 		// destinationOffset += memory buffer's absolute address.
3888 		c.assembler.CompileRegisterToRegister(amd64.ADDQ, amd64ReservedRegisterForMemory, destinationOffset.register)
3889 
3890 		// Fill the first copySize % 16 bytes with a simple MOVB loop.
3891 		beginCopyLoop := c.assembler.CompileStandAlone(amd64.NOP)
3892 		c.assembler.CompileConstToRegister(amd64.TESTQ, 15, copySize.register)
3893 		breakLoop := c.assembler.CompileJump(amd64.JEQ)
3894 
3895 		c.assembler.CompileRegisterToMemory(amd64.MOVB, value.register, destinationOffset.register, 0)
3896 
3897 		c.assembler.CompileNoneToRegister(amd64.INCQ, destinationOffset.register)
3898 		c.assembler.CompileNoneToRegister(amd64.DECQ, copySize.register)
3899 		c.assembler.CompileJump(amd64.JMP).AssignJumpTarget(beginCopyLoop)
3900 
3901 		c.assembler.SetJumpTargetOnNext(breakLoop)
3902 		// compileFillLoopImpl counts in groups of 8 bytes, so we have to divide the copySize by 8.
3903 		c.assembler.CompileConstToRegister(amd64.SHRQ, 3, copySize.register)
3904 	}
3905 
3906 	c.compileFillLoopImpl(destinationOffset, value, copySize, tmp, !isTable)
3907 
3908 	c.locationStack.markRegisterUnused(copySize.register, value.register,
3909 		destinationOffset.register, tmp)
3910 	c.assembler.SetJumpTargetOnNext(skipJump)
3911 	return nil
3912 }
3913 
3914 // compileMemoryFill implements compiler.compileMemoryFill for the amd64 architecture.
3915 //
3916 // TODO: the compiled code in this function should be reused and compiled only once, as
3917 // the code is independent of any module.
3918 func (c *amd64Compiler) compileMemoryFill() error {
3919 	return c.compileFillImpl(false, 0)
3920 }
3921 
3922 // compileTableInit implements compiler.compileTableInit for the amd64 architecture.
3923 func (c *amd64Compiler) compileTableInit(o *wazeroir.OperationTableInit) error {
3924 	return c.compileInitImpl(true, o.ElemIndex, o.TableIndex)
3925 }
3926 
3927 // compileTableCopyLoopImpl is used for directly copying after bounds/direction check.
3928 func (c *amd64Compiler) compileTableCopyLoopImpl(o *wazeroir.OperationTableCopy, destinationOffset, sourceOffset, copySize *runtimeValueLocation, tmp asm.Register, backwards bool) {
3929 	// Point on first byte to be copied.
3930 	if !backwards {
3931 		c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, sourceOffset.register)
3932 		c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, destinationOffset.register)
3933 	}
3934 
3935 	// Each element is of type uintptr; 2^3 = 1 << pointerSizeLog2.
3936 	c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, sourceOffset.register)
3937 	c.assembler.CompileConstToRegister(amd64.SHLQ, pointerSizeLog2, destinationOffset.register)
3938 	// destinationOffset += table buffer's absolute address.
3939 	c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp)
3940 	c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(o.DstTableIndex*8), tmp)
3941 	c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, destinationOffset.register)
3942 	// sourceOffset += table buffer's absolute address.
3943 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 3944 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(o.SrcTableIndex*8), tmp) 3945 c.assembler.CompileMemoryToRegister(amd64.ADDQ, tmp, tableInstanceTableOffset, sourceOffset.register) 3946 3947 c.compileCopyLoopImpl(destinationOffset, sourceOffset, copySize, backwards, 8) 3948 } 3949 3950 // compileTableCopy implements compiler.compileTableCopy for the amd64 architecture. 3951 // 3952 // It uses efficient `REP MOVSB` instructions for optimized copying. It uses backward copying for 3953 // overlapped segments. 3954 func (c *amd64Compiler) compileTableCopy(o *wazeroir.OperationTableCopy) error { 3955 copySize := c.locationStack.pop() 3956 if err := c.compileEnsureOnRegister(copySize); err != nil { 3957 return err 3958 } 3959 3960 sourceOffset := c.locationStack.pop() 3961 if err := c.compileEnsureOnRegister(sourceOffset); err != nil { 3962 return err 3963 } 3964 3965 destinationOffset := c.locationStack.pop() 3966 if err := c.compileEnsureOnRegister(destinationOffset); err != nil { 3967 return err 3968 } 3969 3970 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 3971 if err != nil { 3972 return err 3973 } 3974 3975 // sourceOffset += size. 3976 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, sourceOffset.register) 3977 // destinationOffset += size. 3978 c.assembler.CompileRegisterToRegister(amd64.ADDQ, copySize.register, destinationOffset.register) 3979 3980 // Check source bounds and if exceeds the length, exit with out of bounds error. 3981 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 3982 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(o.SrcTableIndex*8), tmp) 3983 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, sourceOffset.register) 3984 sourceBoundOKJump := c.assembler.CompileJump(amd64.JCC) 3985 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess) 3986 c.assembler.SetJumpTargetOnNext(sourceBoundOKJump) 3987 3988 // Check destination bounds and if exceeds the length, exit with out of bounds error. 3989 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, tmp) 3990 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(o.DstTableIndex*8), tmp) 3991 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, destinationOffset.register) 3992 destinationBoundOKJump := c.assembler.CompileJump(amd64.JCC) 3993 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess) 3994 c.assembler.SetJumpTargetOnNext(destinationBoundOKJump) 3995 3996 // Skip zero size. 3997 c.assembler.CompileRegisterToConst(amd64.CMPQ, copySize.register, 0) 3998 skipJump := c.assembler.CompileJump(amd64.JEQ) 3999 4000 // If dest < source, we can copy forwards. 4001 c.assembler.CompileRegisterToRegister(amd64.CMPQ, destinationOffset.register, sourceOffset.register) 4002 destLowerThanSourceJump := c.assembler.CompileJump(amd64.JLS) 4003 4004 // If source + size < dest, we can copy forwards. 
4005 c.assembler.CompileRegisterToRegister(amd64.MOVQ, destinationOffset.register, tmp) 4006 c.assembler.CompileRegisterToRegister(amd64.SUBQ, copySize.register, tmp) 4007 c.assembler.CompileRegisterToRegister(amd64.CMPQ, sourceOffset.register, tmp) 4008 sourceBoundLowerThanDestJump := c.assembler.CompileJump(amd64.JLS) 4009 4010 // Copy backwards. 4011 c.compileTableCopyLoopImpl(o, destinationOffset, sourceOffset, copySize, tmp, true) 4012 endJump := c.assembler.CompileJump(amd64.JMP) 4013 4014 // Copy forwards. 4015 c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump, sourceBoundLowerThanDestJump) 4016 c.compileTableCopyLoopImpl(o, destinationOffset, sourceOffset, copySize, tmp, false) 4017 4018 c.locationStack.markRegisterUnused(copySize.register, sourceOffset.register, 4019 destinationOffset.register, tmp) 4020 c.assembler.SetJumpTargetOnNext(skipJump, endJump) 4021 return nil 4022 } 4023 4024 // compileElemDrop implements compiler.compileElemDrop for the amd64 architecture. 4025 func (c *amd64Compiler) compileElemDrop(o *wazeroir.OperationElemDrop) error { 4026 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4027 return err 4028 } 4029 4030 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4031 if err != nil { 4032 return err 4033 } 4034 4035 c.compileLoadElemInstanceAddress(o.ElemIndex, tmp) 4036 4037 // Clears the content of ElementInstances[o.ElemIndex].References (== []uintptr{} type). 4038 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 0) 4039 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 8) 4040 c.assembler.CompileConstToMemory(amd64.MOVQ, 0, tmp, 16) 4041 return nil 4042 } 4043 4044 func (c *amd64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) { 4045 // dst = elemIndex * elementInstanceStructSize 4046 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(elemIndex)*elementInstanceStructSize, dst) 4047 4048 // dst = &moduleInstance.ElementInstances[0] + dst 4049 // = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize 4050 // = &moduleInstance.ElementInstances[elemIndex] 4051 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 4052 amd64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset, 4053 dst, 4054 ) 4055 } 4056 4057 // compileTableGet implements compiler.compileTableGet for the amd64 architecture. 4058 func (c *amd64Compiler) compileTableGet(o *wazeroir.OperationTableGet) error { 4059 ref, err := c.allocateRegister(registerTypeGeneralPurpose) 4060 if err != nil { 4061 return err 4062 } 4063 4064 c.locationStack.markRegisterUsed(ref) 4065 4066 offset := c.locationStack.pop() 4067 if err := c.compileEnsureOnRegister(offset); err != nil { 4068 return err 4069 } 4070 4071 // ref = &tables[0] 4072 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4073 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4074 ref) 4075 4076 // ref = [ref + TableIndex*8] 4077 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4078 // = [&tables[TableIndex]] = tables[TableIndex]. 4079 c.assembler.CompileMemoryToRegister(amd64.MOVQ, ref, int64(o.TableIndex)*8, ref) 4080 4081 // Out of bounds check. 
4082 c.assembler.CompileMemoryToRegister(amd64.CMPQ, ref, tableInstanceTableLenOffset, offset.register) 4083 boundOKJmp := c.assembler.CompileJump(amd64.JHI) 4084 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess) 4085 c.assembler.SetJumpTargetOnNext(boundOKJmp) 4086 4087 // ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] 4088 c.assembler.CompileMemoryToRegister(amd64.MOVQ, ref, tableInstanceTableOffset, ref) 4089 4090 // ref = [ref + 0 + offset.register * 8] 4091 // = [&tables[TableIndex].References[0] + sizeOf(uintptr) * offset] 4092 // = [&tables[TableIndex].References[offset]] 4093 // = tables[TableIndex].References[offset] 4094 c.assembler.CompileMemoryWithIndexToRegister(amd64.MOVQ, ref, 4095 0, offset.register, 8, ref, 4096 ) 4097 4098 c.locationStack.markRegisterUnused(offset.register) 4099 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit at runtime. 4100 return nil 4101 } 4102 4103 // compileTableSet implements compiler.compileTableSet for the amd64 architecture. 4104 func (c *amd64Compiler) compileTableSet(o *wazeroir.OperationTableSet) error { 4105 ref := c.locationStack.pop() 4106 if err := c.compileEnsureOnRegister(ref); err != nil { 4107 return err 4108 } 4109 4110 offset := c.locationStack.pop() 4111 if err := c.compileEnsureOnRegister(offset); err != nil { 4112 return err 4113 } 4114 4115 tmp, err := c.allocateRegister(registerTypeGeneralPurpose) 4116 if err != nil { 4117 return err 4118 } 4119 4120 // tmp = &tables[0] 4121 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4122 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4123 tmp) 4124 4125 // ref = [ref + TableIndex*8] 4126 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4127 // = [&tables[TableIndex]] = tables[TableIndex]. 4128 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, int64(o.TableIndex)*8, tmp) 4129 4130 // Out of bounds check. 4131 c.assembler.CompileMemoryToRegister(amd64.CMPQ, tmp, tableInstanceTableLenOffset, offset.register) 4132 boundOKJmp := c.assembler.CompileJump(amd64.JHI) 4133 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess) 4134 c.assembler.SetJumpTargetOnNext(boundOKJmp) 4135 4136 // tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0] 4137 c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmp, tableInstanceTableOffset, tmp) 4138 4139 // [tmp + 0 + offset.register * 8] = ref 4140 // [&tables[TableIndex].References[0] + sizeOf(uintptr) * offset] = ref 4141 // [&tables[TableIndex].References[offset]] = ref 4142 // tables[TableIndex].References[offset] = ref 4143 c.assembler.CompileRegisterToMemoryWithIndex(amd64.MOVQ, 4144 ref.register, 4145 tmp, 0, offset.register, 8) 4146 4147 c.locationStack.markRegisterUnused(offset.register, ref.register) 4148 return nil 4149 } 4150 4151 // compileTableGrow implements compiler.compileTableGrow for the amd64 architecture. 4152 func (c *amd64Compiler) compileTableGrow(o *wazeroir.OperationTableGrow) error { 4153 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4154 return err 4155 } 4156 4157 // Pushes the table index. 4158 if err := c.compileConstI32(&wazeroir.OperationConstI32{Value: o.TableIndex}); err != nil { 4159 return err 4160 } 4161 4162 // Table grow cannot be done in assembly just like memory grow as it involves with allocation in Go. 4163 // Therefore, call out to the built function for this purpose. 
4164 if err := c.compileCallBuiltinFunction(builtinFunctionIndexTableGrow); err != nil { 4165 return err 4166 } 4167 4168 // TableGrow consumes three values (table index, number of items, initial value). 4169 for i := 0; i < 3; i++ { 4170 c.locationStack.pop() 4171 } 4172 4173 // Then, the previous length was pushed as the result. 4174 loc := c.locationStack.pushRuntimeValueLocationOnStack() 4175 loc.valueType = runtimeValueTypeI32 4176 4177 // After return, we re-initialize reserved registers just like preamble of functions. 4178 c.compileReservedStackBasePointerInitialization() 4179 c.compileReservedMemoryPointerInitialization() 4180 return nil 4181 } 4182 4183 // compileTableSize implements compiler.compileTableSize for the amd64 architecture. 4184 func (c *amd64Compiler) compileTableSize(o *wazeroir.OperationTableSize) error { 4185 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4186 return err 4187 } 4188 4189 result, err := c.allocateRegister(registerTypeGeneralPurpose) 4190 if err != nil { 4191 return err 4192 } 4193 4194 // result = &tables[0] 4195 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4196 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset, 4197 result) 4198 4199 // result = [result + TableIndex*8] 4200 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)] 4201 // = [&tables[TableIndex]] = tables[TableIndex]. 4202 c.assembler.CompileMemoryToRegister(amd64.MOVQ, result, int64(o.TableIndex)*8, result) 4203 4204 // result = [result + tableInstanceTableLenOffset] 4205 // = [tables[TableIndex] + tableInstanceTableLenOffset] 4206 // = len(tables[TableIndex]) 4207 c.assembler.CompileMemoryToRegister(amd64.MOVQ, result, tableInstanceTableLenOffset, result) 4208 4209 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32) 4210 return nil 4211 } 4212 4213 // compileTableFill implements compiler.compileTableFill for the amd64 architecture. 4214 func (c *amd64Compiler) compileTableFill(o *wazeroir.OperationTableFill) error { 4215 return c.compileFillImpl(true, o.TableIndex) 4216 } 4217 4218 // compileRefFunc implements compiler.compileRefFunc for the amd64 architecture. 4219 func (c *amd64Compiler) compileRefFunc(o *wazeroir.OperationRefFunc) error { 4220 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4221 return err 4222 } 4223 4224 ref, err := c.allocateRegister(registerTypeGeneralPurpose) 4225 if err != nil { 4226 return err 4227 } 4228 4229 // ref = [amd64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset] 4230 // = &moduleEngine.functions[0] 4231 c.assembler.CompileMemoryToRegister( 4232 amd64.MOVQ, amd64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset, 4233 ref, 4234 ) 4235 4236 // ref = [ref + int64(o.FunctionIndex)*8] 4237 // = [&moduleEngine.functions[0] + sizeOf(*function) * index] 4238 // = moduleEngine.functions[index] 4239 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4240 ref, int64(o.FunctionIndex)*8, // * 8 because the size of *code equals 8 bytes. 4241 ref, 4242 ) 4243 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) 4244 return nil 4245 } 4246 4247 // compileConstI32 implements compiler.compileConstI32 for the amd64 architecture. 
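// For example, an i32.const 5 in wazeroir becomes, roughly, a single
//
//	MOVL $5, <allocated general purpose register>
//
// with the destination register pushed onto the location stack as runtimeValueTypeI32.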
4248 func (c *amd64Compiler) compileConstI32(o *wazeroir.OperationConstI32) error { 4249 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4250 return err 4251 } 4252 4253 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 4254 if err != nil { 4255 return err 4256 } 4257 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32) 4258 c.assembler.CompileConstToRegister(amd64.MOVL, int64(o.Value), reg) 4259 return nil 4260 } 4261 4262 // compileConstI64 implements compiler.compileConstI64 for the amd64 architecture. 4263 func (c *amd64Compiler) compileConstI64(o *wazeroir.OperationConstI64) error { 4264 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4265 return err 4266 } 4267 4268 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 4269 if err != nil { 4270 return err 4271 } 4272 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI64) 4273 4274 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(o.Value), reg) 4275 return nil 4276 } 4277 4278 // compileConstF32 implements compiler.compileConstF32 for the amd64 architecture. 4279 func (c *amd64Compiler) compileConstF32(o *wazeroir.OperationConstF32) error { 4280 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4281 return err 4282 } 4283 4284 reg, err := c.allocateRegister(registerTypeVector) 4285 if err != nil { 4286 return err 4287 } 4288 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeF32) 4289 4290 // We cannot directly load the value from memory to float regs, 4291 // so we move it to int reg temporarily. 4292 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 4293 if err != nil { 4294 return err 4295 } 4296 4297 c.assembler.CompileConstToRegister(amd64.MOVL, int64(math.Float32bits(o.Value)), tmpReg) 4298 c.assembler.CompileRegisterToRegister(amd64.MOVL, tmpReg, reg) 4299 return nil 4300 } 4301 4302 // compileConstF64 implements compiler.compileConstF64 for the amd64 architecture. 4303 func (c *amd64Compiler) compileConstF64(o *wazeroir.OperationConstF64) error { 4304 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil { 4305 return err 4306 } 4307 4308 reg, err := c.allocateRegister(registerTypeVector) 4309 if err != nil { 4310 return err 4311 } 4312 c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeF64) 4313 4314 // We cannot directly load the value from memory to float regs, 4315 // so we move it to int reg temporarily. 4316 tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose) 4317 if err != nil { 4318 return err 4319 } 4320 4321 c.assembler.CompileConstToRegister(amd64.MOVQ, int64(math.Float64bits(o.Value)), tmpReg) 4322 c.assembler.CompileRegisterToRegister(amd64.MOVQ, tmpReg, reg) 4323 return nil 4324 } 4325 4326 // compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for amd64. 4327 func (c *amd64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) { 4328 var inst asm.Instruction 4329 switch loc.valueType { 4330 case runtimeValueTypeV128Lo: 4331 inst = amd64.MOVDQU 4332 case runtimeValueTypeV128Hi: 4333 panic("BUG: V128Hi must be be loaded to a register along with V128Lo") 4334 case runtimeValueTypeI32, runtimeValueTypeF32: 4335 inst = amd64.MOVL 4336 case runtimeValueTypeI64, runtimeValueTypeF64: 4337 inst = amd64.MOVQ 4338 default: 4339 panic("BUG: unknown runtime value type") 4340 } 4341 4342 // Copy the value from the stack. 
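	// The load below addresses the value relative to the reserved stack base pointer register, i.e. roughly
	// loc.register = *(amd64ReservedRegisterForStackBasePointerAddress + loc.stackPointer*8).
	// For example, stackPointer == 3 reads from byte offset 24.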
4343 	c.assembler.CompileMemoryToRegister(inst,
4344 		// Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range.
4345 		amd64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
4346 		loc.register)
4347 
4348 	if loc.valueType == runtimeValueTypeV128Lo {
4349 		// The higher 64 bits were loaded by the MOVDQU above as well, so just update the location.
4350 		hi := c.locationStack.stack[loc.stackPointer+1]
4351 		hi.setRegister(loc.register)
4352 	}
4353 }
4354 
4355 // maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
4356 // into a general purpose register if the value is currently held on a conditional register.
4357 //
4358 // This is usually called at the beginning of methods on the compiler interface that might
4359 // emit instructions which clobber the conditional (flags) register without saving its value.
4360 // The compileXXX functions that don't call this are the ones that already save the conditional
4361 // value to the stack or a register by invoking compileEnsureOnRegister on the top value.
4362 func (c *amd64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
4363 	if c.locationStack.sp > 0 {
4364 		if loc := c.locationStack.peek(); loc.onConditionalRegister() {
4365 			if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
4366 				return err
4367 			}
4368 		}
4369 	}
4370 	return
4371 }
4372 
4373 // compileLoadConditionalRegisterToGeneralPurposeRegister materializes the conditional register value
4374 // into a newly allocated general purpose register.
4375 func (c *amd64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
4376 	reg, err := c.allocateRegister(registerTypeGeneralPurpose)
4377 	if err != nil {
4378 		return err
4379 	}
4380 	c.compileMoveConditionalToGeneralPurposeRegister(loc, reg)
4381 	return nil
4382 }
4383 
4384 func (c *amd64Compiler) compileMoveConditionalToGeneralPurposeRegister(loc *runtimeValueLocation, reg asm.Register) {
4385 	// Set the flag bit to the destination. See
4386 	// - https://c9x.me/x86/html/file_module_x86_id_288.html
4387 	// - https://github.com/golang/go/blob/master/src/cmd/internal/obj/x86/asm6.go#L1453-L1468
4388 	// to translate conditionalRegisterState* to amd64.SET*
4389 	var inst asm.Instruction
4390 	switch loc.conditionalRegister {
4391 	case amd64.ConditionalRegisterStateE:
4392 		inst = amd64.SETEQ
4393 	case amd64.ConditionalRegisterStateNE:
4394 		inst = amd64.SETNE
4395 	case amd64.ConditionalRegisterStateS:
4396 		inst = amd64.SETMI
4397 	case amd64.ConditionalRegisterStateNS:
4398 		inst = amd64.SETPL
4399 	case amd64.ConditionalRegisterStateG:
4400 		inst = amd64.SETGT
4401 	case amd64.ConditionalRegisterStateGE:
4402 		inst = amd64.SETGE
4403 	case amd64.ConditionalRegisterStateL:
4404 		inst = amd64.SETLT
4405 	case amd64.ConditionalRegisterStateLE:
4406 		inst = amd64.SETLE
4407 	case amd64.ConditionalRegisterStateA:
4408 		inst = amd64.SETHI
4409 	case amd64.ConditionalRegisterStateAE:
4410 		inst = amd64.SETCC
4411 	case amd64.ConditionalRegisterStateB:
4412 		inst = amd64.SETCS
4413 	case amd64.ConditionalRegisterStateBE:
4414 		inst = amd64.SETLS
4415 	}
4416 
4417 	c.assembler.CompileNoneToRegister(inst, reg)
4418 
4419 	// Then we clear the unnecessary bits: SETcc only writes the lowest byte of the register.
4420 	c.assembler.CompileConstToRegister(amd64.ANDQ, 0x1, reg)
4421 
4422 	// Record that the location now uses this register.
4423 	loc.setRegister(reg)
4424 	c.locationStack.markRegisterUsed(reg)
4425 }
4426 
4427 // allocateRegister implements compiler.allocateRegister for amd64.
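// It first tries to take a free register of the requested type; if none is available, it spills
// ("steals") the value of a currently used register onto the memory stack via
// compileReleaseRegisterToStack and reuses that register. A typical call site looks like:
//
//	reg, err := c.allocateRegister(registerTypeGeneralPurpose)
//	if err != nil {
//		return err
//	}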
4428 func (c *amd64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
4429 	var ok bool
4430 	// Try to get the unused register.
4431 	reg, ok = c.locationStack.takeFreeRegister(t)
4432 	if ok {
4433 		return
4434 	}
4435 
4436 	// If not found, we have to steal the register.
4437 	stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
4438 	if !ok {
4439 		err = fmt.Errorf("cannot steal register")
4440 		return
4441 	}
4442 
4443 	// Release the steal target register value onto the stack location.
4444 	reg = stealTarget.register
4445 	c.compileReleaseRegisterToStack(stealTarget)
4446 	return
4447 }
4448 
4449 // compileCallFunctionImpl adds instructions to call the function whose address is held in functionAddressRegister.
4450 //
4451 // Note: this is the counterpart of compileReturnFunction, and see the comments there as well
4452 // to understand how the function calls are achieved.
4453 func (c *amd64Compiler) compileCallFunctionImpl(functionAddressRegister asm.Register, functype *wasm.FunctionType) error {
4454 	// Release all the registers as our calling convention requires caller-save registers.
4455 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
4456 		return err
4457 	}
4458 
4459 	c.locationStack.markRegisterUsed(functionAddressRegister)
4460 
4461 	// Obtain a temporary register to be used in what follows.
4462 	tmpRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4463 	if !found {
4464 		// This in theory never happens, as all registers must be free except functionAddressRegister.
4465 		return fmt.Errorf("could not find enough free registers")
4466 	}
4467 
4468 	// The stack should look like:
4469 	//
4470 	//                reserved slots for results (if len(results) > len(args))
4471 	//                     |     |
4472 	//      ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
4473 	//      |                         |                                                        |
4474 	//      |               callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
4475 	//      |
4476 	//  nextStackBasePointerOffset
4477 	//
4478 	// where callFrame is used to return to this currently executed function.
4479 
4480 	nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)
4481 
4482 	callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)
4483 
4484 	// Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
4485 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4486 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
4487 		tmpRegister)
4488 	callFrameStackBasePointerInBytesLoc.setRegister(tmpRegister)
4489 	c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)
4490 
4491 	// Compute callEngine.stackContext.stackBasePointer for the next function into tmpRegister.
4492 	c.assembler.CompileConstToRegister(amd64.ADDQ, nextStackBasePointerOffset<<3, tmpRegister)
4493 
4494 	// Write the calculated value to callEngine.stackContext.stackBasePointer.
4495 	c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
4496 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)
4497 
4498 	// Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
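	// This save mirrors compileReturnFunction: on return, the callee reloads this slot
	// (callerFunction) and writes it back to callEngine.moduleContext.fn, restoring the
	// caller's state. Conceptually:
	//
	//	callFrame.function = callEngine.moduleContext.fn   // here, on the call path
	//	callEngine.moduleContext.fn = callFrame.function   // later, in compileReturnFunction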
4499 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4500 amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset, 4501 tmpRegister) 4502 callFrameFunctionLoc.setRegister(tmpRegister) 4503 c.compileReleaseRegisterToStack(callFrameFunctionLoc) 4504 4505 // Set callEngine.moduleContext.fn to the next *function. 4506 c.assembler.CompileRegisterToMemory(amd64.MOVQ, functionAddressRegister, 4507 amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset) 4508 4509 // Write the return address into callFrameReturnAddressLoc. 4510 c.assembler.CompileReadInstructionAddress(tmpRegister, amd64.JMP) 4511 callFrameReturnAddressLoc.setRegister(tmpRegister) 4512 c.compileReleaseRegisterToStack(callFrameReturnAddressLoc) 4513 4514 if amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister == functionAddressRegister { 4515 // This case we must move the value on targetFunctionAddressRegister to another register, otherwise 4516 // the address (jump target below) will be modified and result in segfault. 4517 // See #526. 4518 c.assembler.CompileRegisterToRegister(amd64.MOVQ, functionAddressRegister, tmpRegister) 4519 functionAddressRegister = tmpRegister 4520 } 4521 4522 // Also, we have to put the target function's *wasm.ModuleInstance into amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister. 4523 c.assembler.CompileMemoryToRegister(amd64.MOVQ, functionAddressRegister, functionModuleInstanceAddressOffset, 4524 amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 4525 4526 // And jump into the initial address of the target function. 4527 c.assembler.CompileJumpToMemory(amd64.JMP, functionAddressRegister, functionCodeInitialAddressOffset) 4528 4529 // All the registers used are temporary, so we mark them unused. 4530 c.locationStack.markRegisterUnused(tmpRegister, functionAddressRegister) 4531 4532 // On the function return, we have to initialize the state. 4533 if err := c.compileModuleContextInitialization(); err != nil { 4534 return err 4535 } 4536 4537 // Due to the change to callEngine.stackContext.stackBasePointer. 4538 c.compileReservedStackBasePointerInitialization() 4539 4540 // Due to the change to callEngine.moduleContext.moduleInstanceAddress as that might result in 4541 // the memory instance manipulation. 4542 c.compileReservedMemoryPointerInitialization() 4543 4544 // We consumed the function parameters, the call frame stack and reserved slots during the call. 4545 c.locationStack.sp = uint64(nextStackBasePointerOffset) 4546 4547 // Now the function results are pushed by the call. 4548 for _, t := range functype.Results { 4549 loc := c.locationStack.pushRuntimeValueLocationOnStack() 4550 switch t { 4551 case wasm.ValueTypeI32: 4552 loc.valueType = runtimeValueTypeI32 4553 case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref: 4554 loc.valueType = runtimeValueTypeI64 4555 case wasm.ValueTypeF32: 4556 loc.valueType = runtimeValueTypeF32 4557 case wasm.ValueTypeF64: 4558 loc.valueType = runtimeValueTypeF64 4559 case wasm.ValueTypeV128: 4560 loc.valueType = runtimeValueTypeV128Lo 4561 hi := c.locationStack.pushRuntimeValueLocationOnStack() 4562 hi.valueType = runtimeValueTypeV128Hi 4563 default: 4564 panic("BUG: invalid type: " + wasm.ValueTypeName(t)) 4565 } 4566 } 4567 return nil 4568 } 4569 4570 // returnFunction adds instructions to return from the current callframe back to the caller's frame. 
4571 // If the current frame is the bottom (origin) one, we return to callEngine.execWasmFunction with the Returned status.
4572 // Otherwise, we jump to the caller's return address stored in callFrame.returnAddress while setting
4573 // up all the necessary changes on the callEngine's state.
4574 //
4575 // Note: this is the counterpart of compileCallFunctionImpl, and see the comments there as well
4576 // to understand how the function calls are achieved.
4577 func (c *amd64Compiler) compileReturnFunction() error {
4578 	// Release all the registers as our calling convention requires caller-save registers.
4579 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
4580 		return err
4581 	}
4582 
4583 	if c.withListener {
4584 		if err := c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerAfter); err != nil {
4585 			return err
4586 		}
4587 		// After return, we re-initialize the stack base pointer as that is used to return to the caller below.
4588 		c.compileReservedStackBasePointerInitialization()
4589 	}
4590 
4591 	// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the module instance's address,
4592 	// so mark it used so that it won't be handed out as a free register.
4593 	c.locationStack.markRegisterUsed(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4594 	defer c.locationStack.markRegisterUnused(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4595 
4596 	// Obtain a temporary register to be used in what follows.
4597 	returnAddressRegister, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4598 	if !found {
4599 		panic("BUG: all the registers should be free at this point: " + c.locationStack.String())
4600 	}
4601 
4602 	returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.ir.Signature)
4603 
4604 	// A zero return address means we are at the bottom frame, i.e. we return from the execution.
4605 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4606 		amd64ReservedRegisterForStackBasePointerAddress, int64(returnAddress.stackPointer)*8,
4607 		returnAddressRegister,
4608 	)
4609 	c.assembler.CompileRegisterToRegister(amd64.TESTQ, returnAddressRegister, returnAddressRegister)
4610 
4611 	jmpIfNotReturn := c.assembler.CompileJump(amd64.JNE)
4612 	c.compileExitFromNativeCode(nativeCallStatusCodeReturned)
4613 
4614 	// Otherwise, we return to the caller.
4615 	c.assembler.SetJumpTargetOnNext(jmpIfNotReturn)
4616 
4617 	// Alias for readability.
4618 	tmpRegister := amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister
4619 
4620 	// First, restore callEngine.stackContext.stackBasePointerInBytes from callerStackBasePointerInBytes.
4621 	callerStackBasePointerInBytes.setRegister(tmpRegister)
4622 	c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
4623 	c.assembler.CompileRegisterToMemory(amd64.MOVQ,
4624 		tmpRegister, amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)
4625 
4626 	// Next, restore moduleContext.fn from callerFunction.
4627 	callerFunction.setRegister(tmpRegister)
4628 	c.compileLoadValueOnStackToRegister(callerFunction)
4629 	c.assembler.CompileRegisterToMemory(amd64.MOVQ,
4630 		tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)
4631 
4632 	// Also, we have to put the caller's *wasm.ModuleInstance (the function we return to) into amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister.
4633 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4634 tmpRegister, functionModuleInstanceAddressOffset, 4635 amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 4636 4637 // Then, jump into the return address! 4638 c.assembler.CompileJumpToRegister(amd64.JMP, returnAddressRegister) 4639 return nil 4640 } 4641 4642 func (c *amd64Compiler) compileCallGoHostFunction() error { 4643 return c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction) 4644 } 4645 4646 func (c *amd64Compiler) compileCallBuiltinFunction(index wasm.Index) error { 4647 // Set the functionAddress to the callEngine.exitContext functionCallAddress. 4648 c.assembler.CompileConstToMemory(amd64.MOVL, int64(index), amd64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset) 4649 return c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction) 4650 } 4651 4652 func (c *amd64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode) error { 4653 // Release all the registers as our calling convention requires the caller-save. 4654 if err := c.compileReleaseAllRegistersToStack(); err != nil { 4655 return err 4656 } 4657 4658 // Read the return address, and write it to callEngine.exitContext.returnAddress. 4659 returnAddressReg, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose) 4660 if !ok { 4661 panic("BUG: cannot take free register") 4662 } 4663 c.assembler.CompileReadInstructionAddress(returnAddressReg, amd64.RET) 4664 c.assembler.CompileRegisterToMemory(amd64.MOVQ, 4665 returnAddressReg, amd64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset) 4666 4667 c.compileExitFromNativeCode(compilerStatus) 4668 return nil 4669 } 4670 4671 // compileReleaseAllRegistersToStack add the instructions to release all the LIVE value 4672 // in the value location stack at this point into the stack memory location. 4673 func (c *amd64Compiler) compileReleaseAllRegistersToStack() (err error) { 4674 for i := uint64(0); i < c.locationStack.sp; i++ { 4675 if loc := c.locationStack.stack[i]; loc.onRegister() { 4676 c.compileReleaseRegisterToStack(loc) 4677 } else if loc.onConditionalRegister() { 4678 if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil { 4679 return 4680 } 4681 c.compileReleaseRegisterToStack(loc) 4682 } 4683 } 4684 return 4685 } 4686 4687 func (c *amd64Compiler) onValueReleaseRegisterToStack(reg asm.Register) { 4688 for i := uint64(0); i < c.locationStack.sp; i++ { 4689 prevValue := c.locationStack.stack[i] 4690 if prevValue.register == reg { 4691 c.compileReleaseRegisterToStack(prevValue) 4692 break 4693 } 4694 } 4695 } 4696 4697 // compileReleaseRegisterToStack implements compiler.compileReleaseRegisterToStack for amd64. 4698 func (c *amd64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) { 4699 var inst asm.Instruction 4700 switch loc.valueType { 4701 case runtimeValueTypeV128Lo: 4702 inst = amd64.MOVDQU 4703 case runtimeValueTypeV128Hi: 4704 panic("BUG: V128Hi must be released to the stack along with V128Lo") 4705 case runtimeValueTypeI32, runtimeValueTypeF32: 4706 inst = amd64.MOVL 4707 case runtimeValueTypeI64, runtimeValueTypeF64: 4708 inst = amd64.MOVQ 4709 default: 4710 panic("BUG: unknown runtime value type") 4711 } 4712 4713 c.assembler.CompileRegisterToMemory(inst, loc.register, 4714 // Note: stack pointers are ensured not to exceed 2^27 so this offset never exceeds 32-bit range. 
4715 amd64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8) 4716 4717 // Mark the register is free. 4718 c.locationStack.releaseRegister(loc) 4719 4720 if loc.valueType == runtimeValueTypeV128Lo { 4721 // Higher 64-bits are released as well ^^. 4722 hi := c.locationStack.stack[loc.stackPointer+1] 4723 c.locationStack.releaseRegister(hi) 4724 } 4725 } 4726 4727 func (c *amd64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) { 4728 c.assembler.CompileConstToMemory(amd64.MOVB, int64(status), 4729 amd64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset) 4730 4731 // Write back the cached SP to the actual eng.stackPointer. 4732 c.assembler.CompileConstToMemory(amd64.MOVQ, int64(c.locationStack.sp), 4733 amd64ReservedRegisterForCallEngine, callEngineStackContextStackPointerOffset) 4734 4735 c.assembler.CompileStandAlone(amd64.RET) 4736 } 4737 4738 func (c *amd64Compiler) compilePreamble() (err error) { 4739 // We assume all function parameters are already pushed onto the stack by 4740 // the caller. 4741 c.locationStack.init(c.ir.Signature) 4742 4743 if err := c.compileModuleContextInitialization(); err != nil { 4744 return err 4745 } 4746 4747 // Check if it's necessary to grow the value stack by using max stack pointer. 4748 if err = c.compileMaybeGrowStack(); err != nil { 4749 return err 4750 } 4751 4752 if c.withListener { 4753 if err = c.compileCallBuiltinFunction(builtinFunctionIndexFunctionListenerBefore); err != nil { 4754 return err 4755 } 4756 } 4757 4758 c.compileReservedStackBasePointerInitialization() 4759 4760 // Finally, we initialize the reserved memory register based on the module context. 4761 c.compileReservedMemoryPointerInitialization() 4762 return 4763 } 4764 4765 func (c *amd64Compiler) compileReservedStackBasePointerInitialization() { 4766 // First, make reservedRegisterForStackBasePointer point to the beginning of the slice backing array. 4767 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4768 amd64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset, 4769 amd64ReservedRegisterForStackBasePointerAddress) 4770 4771 // next we move the base pointer (callEngine.stackBasePointer) to the tmp register. 4772 c.assembler.CompileMemoryToRegister(amd64.ADDQ, 4773 amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, 4774 amd64ReservedRegisterForStackBasePointerAddress, 4775 ) 4776 } 4777 4778 func (c *amd64Compiler) compileReservedMemoryPointerInitialization() { 4779 if c.ir.HasMemory || c.ir.UsesMemory { 4780 c.assembler.CompileMemoryToRegister(amd64.MOVQ, 4781 amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset, 4782 amd64ReservedRegisterForMemory, 4783 ) 4784 } 4785 } 4786 4787 // compileMaybeGrowStack adds instructions to check the necessity to grow the value stack, 4788 // and if so, make the builtin function call to do so. These instructions are called in the function's 4789 // preamble. 
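// In Go terms the emitted check is roughly (field names here follow the corresponding *Offset constants):
//
//	if callEngine.stackContext.stackLenInBytes-callEngine.stackContext.stackBasePointerInBytes < stackPointerCeil*8 {
//		// call the builtinFunctionIndexGrowStack builtin
//	}
//
// where stackPointerCeil is only known after the whole function body is compiled, hence the
// onStackPointerCeilDeterminedCallBack used below to patch the comparison's constant.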
4790 func (c *amd64Compiler) compileMaybeGrowStack() error {
4791 	tmpRegister, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4792 	if !ok {
4793 		panic("BUG: cannot take free register")
4794 	}
4795 
4796 	c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4797 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset, tmpRegister)
4798 	c.assembler.CompileMemoryToRegister(amd64.SUBQ,
4799 		amd64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset, tmpRegister)
4800 
4801 	// If stackBasePointerInBytes + stackPointerCeil*8 > stackLenInBytes, we need to grow the stack.
4802 	cmpWithStackPointerCeil := c.assembler.CompileRegisterToConst(amd64.CMPQ, tmpRegister, 0)
4803 	c.onStackPointerCeilDeterminedCallBack = func(stackPointerCeil uint64) {
4804 		cmpWithStackPointerCeil.AssignDestinationConstant(int64(stackPointerCeil) << 3)
4805 	}
4806 
4807 	// Jump if there is no need to grow the stack.
4808 	jmpIfNoNeedToGrowStack := c.assembler.CompileJump(amd64.JCC)
4809 
4810 	// Otherwise, we have to call the builtin function to grow the stack.
4811 	if err := c.compileCallBuiltinFunction(builtinFunctionIndexGrowStack); err != nil {
4812 		return err
4813 	}
4814 
4815 	c.assembler.SetJumpTargetOnNext(jmpIfNoNeedToGrowStack)
4816 	return nil
4817 }
4818 
4819 // compileModuleContextInitialization adds instructions to initialize callEngine.ModuleContext's fields based on
4820 // callEngine.ModuleContext.ModuleInstanceAddress.
4821 // This is called in two cases: in the function preamble, and on the return from (non-Go) function calls.
4822 func (c *amd64Compiler) compileModuleContextInitialization() error {
4823 	// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister holds the module instance's address,
4824 	// so mark it used so that it won't be handed out as a free register until the module context initialization finishes.
4825 	c.locationStack.markRegisterUsed(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4826 	defer c.locationStack.markRegisterUnused(amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister)
4827 
4828 	// Obtain the temporary registers to be used in what follows.
4829 	regs, found := c.locationStack.takeFreeRegisters(registerTypeGeneralPurpose, 2)
4830 	if !found {
4831 		// This in theory never happens, as all registers must be free except the module instance address register.
4832 		return fmt.Errorf("could not find enough free registers")
4833 	}
4834 	c.locationStack.markRegisterUsed(regs...)
4835 
4836 	// Alias these free tmp registers for readability.
4837 	tmpRegister, tmpRegister2 := regs[0], regs[1]
4838 
4839 	// If the module instance address stays the same, we can skip all the code below.
4840 	// The rationale is that, in almost all use cases, users instantiate a single Wasm binary and
4841 	// call functions from it, rather than calling across multiple modules. As a result, the
4842 	// cmp-and-jmp sequence below is easy for the CPU's branch predictor, since the jump is
4843 	// taken nearly 100% of the time across function calls.
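	// In Go terms, the guarded update below is roughly:
	//
	//	if callEngine.moduleContext.moduleInstanceAddress != moduleInstanceAddr {
	//		// refresh the cached globals/tables/memory/functions/data/element pointers below
	//	}
	//
	// where moduleInstanceAddr stands for the value currently held in
	// amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister.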
4844 c.assembler.CompileMemoryToRegister(amd64.CMPQ, 4845 amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceAddressOffset, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister) 4846 jmpIfModuleNotChange := c.assembler.CompileJump(amd64.JEQ) 4847 4848 // If engine.CallContext.ModuleInstanceAddress is not equal the value on amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, 4849 // we have to put the new value there. 4850 c.assembler.CompileRegisterToMemory(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, 4851 amd64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceAddressOffset) 4852 4853 // Also, we have to update the following fields: 4854 // * callEngine.moduleContext.globalElement0Address 4855 // * callEngine.moduleContext.tableElement0Address 4856 // * callEngine.moduleContext.memoryInstance 4857 // * callEngine.moduleContext.memoryElement0Address 4858 // * callEngine.moduleContext.memorySliceLen 4859 // * callEngine.moduleContext.codesElement0Address 4860 // * callEngine.moduleContext.typeIDsElement0Address 4861 // * callEngine.moduleContext.dataInstancesElement0Address 4862 // * callEngine.moduleContext.elementInstancesElement0Address 4863 4864 // Update globalElement0Address. 4865 // 4866 // Note: if there's global.get or set instruction in the function, the existence of the globals 4867 // is ensured by function validation at module instantiation phase, and that's why it is ok to 4868 // skip the initialization if the module's globals slice is empty. 4869 if len(c.ir.Globals) > 0 { 4870 // Since ModuleInstance.Globals is []*globalInstance, internally 4871 // the address of the first item in the underlying array lies exactly on the globals offset. 4872 // See https://go.dev/blog/slices-intro if unfamiliar. 4873 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset, tmpRegister) 4874 4875 c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset) 4876 } 4877 4878 // Update tableElement0Address. 4879 // 4880 // Note: if there's table instruction in the function, the existence of the table 4881 // is ensured by function validation at module instantiation phase, and that's 4882 // why it is ok to skip the initialization if the module's table doesn't exist. 4883 if c.ir.HasTable { 4884 // First, we need to read the *wasm.Table. 4885 c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceTablesOffset, tmpRegister) 4886 4887 // At this point, tmpRegister holds the address of ModuleInstance.Table. 4888 // So we are ready to read and put the first item's address stored in Table.Table. 4889 // Here we read the value into tmpRegister2. 4890 c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, 4891 amd64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset) 4892 4893 // Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address. 
4894 		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4895 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpRegister)
4896 		c.assembler.CompileRegisterToMemory(amd64.MOVQ,
4897 			tmpRegister, amd64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
4898 	}
4899 
4900 	// Update memoryElement0Address and memorySliceLen.
4901 	//
4902 	// Note: if there's a memory instruction in the function, the memory instance must be non-nil.
4903 	// That is ensured by function validation at the module instantiation phase, and that's
4904 	// why it is ok to skip the initialization if the module's memory instance is nil.
4905 	if c.ir.HasMemory {
4906 		c.assembler.CompileMemoryToRegister(amd64.MOVQ,
4907 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
4908 			tmpRegister)
4909 
4910 		// Set the memory instance.
4911 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister,
4912 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset)
4913 
4914 		// Set the length.
4915 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, memoryInstanceBufferLenOffset, tmpRegister2)
4916 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister2,
4917 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset)
4918 
4919 		// Set the element zero address.
4920 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, memoryInstanceBufferOffset, tmpRegister2)
4921 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister2,
4922 			amd64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset)
4923 	}
4924 
4925 	// Update moduleContext.functionsElement0Address.
4926 	{
4927 		// "tmpRegister = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
4928 		//
4929 		// Go's interface is laid out in memory as two quad words, struct{tab, data uintptr},
4930 		// where tab points to the interface table and data points to the actual
4931 		// implementation of the interface. Here, we extract the "data" pointer as the *moduleEngine.
4932 		// See the following references for detail:
4933 		// * https://research.swtch.com/interfaces
4934 		// * https://github.com/golang/go/blob/release-branch.go1.17/src/runtime/runtime2.go#L207-L210
4935 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset, tmpRegister)
4936 
4937 		// "tmpRegister = [tmpRegister + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
4938 		c.assembler.CompileMemoryToRegister(amd64.MOVQ, tmpRegister, moduleEngineFunctionsOffset, tmpRegister)
4939 
4940 		// "callEngine.moduleContext.functionsElement0Address = tmpRegister".
4941 		c.assembler.CompileRegisterToMemory(amd64.MOVQ, tmpRegister, amd64ReservedRegisterForCallEngine,
4942 			callEngineModuleContextFunctionsElement0AddressOffset)
4943 	}
4944 
4945 	// Update dataInstancesElement0Address.
4946 	if c.ir.HasDataInstances {
4947 		// "tmpRegister = &moduleInstance.DataInstances[0]"
4948 		c.assembler.CompileMemoryToRegister(
4949 			amd64.MOVQ,
4950 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
4951 			tmpRegister,
4952 		)
4953 		// "callEngine.moduleContext.dataInstancesElement0Address = tmpRegister".
4954 		c.assembler.CompileRegisterToMemory(
4955 			amd64.MOVQ,
4956 			tmpRegister,
4957 			amd64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
4958 		)
4959 	}
4960 
4961 	// Update callEngine.moduleContext.elementInstancesElement0Address.
4962 	if c.ir.HasElementInstances {
4963 		// "tmpRegister = &moduleInstance.ElementInstances[0]"
4964 		c.assembler.CompileMemoryToRegister(
4965 			amd64.MOVQ,
4966 			amd64CallingConventionDestinationFunctionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
4967 			tmpRegister,
4968 		)
4969 		// "callEngine.moduleContext.elementInstancesElement0Address = tmpRegister".
4970 		c.assembler.CompileRegisterToMemory(
4971 			amd64.MOVQ,
4972 			tmpRegister,
4973 			amd64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
4974 		)
4975 	}
4976 
4977 	c.locationStack.markRegisterUnused(regs...)
4978 
4979 	// Set the jump target towards the next instruction for the case where the module instance address hasn't changed.
4980 	c.assembler.SetJumpTargetOnNext(jmpIfModuleNotChange)
4981 	return nil
4982 }
4983 
4984 // compileEnsureOnRegister ensures that the given value is located in a
4985 // register of the appropriate type.
4986 func (c *amd64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
4987 	if loc.onStack() {
4988 		// Allocate the register.
4989 		reg, err := c.allocateRegister(loc.getRegisterType())
4990 		if err != nil {
4991 			return err
4992 		}
4993 
4994 		// Record that the location now uses this register.
4995 		loc.setRegister(reg)
4996 		c.locationStack.markRegisterUsed(reg)
4997 
4998 		c.compileLoadValueOnStackToRegister(loc)
4999 	} else if loc.onConditionalRegister() {
5000 		err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
5001 	}
5002 	return
5003 }
5004 
5005 // compileMaybeSwapRegisters swaps two registers if they're not equal.
5006 func (c *amd64Compiler) compileMaybeSwapRegisters(reg1, reg2 asm.Register) {
5007 	if reg1 != reg2 {
5008 		c.assembler.CompileRegisterToRegister(amd64.XCHGQ, reg1, reg2)
5009 	}
5010 }
5011 
5012 // compilePreventCrossedTargetRegisters swaps registers so that, for each runtimeValueLocation in locs,
5013 // its corresponding register (the one at the same index in targets) is not occupied by another
5014 // runtimeValueLocation from locs. It returns a closure to restore the original register placement.
5015 //
5016 // This function makes it possible to safely exchange one set of registers with another, where a register might be in both sets.
5017 // Each register will correspond either to itself or another register not present in its own set.
5018 //
5019 // For example, if we have locs = [AX, BX, CX], targets = [BX, SI, AX], then it'll do two swaps
5020 // to make locs = [BX, CX, AX].
5021 func (c *amd64Compiler) compilePreventCrossedTargetRegisters(locs []*runtimeValueLocation, targets []asm.Register) (restore func()) {
5022 	type swap struct{ srcIndex, dstIndex int }
5023 	var swaps []swap
5024 	for i := range locs {
5025 		targetLocation := -1 // -1 means not found.
5026 for j := range locs { 5027 if locs[j].register == targets[i] { 5028 targetLocation = j 5029 break 5030 } 5031 } 5032 if targetLocation != -1 && targetLocation != i { 5033 c.compileMaybeSwapRegisters(locs[i].register, locs[targetLocation].register) 5034 locs[i].register, locs[targetLocation].register = locs[targetLocation].register, locs[i].register 5035 swaps = append(swaps, swap{i, targetLocation}) 5036 } 5037 } 5038 return func() { 5039 // Restore in reverse order because a register can be moved multiple times. 5040 for i := len(swaps) - 1; i >= 0; i -= 1 { 5041 r1, r2 := swaps[i].srcIndex, swaps[i].dstIndex 5042 c.compileMaybeSwapRegisters(locs[r1].register, locs[r2].register) 5043 locs[r1].register, locs[r2].register = locs[r2].register, locs[r1].register 5044 } 5045 } 5046 }
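
// To make the example in the comment on compilePreventCrossedTargetRegisters concrete:
// with locs holding [AX, BX, CX] and targets = [BX, SI, AX], the loop above performs
//
//	i=0: targets[0]=BX is held by locs[1] -> XCHGQ AX, BX -> locs = [BX, AX, CX]
//	i=1: targets[1]=SI is held by no loc  -> no swap
//	i=2: targets[2]=AX is held by locs[1] -> XCHGQ CX, AX -> locs = [BX, CX, AX]
//
// and the returned closure replays the XCHGQs in reverse order to restore [AX, BX, CX].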