github.com/tetratelabs/wazero@v1.2.1/internal/engine/compiler/impl_arm64.go

// This file implements the compiler for the arm64 target.
// Please refer to https://developer.arm.com/documentation/102374/latest/
// if unfamiliar with arm64 instructions and semantics.
package compiler

import (
	"bytes"
	"errors"
	"fmt"
	"math"

	"github.com/tetratelabs/wazero/internal/asm"
	"github.com/tetratelabs/wazero/internal/asm/arm64"
	"github.com/tetratelabs/wazero/internal/wasm"
	"github.com/tetratelabs/wazero/internal/wazeroir"
)

type arm64Compiler struct {
	assembler arm64.Assembler
	ir        *wazeroir.CompilationResult
	// locationStack holds the state of the wazeroir virtual stack,
	// where each item is either placed in a register or on the actual memory stack.
	locationStack *runtimeValueLocationStack
	// labels maps a label (e.g. ".L1_then") to *arm64LabelInfo.
	labels [wazeroir.LabelKindNum][]arm64LabelInfo
	// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
	stackPointerCeil uint64
	// assignStackPointerCeilNeeded holds an asm.Node whose AssignSourceConstant must be called with the determined stack pointer ceiling.
	assignStackPointerCeilNeeded asm.Node
	compiledTrapTargets          [nativeCallStatusModuleClosed]asm.Node
	withListener                 bool
	typ                          *wasm.FunctionType
	br                           *bytes.Reader
	// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
	// we cache it here, and reset and assign it to .locationStack in the Init method.
	locationStackForEntrypoint runtimeValueLocationStack
	// frameIDMax tracks the maximum value of frame id per function.
	frameIDMax int
	brTableTmp []runtimeValueLocation
}

func newArm64Compiler() compiler {
	return &arm64Compiler{
		assembler:                  arm64.NewAssembler(arm64ReservedRegisterForTemporary),
		locationStackForEntrypoint: newRuntimeValueLocationStack(),
		br:                         bytes.NewReader(nil),
	}
}

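// Illustrative usage (a sketch, not an exported API): a single arm64Compiler
// is reused across function compilations so that the assembler, label slices,
// and location stacks are allocated once and recycled:
//
//	c := newArm64Compiler()
//	for _, f := range functionsToCompile { // hypothetical loop
//		c.Init(f.Type, f.IR, false) // resets and reuses the cached resources
//		// ... emit the function body, then call c.compile(buf) ...
//	}
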
// Init implements compiler.Init.
func (c *arm64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
	c.assembler.Reset()
	c.locationStackForEntrypoint.reset()
	c.resetLabels()

	*c = arm64Compiler{
		ir:                         ir,
		withListener:               withListener,
		typ:                        typ,
		assembler:                  c.assembler,
		labels:                     c.labels,
		br:                         c.br,
		brTableTmp:                 c.brTableTmp,
		locationStackForEntrypoint: c.locationStackForEntrypoint,
	}

	// Reuse the initial location stack for the compilation of subsequent functions.
	c.locationStack = &c.locationStackForEntrypoint
}

// resetLabels resets the existing content in arm64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *arm64Compiler) resetLabels() {
	for i := range c.labels {
		for j := range c.labels[i] {
			if j > c.frameIDMax {
				// We only need to reset up to the maximum frame id, which makes compilation faster for large binaries.
				break
			}
			l := &c.labels[i][j]
			l.initialInstruction = nil
			l.stackInitialized = false
			l.initialStack.reset()
		}
	}
}

var (
	arm64UnreservedVectorRegisters = []asm.Register{
		arm64.RegV0, arm64.RegV1, arm64.RegV2, arm64.RegV3,
		arm64.RegV4, arm64.RegV5, arm64.RegV6, arm64.RegV7, arm64.RegV8,
		arm64.RegV9, arm64.RegV10, arm64.RegV11, arm64.RegV12, arm64.RegV13,
		arm64.RegV14, arm64.RegV15, arm64.RegV16, arm64.RegV17, arm64.RegV18,
		arm64.RegV19, arm64.RegV20, arm64.RegV21, arm64.RegV22, arm64.RegV23,
		arm64.RegV24, arm64.RegV25, arm64.RegV26, arm64.RegV27, arm64.RegV28,
		arm64.RegV29, arm64.RegV30, arm64.RegV31,
	}

	// Note (see the arm64 section in https://go.dev/doc/asm):
	// * RegR18 is reserved as a platform register, and we don't use it in the Compiler.
	// * RegR28 is reserved for Goroutines by the Go runtime, and we don't use it in the Compiler.
	arm64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint
		arm64.RegR3, arm64.RegR4, arm64.RegR5, arm64.RegR6, arm64.RegR7, arm64.RegR8,
		arm64.RegR9, arm64.RegR10, arm64.RegR11, arm64.RegR12, arm64.RegR13,
		arm64.RegR14, arm64.RegR15, arm64.RegR16, arm64.RegR17, arm64.RegR19,
		arm64.RegR20, arm64.RegR21, arm64.RegR22, arm64.RegR23, arm64.RegR24,
		arm64.RegR25, arm64.RegR26, arm64.RegR29, arm64.RegR30,
	}
)

const (
	// arm64ReservedRegisterForCallEngine holds the pointer to the callEngine instance (i.e. *callEngine as uintptr).
	arm64ReservedRegisterForCallEngine = arm64.RegR0
	// arm64ReservedRegisterForStackBasePointerAddress holds the stack base pointer's address (callEngine.stackBasePointer) in the current function call.
	arm64ReservedRegisterForStackBasePointerAddress = arm64.RegR1
	// arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
	arm64ReservedRegisterForMemory = arm64.RegR2
	// arm64ReservedRegisterForTemporary is the temporary register which is available at any point of execution, but
	// its content must not be assumed to live beyond a single operation.
	// Note: we choose R27 as that is the temporary register used in Go's assembler.
	arm64ReservedRegisterForTemporary = arm64.RegR27
)

var arm64CallingConventionModuleInstanceAddressRegister = arm64.RegR29

const (
	// arm64CallEngineArchContextCompilerCallReturnAddressOffset is the offset of archContext.nativeCallReturnAddress in callEngine.
	arm64CallEngineArchContextCompilerCallReturnAddressOffset = 144
	// arm64CallEngineArchContextMinimum32BitSignedIntOffset is the offset of archContext.minimum32BitSignedIntAddress in callEngine.
	arm64CallEngineArchContextMinimum32BitSignedIntOffset = 152
	// arm64CallEngineArchContextMinimum64BitSignedIntOffset is the offset of archContext.minimum64BitSignedIntAddress in callEngine.
	arm64CallEngineArchContextMinimum64BitSignedIntOffset = 160
)

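// These constants mirror the Go-side layout of callEngine's embedded
// archContext. A minimal sketch of how such hard-coded offsets can be kept in
// sync with the struct definition (illustrative only; the field name is taken
// from the comments above, and this check is not part of this file):
//
//	var ce callEngine
//	if unsafe.Offsetof(ce.nativeCallReturnAddress) !=
//		arm64CallEngineArchContextCompilerCallReturnAddressOffset {
//		panic("archContext offset drifted from the compiler's constant")
//	}
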
func isZeroRegister(r asm.Register) bool {
	return r == arm64.RegRZR
}

// compileNOP implements compiler.compileNOP for the arm64 architecture.
func (c *arm64Compiler) compileNOP() asm.Node {
	return c.assembler.CompileStandAlone(arm64.NOP)
}

// compile implements compiler.compile for the arm64 architecture.
func (c *arm64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) {
	// c.stackPointerCeil tracks the stack pointer ceiling (the maximum seen) across all runtimeValueLocationStack(s)
	// used for all labels (via setLocationStack), excluding the current one.
	// Hence, we check here whether the final block's ceiling exceeds the current c.stackPointerCeil.
	stackPointerCeil = c.stackPointerCeil
	if stackPointerCeil < c.locationStack.stackPointerCeil {
		stackPointerCeil = c.locationStack.stackPointerCeil
	}

	// Now that the stack pointer ceiling is determined, we invoke the callback.
	// Note: this must be called before Assemble() below.
	c.assignStackPointerCeil(stackPointerCeil)

	err = c.assembler.Assemble(buf)
	return
}

// arm64LabelInfo holds the information specific to a wazeroir label in this function.
type arm64LabelInfo struct {
	// initialInstruction is the initial instruction for this label so that other blocks can branch into it.
	initialInstruction asm.Node
	// initialStack is the initial value location stack from which we start compiling this label.
	initialStack     runtimeValueLocationStack
	stackInitialized bool
}

// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the arm64 architecture.
func (c *arm64Compiler) assignStackPointerCeil(ceil uint64) {
	if c.assignStackPointerCeilNeeded != nil {
		c.assignStackPointerCeilNeeded.AssignSourceConstant(int64(ceil) << 3)
	}
}

func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo {
	kind := label.Kind()
	frames := c.labels[kind]
	frameID := label.FrameID()
	if c.frameIDMax < frameID {
		c.frameIDMax = frameID
	}
	// If the frameID is not allocated yet, expand the slice so that it covers the frameID;
	// append's amortized capacity growth reduces allocations in subsequent compilations.
	if diff := frameID - len(frames) + 1; diff > 0 {
		for i := 0; i < diff; i++ {
			frames = append(frames, arm64LabelInfo{initialStack: newRuntimeValueLocationStack()})
		}
		c.labels[kind] = frames
	}
	return &frames[frameID]
}

// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
	return c.locationStack
}

// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
	ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
	c.markRegisterUsed(reg)
	return
}

// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
	lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
	c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
	c.markRegisterUsed(reg)
	return
}

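// A v128 value occupies two 64-bit slots on the virtual stack, both pointing
// at the same vector register. Illustrative location-stack state after
// pushing one vector located on V0 (a sketch, not real syntax):
//
//	[..., {register: V0, type: V128Lo}, {register: V0, type: V128Hi}]
//	                                                                ^ stack pointer
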
func (c *arm64Compiler) markRegisterUsed(regs ...asm.Register) {
	for _, reg := range regs {
		if !isZeroRegister(reg) && reg != asm.NilRegister {
			c.locationStack.markRegisterUsed(reg)
		}
	}
}

func (c *arm64Compiler) markRegisterUnused(regs ...asm.Register) {
	for _, reg := range regs {
		if !isZeroRegister(reg) && reg != asm.NilRegister {
			c.locationStack.markRegisterUnused(reg)
		}
	}
}

func (c *arm64Compiler) String() (ret string) { return c.locationStack.String() }

// compilePreamble implements compiler.compilePreamble for the arm64 architecture.
func (c *arm64Compiler) compilePreamble() error {
	c.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
	defer c.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

	c.locationStack.init(c.typ)

	// Check if it's necessary to grow the value stack before entering the function body.
	if err := c.compileMaybeGrowStack(); err != nil {
		return err
	}

	if err := c.compileModuleContextInitialization(); err != nil {
		return err
	}

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// We must initialize the stack base pointer register so that we can manipulate the stack properly.
	c.compileReservedStackBasePointerRegisterInitialization()

	c.compileReservedMemoryRegisterInitialization()

	return nil
}

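// compileMaybeGrowStack below emits machine code equivalent to this Go-level
// sketch, where stackPointerCeil is the ceiling that is only patched in (via
// assignStackPointerCeil) after the whole function has been compiled:
//
//	if ce.stackLenInBytes - ce.stackBasePointerInBytes < stackPointerCeil<<3 {
//		builtinGrowStack() // via compileCallGoFunction
//	}
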
// compileMaybeGrowStack adds instructions to check the necessity to grow the value stack,
// and if so, makes the builtin function call to do so. These instructions are emitted in the function's
// preamble.
func (c *arm64Compiler) compileMaybeGrowStack() error {
	tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	c.markRegisterUsed(tmpX)
	tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	c.markRegisterUsed(tmpY)

	// "tmpX = len(ce.stack)"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset,
		tmpX,
	)

	// "tmpY = ce.stackBasePointer"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		tmpY,
	)

	// "tmpX = tmpX - tmpY", in other words "tmpX = len(ce.stack) - ce.stackBasePointer"
	c.assembler.CompileRegisterToRegister(
		arm64.SUB,
		tmpY,
		tmpX,
	)

	// "tmpY = stackPointerCeil"
	loadStackPointerCeil := c.assembler.CompileConstToRegister(
		arm64.MOVD,
		math.MaxInt32,
		tmpY,
	)
	// At this point of compilation, we don't know the value of the stack pointer ceiling,
	// so we lazily resolve the value later.
	c.assignStackPointerCeilNeeded = loadStackPointerCeil

	// Compare tmpX (len(ce.stack) - ce.stackBasePointer) and tmpY (ce.stackPointerCeil)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmpX, tmpY)

	// If ceil > stackLen - stack base pointer, we need to grow the stack by calling the builtin Go function.
	brIfStackOK := c.assembler.CompileJump(arm64.BCONDLS)
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexGrowStack); err != nil {
		return err
	}

	// Otherwise, skip calling it.
	c.assembler.SetJumpTargetOnNext(brIfStackOK)

	c.markRegisterUnused(tmpX, tmpY)
	return nil
}

// compileReturnFunction emits instructions to return from the current function frame.
// If the current frame is the bottom, the code goes back to the Go code with the nativeCallStatusCodeReturned status.
// Otherwise, we branch into the caller's return address.
func (c *arm64Compiler) compileReturnFunction() error {
	// Release all the registers as our calling convention requires the caller to save them.
	if err := c.compileReleaseAllRegistersToStack(); err != nil {
		return err
	}

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerAfter); err != nil {
			return err
		}
		// After return, we re-initialize the stack base pointer as that is used to return to the caller below.
		c.compileReservedStackBasePointerRegisterInitialization()
	}

	// arm64CallingConventionModuleInstanceAddressRegister holds the module instance's address,
	// so we mark it used so that it won't be used as a free register.
	c.locationStack.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
	defer c.locationStack.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

	returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ)

	// If the return address is zero, we return from the execution.
	returnAddress.setRegister(arm64ReservedRegisterForTemporary)
	c.compileLoadValueOnStackToRegister(returnAddress)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, arm64.RegRZR)

	// Branch if the address does not equal zero; otherwise, exit.
	// If the address doesn't equal zero, we branch into returnAddressRegister (the caller's return address).
	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeReturned)

	// Alias for readability.
	tmp := arm64CallingConventionModuleInstanceAddressRegister

	// First, restore the stackContext.stackBasePointerInBytesOffset from callerStackBasePointerInBytes.
	callerStackBasePointerInBytes.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
	c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

	// Next, restore moduleContext.fn from callerFunction.
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

	// Also, we have to put the target function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		tmp, functionModuleInstanceOffset,
		arm64CallingConventionModuleInstanceAddressRegister)

	c.assembler.CompileJumpToRegister(arm64.B, returnAddress.register)
	return nil
}

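// The epilogue emitted above is, in pseudocode (a sketch; ce is the
// callEngine, and frame refers to the call frame slots located by
// getCallFrameLocations):
//
//	ra := frame.returnAddress
//	if ra == 0 { exit(nativeCallStatusCodeReturned) } // bottom frame
//	ce.stackBasePointerInBytes = frame.callerStackBasePointerInBytes
//	ce.fn = frame.callerFunction
//	moduleInstanceReg = ce.fn.moduleInstance
//	goto ra
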
func (c *arm64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) {
	if target := c.compiledTrapTargets[status]; target != nil {
		// We've already compiled this trap target.
		// Invert the condition to jump into the appropriate target.
		var trapCondition asm.Instruction
		switch skipCondition {
		case arm64.BCONDEQ:
			trapCondition = arm64.BCONDNE
		case arm64.BCONDNE:
			trapCondition = arm64.BCONDEQ
		case arm64.BCONDLO:
			trapCondition = arm64.BCONDHS
		case arm64.BCONDHS:
			trapCondition = arm64.BCONDLO
		case arm64.BCONDLS:
			trapCondition = arm64.BCONDHI
		case arm64.BCONDHI:
			trapCondition = arm64.BCONDLS
		case arm64.BCONDVS:
			trapCondition = arm64.BCONDVC
		case arm64.BCONDVC:
			trapCondition = arm64.BCONDVS
		default:
			panic("BUG: couldn't invert condition")
		}
		c.assembler.CompileJump(trapCondition).AssignJumpTarget(target)
	} else {
		skip := c.assembler.CompileJump(skipCondition)
		c.compileExitFromNativeCode(status)
		c.assembler.SetJumpTargetOnNext(skip)
	}
}

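// For example, a caller passing skipCondition==arm64.BCONDLS ("skip the trap
// if unsigned lower-or-same") gets one of two equivalent shapes, depending on
// whether a shared trap block for the status already exists (sketch):
//
//	B.LS skip                  |  B.HI <shared trap target>
//	<exit code with status>    |
//	skip:                      |
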
// compileExitFromNativeCode adds instructions to give the control back to ce.exec with the given status code.
func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) {
	if target := c.compiledTrapTargets[status]; target != nil {
		c.assembler.CompileJump(arm64.B).AssignJumpTarget(target)
	}

	switch status {
	case nativeCallStatusCodeReturned:
		// Save the target for reuse.
		c.compiledTrapTargets[status] = c.compileNOP()
	case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction:
		// Read the return address, and write it to callEngine.exitContext.returnAddress.
		c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET)
		c.assembler.CompileRegisterToMemory(
			arm64.STRD, arm64ReservedRegisterForTemporary,
			arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
		)
	default:
		if c.ir.IROperationSourceOffsetsInWasmBinary != nil {
			// In this case, the execution traps, and we want the top frame's source position in the stack trace.
			// We store the instruction address into callEngine.returnAddress.
			c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.STRD)
			c.assembler.CompileRegisterToMemory(
				arm64.STRD, arm64ReservedRegisterForTemporary,
				arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
			)
		} else {
			// We won't use the source position, so just save the target for reuse.
			c.compiledTrapTargets[status] = c.compileNOP()
		}
	}

	// Write the current stack pointer to ce.stackPointer.
	c.assembler.CompileConstToRegister(arm64.MOVD, int64(c.locationStack.sp), arm64ReservedRegisterForTemporary)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine,
		callEngineStackContextStackPointerOffset)

	// Write the status to callEngine.exitContext.statusCode.
	if status != 0 {
		c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary)
		c.assembler.CompileRegisterToMemory(arm64.STRW, arm64ReservedRegisterForTemporary,
			arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
	} else {
		// If the status == 0, we use the zero register to store zero.
		c.assembler.CompileRegisterToMemory(arm64.STRW, arm64.RegRZR,
			arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
	}

	// The return address to the Go code is stored in archContext.compilerReturnAddress, which
	// is embedded in ce. We load the value into the temporary register, and then
	// invoke RET with that register.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, arm64CallEngineArchContextCompilerCallReturnAddressOffset,
		arm64ReservedRegisterForTemporary)

	c.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary)
}

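// The exit protocol above is, in pseudocode (a sketch of what the emitted
// instructions do at runtime):
//
//	ce.exitContext.returnAddress = <resume point> // only for host/builtin calls, or traps with source offsets
//	ce.stackContext.stackPointer = sp
//	ce.exitContext.statusCode    = status
//	goto ce.archContext.nativeCallReturnAddress   // back into the Go runtime
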
// compileGoDefinedHostFunction implements compiler.compileGoDefinedHostFunction for the arm64 architecture.
func (c *arm64Compiler) compileGoDefinedHostFunction() error {
	// First we must update the location stack to reflect the number of host function inputs.
	c.locationStack.init(c.typ)

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction,
			builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// The host function needs access to the caller's module instance, and the caller's information is stored in the stack
	// (as described in the doc of callEngine.stack). Here, we load the caller's *function from the stack,
	// and save its module instance in callEngine.exitContext.callerModuleInstance so we can pass it to the host function
	// without sacrificing performance.
	c.compileReservedStackBasePointerRegisterInitialization()
	// Alias for readability.
	tmp := arm64CallingConventionModuleInstanceAddressRegister
	// Get the location of the callerFunction (*function) in the stack, which depends on the signature.
	_, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
	// Load the value into the tmp register: tmp = &function{..}
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	// tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...}
	c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, functionModuleInstanceOffset, tmp)
	// Store it into callEngine.exitContext.callerModuleInstance.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		tmp,
		arm64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset)
	// Reset the state of the callerFunction value location so that we won't mess up the subsequent code generation below.
	c.locationStack.releaseRegister(callerFunction)

	if err := c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction, 0); err != nil {
		return err
	}

	// Initialize the reserved stack base pointer which is used to retrieve the call frame stack.
	c.compileReservedStackBasePointerRegisterInitialization()

	// A Go function can change the module state in arbitrary ways, so we have to force
	// the callEngine.moduleContext initialization on the function return. To do so,
	// we zero out callEngine.moduleInstance.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		arm64.RegRZR,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)

	return c.compileReturnFunction()
}

// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
	if c.stackPointerCeil < c.locationStack.stackPointerCeil {
		c.stackPointerCeil = c.locationStack.stackPointerCeil
	}
	c.locationStack = newStack
}

// compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the arm64 architecture.
func (c *arm64Compiler) compileBuiltinFunctionCheckExitCode() error {
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexCheckExitCode); err != nil {
		return err
	}

	// After return, we re-initialize the reserved registers just like in the preamble of functions.
	c.compileReservedStackBasePointerRegisterInitialization()
	c.compileReservedMemoryRegisterInitialization()
	return nil
}

// compileLabel implements compiler.compileLabel for the arm64 architecture.
func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel bool) {
	labelKey := wazeroir.Label(o.U1)
	labelInfo := c.label(labelKey)

	// If initialStack is not set, that means this label has never been reached.
	if !labelInfo.stackInitialized {
		skipThisLabel = true
		return
	}

	if labelBegin := labelInfo.initialInstruction; labelBegin == nil {
		// We use NOP as the beginning of instructions in a label.
		// This should eventually be optimized out by the assembler.
		labelInfo.initialInstruction = c.assembler.CompileStandAlone(arm64.NOP)
	} else {
		c.assembler.Add(labelBegin)
	}

	// Set the initial stack.
	c.setLocationStack(&labelInfo.initialStack)
	return false
}

// compileUnreachable implements compiler.compileUnreachable for the arm64 architecture.
func (c *arm64Compiler) compileUnreachable() error {
	c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
	return nil
}

// compileSet implements compiler.compileSet for the arm64 architecture.
func (c *arm64Compiler) compileSet(o *wazeroir.UnionOperation) error {
	depth := int(o.U1)
	isTargetVector := o.B3

	setTargetIndex := int(c.locationStack.sp) - 1 - depth

	if isTargetVector {
		_ = c.locationStack.pop()
	}
	v := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	targetLocation := &c.locationStack.stack[setTargetIndex]
	if targetLocation.onRegister() {
		// We no longer need the register previously used by the target location.
		c.markRegisterUnused(targetLocation.register)
	}

	reg := v.register
	targetLocation.setRegister(reg)
	targetLocation.valueType = v.valueType
	if isTargetVector {
		hi := &c.locationStack.stack[setTargetIndex+1]
		hi.setRegister(reg)
	}
	return nil
}

// compileGlobalGet implements compiler.compileGlobalGet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	index := uint32(o.U1)

	wasmValueType := c.ir.Globals[index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128
	// Get the address of globals[index] into globalAddressReg.
	globalAddressReg, err := c.compileReadGlobalAddress(index)
	if err != nil {
		return err
	}

	if isV128 {
		resultReg, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.assembler.CompileConstToRegister(arm64.ADD, globalInstanceValueOffset, globalAddressReg)
		c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, globalAddressReg, 0,
			resultReg, arm64.VectorArrangementQ)

		c.pushVectorRuntimeValueLocationOnRegister(resultReg)
	} else {
		ldr := arm64.NOP
		var result asm.Register
		var vt runtimeValueType
		switch wasmValueType {
		case wasm.ValueTypeI32:
			ldr = arm64.LDRW
			vt = runtimeValueTypeI32
			result = globalAddressReg
		case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
			ldr = arm64.LDRD
			vt = runtimeValueTypeI64
			result = globalAddressReg
		case wasm.ValueTypeF32:
			result, err = c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}
			ldr = arm64.FLDRS
			vt = runtimeValueTypeF32
		case wasm.ValueTypeF64:
			result, err = c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}
			ldr = arm64.FLDRD
			vt = runtimeValueTypeF64
		}

		// "result = [globalAddressReg + globalInstanceValueOffset] (== globals[index].Val)"
		c.assembler.CompileMemoryToRegister(
			ldr,
			globalAddressReg, globalInstanceValueOffset,
			result,
		)

		c.pushRuntimeValueLocationOnRegister(result, vt)
	}
	return nil
}

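// In pseudocode, global.get compiles to the following (a sketch; the load
// instruction is LDRW/LDRD/FLDRS/FLDRD/VMOV depending on the value type):
//
//	g := ce.moduleContext.globals[index] // *GlobalInstance, via compileReadGlobalAddress
//	push(g.Val)                          // load from g + globalInstanceValueOffset
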
// compileGlobalSet implements compiler.compileGlobalSet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error {
	index := uint32(o.U1)

	wasmValueType := c.ir.Globals[index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128

	var val *runtimeValueLocation
	if isV128 {
		val = c.locationStack.popV128()
	} else {
		val = c.locationStack.pop()
	}
	if err := c.compileEnsureOnRegister(val); err != nil {
		return err
	}

	globalInstanceAddressRegister, err := c.compileReadGlobalAddress(index)
	if err != nil {
		return err
	}

	if isV128 {
		c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
			val.register, globalInstanceAddressRegister, globalInstanceValueOffset,
			arm64.VectorArrangementQ)
	} else {
		var str asm.Instruction
		switch c.ir.Globals[index].ValType {
		case wasm.ValueTypeI32:
			str = arm64.STRW
		case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
			str = arm64.STRD
		case wasm.ValueTypeF32:
			str = arm64.FSTRS
		case wasm.ValueTypeF64:
			str = arm64.FSTRD
		}

		// At this point, "globalInstanceAddressRegister = globals[index]".
		// Therefore, this means "globals[index].Val = val.register".
		c.assembler.CompileRegisterToMemory(
			str,
			val.register,
			globalInstanceAddressRegister, globalInstanceValueOffset,
		)
	}

	c.markRegisterUnused(val.register)
	return nil
}

// compileReadGlobalAddress adds instructions to store the absolute address of the global instance at globalIndex into a register.
func (c *arm64Compiler) compileReadGlobalAddress(globalIndex uint32) (destinationRegister asm.Register, err error) {
	// TODO: rethink the type used to store `globals []*GlobalInstance`.
	// If we used `[]GlobalInstance` instead, we could eliminate one MOV instruction here.

	destinationRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return
	}

	// "destinationRegister = globalIndex * 8"
	c.assembler.CompileConstToRegister(
		// globalIndex is an index into []*GlobalInstance, therefore
		// we have to multiply it by the size of *GlobalInstance == the pointer size == 8.
		arm64.MOVD, int64(globalIndex)*8, destinationRegister,
	)

	// "arm64ReservedRegisterForTemporary = &globals[0]"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
		arm64ReservedRegisterForTemporary,
	)

	// "destinationRegister = [arm64ReservedRegisterForTemporary + destinationRegister] (== globals[globalIndex])".
	c.assembler.CompileMemoryWithRegisterOffsetToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForTemporary, destinationRegister,
		destinationRegister,
	)
	return
}

// compileBr implements compiler.compileBr for the arm64 architecture.
func (c *arm64Compiler) compileBr(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	return c.compileBranchInto(wazeroir.Label(o.U1))
}

// compileBrIf implements compiler.compileBrIf for the arm64 architecture.
func (c *arm64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
	cond := c.locationStack.pop()

	var conditionalBR asm.Node
	if cond.onConditionalRegister() {
		// If the cond is on a conditional register, it corresponds to one of the "condition codes":
		// https://developer.arm.com/documentation/dui0801/a/Condition-Codes/Condition-code-suffixes
		// Here we represent the condition codes by the arm64.Cond** values, and the
		// conditional jump can be performed with the corresponding arm64.BCOND** instruction.
		// For example, if we have arm64.CondEQ on cond, we performed compileEq right before
		// this compileBrIf, and the BrIf can be achieved by arm64.BCONDEQ.
		var brInst asm.Instruction
		switch cond.conditionalRegister {
		case arm64.CondEQ:
			brInst = arm64.BCONDEQ
		case arm64.CondNE:
			brInst = arm64.BCONDNE
		case arm64.CondHS:
			brInst = arm64.BCONDHS
		case arm64.CondLO:
			brInst = arm64.BCONDLO
		case arm64.CondMI:
			brInst = arm64.BCONDMI
		case arm64.CondHI:
			brInst = arm64.BCONDHI
		case arm64.CondLS:
			brInst = arm64.BCONDLS
		case arm64.CondGE:
			brInst = arm64.BCONDGE
		case arm64.CondLT:
			brInst = arm64.BCONDLT
		case arm64.CondGT:
			brInst = arm64.BCONDGT
		case arm64.CondLE:
			brInst = arm64.BCONDLE
		default:
			// BUG: This means that we use cond.conditionalRegister somewhere in this file,
			// but it is not covered by the switch above. That shouldn't happen.
			return fmt.Errorf("unsupported condition for br_if: %v", cond.conditionalRegister)
		}
		conditionalBR = c.assembler.CompileJump(brInst)
	} else {
		// If the value is not on the conditional register, we compare the value with the zero register,
		// and then do the conditional BR if the value doesn't equal zero.
		if err := c.compileEnsureOnRegister(cond); err != nil {
			return err
		}
		// Compare the value with the zero register. Note that the value is ensured to be i32 by the function validation phase,
		// so we use CMPW (32-bit compare) here.
		c.assembler.CompileTwoRegistersToNone(arm64.CMPW, cond.register, arm64.RegRZR)

		conditionalBR = c.assembler.CompileJump(arm64.BCONDNE)

		c.markRegisterUnused(cond.register)
	}

	// Emit the code for branching into the else branch.
	elseTarget := wazeroir.Label(o.U2)
	if err := c.compileBranchInto(elseTarget); err != nil {
		return err
	}
	// We branch into here from the original conditional BR (conditionalBR).
	c.assembler.SetJumpTargetOnNext(conditionalBR)
	thenTarget := wazeroir.Label(o.U1)
	if err := compileDropRange(c, o.U3); err != nil {
		return err
	}
	return c.compileBranchInto(thenTarget)
}

func (c *arm64Compiler) compileBranchInto(target wazeroir.Label) error {
	if target.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		if c.ir.LabelCallers[target] > 1 {
			// We can only reuse the register state when there's a single call-site.
			// If there are multiple call-sites, we release the existing values on registers to the stack,
			// so that we have a consistent value location state at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		targetLabel := c.label(target)
		if !targetLabel.stackInitialized {
			targetLabel.initialStack.cloneFrom(*c.locationStack)
			targetLabel.stackInitialized = true
		}

		br := c.assembler.CompileJump(arm64.B)
		c.assignBranchTarget(target, br)
		return nil
	}
}

// assignBranchTarget assigns the given label's initial instruction to the destination of br.
func (c *arm64Compiler) assignBranchTarget(label wazeroir.Label, br asm.Node) {
	target := c.label(label)

	targetInst := target.initialInstruction
	if targetInst == nil {
		// If the label isn't compiled yet, allocate a NOP node, and set it as the initial instruction.
		targetInst = c.assembler.AllocateNOP()
		target.initialInstruction = targetInst
	}

	br.AssignJumpTarget(targetInst)
}

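// Forward branches are resolved in two steps (a sketch of the flow above):
//
//	compileBranchInto(L) // L not compiled yet: assignBranchTarget allocates a
//	                     // placeholder NOP node and points the B node at it.
//	...
//	compileLabel(L)      // the same NOP node is added as L's first
//	                     // instruction, so the earlier B now resolves to it.
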
// compileBrTable implements compiler.compileBrTable for the arm64 architecture.
func (c *arm64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
	// If the operation only consists of the default target, we branch into it and return early.
	if len(o.Us) == 2 {
		loc := c.locationStack.pop()
		if loc.onRegister() {
			c.markRegisterUnused(loc.register)
		}
		if err := compileDropRange(c, o.Us[1]); err != nil {
			return err
		}
		return c.compileBranchInto(wazeroir.Label(o.Us[0]))
	}

	index := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(index); err != nil {
		return err
	}

	if isZeroRegister(index.register) {
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		index.setRegister(reg)
		c.markRegisterUsed(reg)

		// Zero the value on the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
	}

	tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// Load the branch table's length.
	// "tmpReg = len(o.Targets)"
	c.assembler.CompileConstToRegister(arm64.MOVW, int64(len(o.Us)/2-1), tmpReg)
	// Compare the length with the offset.
	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register)
	// If the value exceeds the length, we branch into the default target (corresponding to the len(o.Targets) index).
	brDefaultIndex := c.assembler.CompileJump(arm64.BCONDLO)
	c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register)
	c.assembler.SetJumpTargetOnNext(brDefaultIndex)

	// We prepare the asm.StaticConst which holds the offset of
	// each target's first instruction (incl. default)
	// relative to the beginning of the label tables.
	//
	// For example, if we have targets=[L0, L1] and default=L_DEFAULT,
	// we emit the code like this at [Emit the code for each target and default branch] below.
	//
	//	L0:
	//		0x123001: XXXX, ...
	//		.....
	//	L1:
	//		0x123005: YYY, ...
	//		.....
	//	L_DEFAULT:
	//		0x123009: ZZZ, ...
	//
	// then offsetData becomes like [0x0, 0x5, 0x8].
	// By using this offset list, we can jump into the label for the index by
	// "jmp offsetData[index]+0x123001", where "0x123001" is obtained with the ADR instruction.
	//
	// Note: We store each offset as a 32-bit unsigned integer in 4 consecutive bytes. So more precisely,
	// the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
	//
	// Note: this is similar to how GCC implements switch statements in C.
	offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2)))

	// "tmpReg = &offsetData[0]"
	c.assembler.CompileStaticConstToRegister(arm64.ADR, offsetData, tmpReg)

	// "index.register = tmpReg + (index.register << 2) (== &offsetData[offset])"
	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register)

	// "index.register = *index.register (== offsetData[offset])"
	c.assembler.CompileMemoryToRegister(arm64.LDRW, index.register, 0, index.register)

	// Now we read the address of the beginning of the jump table.
	// In the above example, this corresponds to reading the address of 0x123001.
	c.assembler.CompileReadInstructionAddress(tmpReg, arm64.B)

	// Now we have the address of L0 in the tmp register, and the offset to the target label in index.register.
	// So we can achieve the br_table jump by adding them and jumping into the resulting address.
	c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register)

	c.assembler.CompileJumpToRegister(arm64.B, index.register)

	// We no longer need the index's register, so mark it unused.
	c.markRegisterUnused(index.register)

	// [Emit the code for each target and default branch]
	labelInitialInstructions := make([]asm.Node, len(o.Us)/2)

	// Since we might end up with different stack states in each branch,
	// we need to save the initial stack state here, and use the same initial state
	// for each iteration.
	initialLocationStack := c.getSavedTemporaryLocationStack()

	for i := range labelInitialInstructions {
		// Emit the initial instruction of each target where
		// we use NOP as we don't yet know the next instruction in each label.
		init := c.assembler.CompileStandAlone(arm64.NOP)
		labelInitialInstructions[i] = init

		targetLabel := wazeroir.Label(o.Us[i*2])
		targetToDrop := o.Us[i*2+1]
		if err = compileDropRange(c, targetToDrop); err != nil {
			return err
		}
		if err = c.compileBranchInto(targetLabel); err != nil {
			return err
		}
		// After the iteration, reset the stack's state with initialLocationStack.
		c.locationStack.cloneFrom(initialLocationStack)
	}

	c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
	return nil
}

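// At runtime, the dispatch emitted above behaves like this pseudocode (a
// sketch; numTargets is the number of non-default targets, i.e.
// len(o.Us)/2-1, and offsetData holds one 32-bit little-endian offset per
// target including the default):
//
//	if index >= numTargets { index = numTargets } // fall back to the default entry
//	base := <address of the first target's NOP>   // obtained via ADR
//	goto base + uintptr(offsetData32[index])      // offsetData32: []uint32 view of offsetData
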
func (c *arm64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
	initialLocationStack := *c.locationStack // Take a copy!
	// Use c.brTableTmp for the underlying stack so that we can reduce the allocations.
	if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
		c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
	}
	copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
	initialLocationStack.stack = c.brTableTmp
	return initialLocationStack
}

// compileCall implements compiler.compileCall for the arm64 architecture.
func (c *arm64Compiler) compileCall(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	functionIndex := o.U1

	tp := &c.ir.Types[c.ir.Functions[functionIndex]]

	targetFunctionAddressReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(targetFunctionAddressReg)
	defer c.markRegisterUnused(targetFunctionAddressReg)

	// Read the address of the target function (i.e. &ce.functions[functionIndex]) into targetFunctionAddressReg.
	//
	// First, we read the address of the first item of the ce.functions slice (= &ce.functions[0]).
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
		targetFunctionAddressReg)

	c.assembler.CompileConstToRegister(
		arm64.ADD,
		int64(functionIndex)*functionSize, // functionIndex * the size of a function struct gives the offset from &ce.functions[0].
		targetFunctionAddressReg)

	return c.compileCallImpl(targetFunctionAddressReg, tp)
}

// compileCallImpl implements compiler.compileCall and compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallImpl(targetFunctionAddressRegister asm.Register, functype *wasm.FunctionType) error {
	// Release all the registers as our calling convention requires the caller to save them.
	if err := c.compileReleaseAllRegistersToStack(); err != nil {
		return err
	}

	tmp, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !ok {
		panic("BUG: cannot take a free register")
	}

	// The stack should look like:
	//
	//	               reserved slots for results (if len(results) > len(args))
	//	                      |     |
	//	,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
	//	  |                       |                                                        |
	//	  |             callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
	//	  |
	//	nextStackBasePointerOffset
	//
	// where callFrame is used to return to this currently executed function.

	nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)

	callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)

	// Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		tmp)
	callFrameStackBasePointerInBytesLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)

	// Set callEngine.stackContext.stackBasePointer for the next function.
	c.assembler.CompileConstToRegister(arm64.ADD, nextStackBasePointerOffset<<3, tmp)
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		tmp,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

	// Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset,
		tmp)
	callFrameFunctionLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameFunctionLoc)

	// Set callEngine.moduleContext.fn to the next *function.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		targetFunctionAddressRegister,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

	// Write the return address into callFrameReturnAddressLoc.
	c.assembler.CompileReadInstructionAddress(tmp, arm64.B)
	callFrameReturnAddressLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameReturnAddressLoc)

	if targetFunctionAddressRegister == arm64CallingConventionModuleInstanceAddressRegister {
		// In this case, we must move the value on targetFunctionAddressRegister to another register, otherwise
		// the address (the jump target below) would be modified and result in a segfault.
		// See #526.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, targetFunctionAddressRegister, tmp)
		targetFunctionAddressRegister = tmp
	}

	// Also, we have to put the target code's moduleInstance address into arm64CallingConventionModuleInstanceAddressRegister.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		targetFunctionAddressRegister, functionModuleInstanceOffset,
		arm64CallingConventionModuleInstanceAddressRegister,
	)

	// Then, br into the target function's initial address.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		targetFunctionAddressRegister, functionCodeInitialAddressOffset,
		targetFunctionAddressRegister)

	c.assembler.CompileJumpToRegister(arm64.B, targetFunctionAddressRegister)

	// We consumed the function parameters, the call frame stack and reserved slots during the call.
	c.locationStack.sp = uint64(nextStackBasePointerOffset)

	// Also, the function results were pushed by the call.
	for _, t := range functype.Results {
		loc := c.locationStack.pushRuntimeValueLocationOnStack()
		switch t {
		case wasm.ValueTypeI32:
			loc.valueType = runtimeValueTypeI32
		case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref:
			loc.valueType = runtimeValueTypeI64
		case wasm.ValueTypeF32:
			loc.valueType = runtimeValueTypeF32
		case wasm.ValueTypeF64:
			loc.valueType = runtimeValueTypeF64
		case wasm.ValueTypeV128:
			loc.valueType = runtimeValueTypeV128Lo
			hi := c.locationStack.pushRuntimeValueLocationOnStack()
			hi.valueType = runtimeValueTypeV128Hi
		}
	}

	if err := c.compileModuleContextInitialization(); err != nil {
		return err
	}

	// On the function return, we initialize the state for this function.
	c.compileReservedStackBasePointerRegisterInitialization()

	c.compileReservedMemoryRegisterInitialization()
	return nil
}

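// The call sequence above is, in pseudocode (a sketch; "frame" is the call
// frame pushed by pushCallFrame, and target is the function pointed to by
// targetFunctionAddressRegister):
//
//	frame.returnStackBasePointerInBytes = ce.stackBasePointerInBytes
//	ce.stackBasePointerInBytes += nextStackBasePointerOffset << 3
//	frame.function = ce.fn
//	ce.fn = target
//	frame.returnAddress = <instruction following the B below>
//	moduleInstanceReg = target.moduleInstance
//	goto target.codeInitialAddress
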
// compileCallIndirect implements compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) (err error) {
	offset := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(offset); err != nil {
		return err
	}
	typeIndex := o.U1
	tableIndex := o.U2

	offsetReg := offset.register
	if isZeroRegister(offsetReg) {
		offsetReg, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.markRegisterUsed(offsetReg)

		// Zero the value on the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetReg)
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(tmp)

	tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(tmp2)

	// First, we need to check that the offset doesn't exceed the length of the table.
	// "tmp = &Tables[0]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		tmp,
	)
	// tmp = [tmp + TableIndex*8] = [&Tables[0] + TableIndex*sizeOf(*tableInstance)] = Tables[tableIndex]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		tmp, int64(tableIndex)*8,
		tmp,
	)
	// tmp2 = [tmp + tableInstanceTableLenOffset] = len(Tables[tableIndex])
	c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, tableInstanceTableLenOffset, tmp2)

	// "cmp tmp2, offset"
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp2, offsetReg)

	// If it exceeds len(table), we trap.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
	// Otherwise, we proceed to the function type check.

	// We need to obtain the absolute address of the table element.
	// "tmp = &Tables[tableIndex].table[0]"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		tmp, tableInstanceTableOffset,
		tmp,
	)
	// "offset = tmp + (offset << pointerSizeLog2) (== &table[offset])"
	// Here we left-shift by 3 to get the offset in bytes,
	// as the table element type is uintptr, which is 8 bytes.
	c.assembler.CompileLeftShiftedRegisterToRegister(
		arm64.ADD,
		offsetReg, pointerSizeLog2,
		tmp,
		offsetReg,
	)

	// "offset = (*offset) (== table[offset])"
	c.assembler.CompileMemoryToRegister(arm64.LDRD, offsetReg, 0, offsetReg)

	// Check if the value of table[offset] equals zero, meaning that the target element is uninitialized.
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, offsetReg)

	// Skipped if the target is initialized.
	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeInvalidTableAccess)

	// Next, we check that the type matches, i.e. table[offset].source.TypeID == targetFunctionType.
	// "tmp = table[offset].typeID"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		offsetReg, functionTypeIDOffset,
		tmp,
	)
	// "tmp2 = ModuleInstance.TypeIDs[index]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
		tmp2)
	c.assembler.CompileMemoryToRegister(arm64.LDRW, tmp2, int64(typeIndex)*4, tmp2)

	// Compare these two values; if they are equal, we are ready to make the function call.
	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmp, tmp2)
	// Skipped if the type matches.
	c.compileMaybeExitFromNativeCode(arm64.BCONDEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall)

	targetFunctionType := &c.ir.Types[typeIndex]
	if err := c.compileCallImpl(offsetReg, targetFunctionType); err != nil {
		return err
	}

	// The offset register should be marked as unused as it was consumed by the function call.
	c.markRegisterUnused(offsetReg, tmp, tmp2)
	return nil
}

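// The three guards emitted above correspond to this pseudocode (a sketch of
// the runtime behavior):
//
//	t := ce.tables[tableIndex]
//	if offset >= t.len                                { trap(invalidTableAccess) }
//	f := t.table[offset] // uintptr to a function struct
//	if f == 0                                         { trap(invalidTableAccess) } // uninitialized element
//	if f.typeID != moduleInstance.TypeIDs[typeIndex]  { trap(typeMismatchOnIndirectCall) }
//	call f
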
// compileDrop implements compiler.compileDrop for the arm64 architecture.
func (c *arm64Compiler) compileDrop(o *wazeroir.UnionOperation) error {
	return compileDropRange(c, o.U1)
}

func (c *arm64Compiler) compileSelectV128Impl(selectorRegister asm.Register) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, selectorRegister)
	brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

	// In this branch, we select the value of x2, so we move the value into x1.register so that
	// we can have the result in x1.register regardless of the selection.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
		x2.register, x2.register, x1.register, arm64.VectorArrangement16B)

	c.assembler.SetJumpTargetOnNext(brIfNotZero)

	// As noted, the result exists in x1.register regardless of the selector.
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	// Plus, x2.register is no longer used.
	c.markRegisterUnused(x2.register)
	return nil
}

// compileSelect implements compiler.compileSelect for the arm64 architecture.
func (c *arm64Compiler) compileSelect(o *wazeroir.UnionOperation) error {
	cv, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	isTargetVector := o.B3
	if isTargetVector {
		return c.compileSelectV128Impl(cv.register)
	}

	c.markRegisterUsed(cv.register)

	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
		// If both values are zero, the result is always zero.
		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
		c.markRegisterUnused(cv.register)
		return nil
	}

	// In the following, we emit the code so that x1's register contains the chosen value
	// no matter which of the original x1 or x2 is selected.
	//
	// If x1 is currently on the zero register, we cannot place the result there, because
	// "MOV arm64.RegRZR x2.register" results in arm64.RegRZR regardless of the value.
	// So we explicitly assign a general purpose register to x1 here.
	if isZeroRegister(x1.register) {
		// Mark x2's register as used so that it won't be chosen (cv's register is already marked used above).
		c.markRegisterUsed(x2.register)
		// Pick a non-zero register for x1.
		x1Reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		x1.setRegister(x1Reg)
		// And zero out the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, x1Reg)
	}

	// At this point, x1 is on a non-zero register, and x2 is either on a general purpose or the zero register.

	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, cv.register)
	brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

	// If cv == 0, we move the value of x2 to x1.register.

	switch x1.valueType {
	case runtimeValueTypeI32:
		// TODO: use 32-bit mov
		c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
	case runtimeValueTypeI64:
		c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
	case runtimeValueTypeF32:
		// TODO: use 32-bit mov
		c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
	case runtimeValueTypeF64:
		c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
	default:
		return errors.New("TODO: implement vector type select")
	}

	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)

	// Otherwise, nothing to do for select.
	c.assembler.SetJumpTargetOnNext(brIfNotZero)

	// Only x1.register is reused.
	c.markRegisterUnused(cv.register, x2.register)
	return nil
}

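// Scalar select in pseudocode (a sketch of the emitted control flow; the move
// is MOVD or FMOVD depending on the value type):
//
//	if cv == 0 {
//		x1.register = x2.register
//	}
//	push(x1) // the result always lives in x1's register
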
// compilePick implements compiler.compilePick for the arm64 architecture.
func (c *arm64Compiler) compilePick(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	depth := o.U1
	isTargetVector := o.B3

	pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)]
	pickedRegister, err := c.allocateRegister(pickTarget.getRegisterType())
	if err != nil {
		return err
	}

	if pickTarget.onRegister() { // Copy the value to the pickedRegister.
		switch pickTarget.valueType {
		case runtimeValueTypeI32:
			c.assembler.CompileRegisterToRegister(arm64.MOVW, pickTarget.register, pickedRegister)
		case runtimeValueTypeI64:
			c.assembler.CompileRegisterToRegister(arm64.MOVD, pickTarget.register, pickedRegister)
		case runtimeValueTypeF32:
			c.assembler.CompileRegisterToRegister(arm64.FMOVS, pickTarget.register, pickedRegister)
		case runtimeValueTypeF64:
			c.assembler.CompileRegisterToRegister(arm64.FMOVD, pickTarget.register, pickedRegister)
		case runtimeValueTypeV128Lo:
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				pickTarget.register, pickTarget.register, pickedRegister, arm64.VectorArrangement16B)
		case runtimeValueTypeV128Hi:
			panic("BUG") // since the pick target must point to the lower 64-bits of vectors.
		}
	} else if pickTarget.onStack() {
		// Temporarily assign a register to the pick target, and then load the value.
		pickTarget.setRegister(pickedRegister)
		c.compileLoadValueOnStackToRegister(pickTarget)

		// After the load, we revert the register assignment to the pick target.
		pickTarget.setRegister(asm.NilRegister)
		if isTargetVector {
			hi := &c.locationStack.stack[pickTarget.stackPointer+1]
			hi.setRegister(asm.NilRegister)
		}
	}

	// Now we have the value of the target on the pickedRegister,
	// so push the location.
	c.pushRuntimeValueLocationOnRegister(pickedRegister, pickTarget.valueType)
	if isTargetVector {
		c.pushRuntimeValueLocationOnRegister(pickedRegister, runtimeValueTypeV128Hi)
	}
	return nil
}

// compileAdd implements compiler.compileAdd for the arm64 architecture.
func (c *arm64Compiler) compileAdd(o *wazeroir.UnionOperation) error {
	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// Addition can be a nop if one of the operands is zero.
	if isZeroRegister(x1.register) {
		c.pushRuntimeValueLocationOnRegister(x2.register, x1.valueType)
		return nil
	} else if isZeroRegister(x2.register) {
		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
		return nil
	}

	var inst asm.Instruction
	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		inst = arm64.ADDW
	case wazeroir.UnsignedTypeI64:
		inst = arm64.ADD
	case wazeroir.UnsignedTypeF32:
		inst = arm64.FADDS
	case wazeroir.UnsignedTypeF64:
		inst = arm64.FADDD
	}

	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
	// The result is placed on the register for x1, so record it.
	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
	return nil
}

1478		destinationReg := x1.register
1479		if isZeroRegister(x1.register) {
1480			destinationReg = x2.register
1481		}
1482	
1483		var inst asm.Instruction
1484		var vt runtimeValueType
1485		unsignedType := wazeroir.UnsignedType(o.B1)
1486		switch unsignedType {
1487		case wazeroir.UnsignedTypeI32:
1488			inst = arm64.SUBW
1489			vt = runtimeValueTypeI32
1490		case wazeroir.UnsignedTypeI64:
1491			inst = arm64.SUB
1492			vt = runtimeValueTypeI64
1493		case wazeroir.UnsignedTypeF32:
1494			inst = arm64.FSUBS
1495			vt = runtimeValueTypeF32
1496		case wazeroir.UnsignedTypeF64:
1497			inst = arm64.FSUBD
1498			vt = runtimeValueTypeF64
1499		}
1500	
1501		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1502		c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
1503		return nil
1504	}
1505	
1506	// compileMul implements compiler.compileMul for the arm64 architecture.
1507	func (c *arm64Compiler) compileMul(o *wazeroir.UnionOperation) error {
1508		x1, x2, err := c.popTwoValuesOnRegisters()
1509		if err != nil {
1510			return err
1511		}
1512	
1513		// Multiplication yields zero if either operand is zero, so we just push the zero register.
1514		if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1515			c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
1516			return nil
1517		}
1518	
1519		var inst asm.Instruction
1520		var vt runtimeValueType
1521		unsignedType := wazeroir.UnsignedType(o.B1)
1522		switch unsignedType {
1523		case wazeroir.UnsignedTypeI32:
1524			inst = arm64.MULW
1525			vt = runtimeValueTypeI32
1526		case wazeroir.UnsignedTypeI64:
1527			inst = arm64.MUL
1528			vt = runtimeValueTypeI64
1529		case wazeroir.UnsignedTypeF32:
1530			inst = arm64.FMULS
1531			vt = runtimeValueTypeF32
1532		case wazeroir.UnsignedTypeF64:
1533			inst = arm64.FMULD
1534			vt = runtimeValueTypeF64
1535		}
1536	
1537		c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1538		// The result is in x1's register, so record it.
1539		c.pushRuntimeValueLocationOnRegister(x1.register, vt)
1540		return nil
1541	}
1542	
1543	// compileClz implements compiler.compileClz for the arm64 architecture.
1544	func (c *arm64Compiler) compileClz(o *wazeroir.UnionOperation) error {
1545		v, err := c.popValueOnRegister()
1546		if err != nil {
1547			return err
1548		}
1549	
1550		unsignedInt := wazeroir.UnsignedInt(o.B1)
1551		if isZeroRegister(v.register) {
1552			// If the target is the zero register, the result is always 32 (or 64 for the 64-bit case),
1553			// so we allocate a register and load the constant into it.
1554			reg, err := c.allocateRegister(registerTypeGeneralPurpose)
1555			if err != nil {
1556				return err
1557			}
1558			var vt runtimeValueType
1559			if unsignedInt == wazeroir.UnsignedInt32 {
1560				vt = runtimeValueTypeI32
1561				c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
1562			} else {
1563				vt = runtimeValueTypeI64
1564				c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
1565			}
1566			c.pushRuntimeValueLocationOnRegister(reg, vt)
1567			return nil
1568		}
1569	
1570		reg := v.register
1571		var vt runtimeValueType
1572		if unsignedInt == wazeroir.UnsignedInt32 {
1573			vt = runtimeValueTypeI32
1574			c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
1575		} else {
1576			vt = runtimeValueTypeI64
1577			c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
1578		}
1579		c.pushRuntimeValueLocationOnRegister(reg, vt)
1580		return nil
1581	}
1582	
1583	// compileCtz implements compiler.compileCtz for the arm64 architecture.
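// For example, ctz(0x8) via the RBIT+CLZ sequence below: RBITW(0x00000008) = 0x10000000,
// and CLZW(0x10000000) = 3, which is indeed the number of trailing zeros of 0x8.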
1584	func (c *arm64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
1585		v, err := c.popValueOnRegister()
1586		if err != nil {
1587			return err
1588		}
1589	
1590		unsignedInt := wazeroir.UnsignedInt(o.B1)
1591		reg := v.register
1592		if isZeroRegister(reg) {
1593			// If the target is the zero register, the result is always 32 (or 64 for the 64-bit case),
1594			// so we allocate a register and load the constant into it.
1595			reg, err := c.allocateRegister(registerTypeGeneralPurpose)
1596			if err != nil {
1597				return err
1598			}
1599			var vt runtimeValueType
1600			if unsignedInt == wazeroir.UnsignedInt32 {
1601				vt = runtimeValueTypeI32
1602				c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
1603			} else {
1604				vt = runtimeValueTypeI64
1605				c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
1606			}
1607			c.pushRuntimeValueLocationOnRegister(reg, vt)
1608			return nil
1609		}
1610	
1611		// Since arm64 doesn't have an instruction that directly counts trailing zeros,
1612		// we reverse the bits first and then do CLZ, which is exactly how
1613		// gcc implements __builtin_ctz for arm64.
1614		var vt runtimeValueType
1615		if unsignedInt == wazeroir.UnsignedInt32 {
1616			vt = runtimeValueTypeI32
1617			c.assembler.CompileRegisterToRegister(arm64.RBITW, reg, reg)
1618			c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
1619		} else {
1620			vt = runtimeValueTypeI64
1621			c.assembler.CompileRegisterToRegister(arm64.RBIT, reg, reg)
1622			c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
1623		}
1624		c.pushRuntimeValueLocationOnRegister(reg, vt)
1625		return nil
1626	}
1627	
1628	// compilePopcnt implements compiler.compilePopcnt for the arm64 architecture.
1629	func (c *arm64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error {
1630		v, err := c.popValueOnRegister()
1631		if err != nil {
1632			return err
1633		}
1634	
1635		reg := v.register
1636		if isZeroRegister(reg) {
1637			c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
1638			return nil
1639		}
1640	
1641		freg, err := c.allocateRegister(registerTypeVector)
1642		if err != nil {
1643			return err
1644		}
1645	
1646		// arm64 doesn't have a population count instruction for scalar registers,
1647		// so we use the vector one (VCNT).
1648		// This is exactly how the official Go compiler implements bits.OnesCount.
1649		// For example, "func() int { return bits.OnesCount(10) }" is compiled as
1650		//
1651		//	MOVD $10, R0 ;; Load 10.
1652		//	FMOVD R0, F0
1653		//	VCNT V0.B8, V0.B8
1654		//	UADDLV V0.B8, V0
1655		//
1656		var movInst asm.Instruction
1657		unsignedInt := wazeroir.UnsignedInt(o.B1)
1658		if unsignedInt == wazeroir.UnsignedInt32 {
1659			movInst = arm64.FMOVS
1660		} else {
1661			movInst = arm64.FMOVD
1662		}
1663		c.assembler.CompileRegisterToRegister(movInst, reg, freg)
1664		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VCNT, freg, freg,
1665			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
1666		c.assembler.CompileVectorRegisterToVectorRegister(arm64.UADDLV, freg, freg, arm64.VectorArrangement8B,
1667			arm64.VectorIndexNone, arm64.VectorIndexNone)
1668	
1669		c.assembler.CompileRegisterToRegister(movInst, freg, reg)
1670	
1671		c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
1672		return nil
1673	}
1674	
1675	// compileDiv implements compiler.compileDiv for the arm64 architecture.
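// Note: Wasm requires integer division to trap both on a zero divisor and on the
// MinInt / -1 overflow case, hence the prechecks emitted below via
// compileIntegerDivPrecheck; float division never traps, so the float cases need none.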
1676	func (c *arm64Compiler) compileDiv(o *wazeroir.UnionOperation) error {
1677		dividend, divisor, err := c.popTwoValuesOnRegisters()
1678		if err != nil {
1679			return err
1680		}
1681	
1682		signedType := wazeroir.SignedType(o.B1)
1683	
1684		// If the divisor is on the zero register, division by zero is certain, so we exit from the function deterministically.
1685		if isZeroRegister(divisor.register) {
1686			// Push a placeholder value so that subsequent instructions see a consistent location stack state.
1687			v := c.locationStack.pushRuntimeValueLocationOnStack()
1688			switch signedType {
1689			case wazeroir.SignedTypeInt32, wazeroir.SignedTypeUint32:
1690				v.valueType = runtimeValueTypeI32
1691			case wazeroir.SignedTypeUint64, wazeroir.SignedTypeInt64:
1692				v.valueType = runtimeValueTypeI64
1693			}
1694			c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
1695			return nil
1696		}
1697	
1698		var inst asm.Instruction
1699		var vt runtimeValueType
1700		switch signedType {
1701		case wazeroir.SignedTypeUint32:
1702			inst = arm64.UDIVW
1703			if err := c.compileIntegerDivPrecheck(true, false, dividend.register, divisor.register); err != nil {
1704				return err
1705			}
1706			vt = runtimeValueTypeI32
1707		case wazeroir.SignedTypeUint64:
1708			if err := c.compileIntegerDivPrecheck(false, false, dividend.register, divisor.register); err != nil {
1709				return err
1710			}
1711			inst = arm64.UDIV
1712			vt = runtimeValueTypeI64
1713		case wazeroir.SignedTypeInt32:
1714			if err := c.compileIntegerDivPrecheck(true, true, dividend.register, divisor.register); err != nil {
1715				return err
1716			}
1717			inst = arm64.SDIVW
1718			vt = runtimeValueTypeI32
1719		case wazeroir.SignedTypeInt64:
1720			if err := c.compileIntegerDivPrecheck(false, true, dividend.register, divisor.register); err != nil {
1721				return err
1722			}
1723			inst = arm64.SDIV
1724			vt = runtimeValueTypeI64
1725		case wazeroir.SignedTypeFloat32:
1726			inst = arm64.FDIVS
1727			vt = runtimeValueTypeF32
1728		case wazeroir.SignedTypeFloat64:
1729			inst = arm64.FDIVD
1730			vt = runtimeValueTypeF64
1731		}
1732	
1733		c.assembler.CompileRegisterToRegister(inst, divisor.register, dividend.register)
1734	
1735		c.pushRuntimeValueLocationOnRegister(dividend.register, vt)
1736		return nil
1737	}
1738	
1739	// compileIntegerDivPrecheck adds instructions to check that the divisor and dividend are sound for a division operation.
1740	// First, it checks whether the divisor equals zero and, if so, exits the function.
1741	// Additionally, for signed divisions, it checks whether the division would overflow.
1742	func (c *arm64Compiler) compileIntegerDivPrecheck(is32Bit, isSigned bool, dividend, divisor asm.Register) error {
1743		// Check whether the divisor equals zero.
1744		var cmpInst, movInst, loadInst asm.Instruction
1745		var minValueOffsetInVM int64
1746		if is32Bit {
1747			cmpInst = arm64.CMPW
1748			movInst = arm64.MOVW
1749			loadInst = arm64.LDRW
1750			minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
1751		} else {
1752			cmpInst = arm64.CMP
1753			movInst = arm64.MOVD
1754			loadInst = arm64.LDRD
1755			minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
1756		}
1757		c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisor)
1758	
1759		// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
1760		c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
1761		// Otherwise, we proceed.
1762	
1763		// If the operation is a signed integer division, we have to add an additional overflow check.
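	// Roughly, for the 32-bit signed case, the check below assembles to
	// (a sketch; mnemonics follow this file's conventions):
	//
	//	MOVW $-1, tmp
	//	CMPW tmp, divisor
	//	B.NE ok                            ;; divisor != -1: overflow impossible
	//	LDRW ce.minimum32BitSignedInt, tmp
	//	CMPW tmp, dividend
	//	B.NE ok                            ;; dividend != math.MinInt32: safe
	//	exit with nativeCallStatusIntegerOverflow
	// ok: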
1764		if isSigned {
1765			// For signed division, we need a branch for the "math.MinInt{32,64} / -1"
1766			// case, which overflows.
1767	
1768			// First, we compare the divisor with -1.
1769			c.assembler.CompileConstToRegister(movInst, -1, arm64ReservedRegisterForTemporary)
1770			c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, divisor)
1771	
1772			// If they are not equal, we skip the following check.
1773			brIfDivisorNonMinusOne := c.assembler.CompileJump(arm64.BCONDNE)
1774	
1775			// Otherwise, we further check if the dividend equals math.MinInt32 or math.MinInt64.
1776			c.assembler.CompileMemoryToRegister(
1777				loadInst,
1778				arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
1779				arm64ReservedRegisterForTemporary,
1780			)
1781			c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, dividend)
1782	
1783			// If they are not equal, it is safe to execute the division.
1784			// Otherwise, we raise an integer overflow error.
1785			c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerOverflow)
1786	
1787			c.assembler.SetJumpTargetOnNext(brIfDivisorNonMinusOne)
1788		}
1789		return nil
1790	}
1791	
1792	// compileRem implements compiler.compileRem for the arm64 architecture.
1793	func (c *arm64Compiler) compileRem(o *wazeroir.UnionOperation) error {
1794		dividend, divisor, err := c.popTwoValuesOnRegisters()
1795		if err != nil {
1796			return err
1797		}
1798	
1799		dividendReg := dividend.register
1800		divisorReg := divisor.register
1801	
1802		// If the divisor is on the zero register, division by zero is certain, so we exit from the function deterministically.
1803		if isZeroRegister(divisor.register) {
1804			// Push a placeholder value so that subsequent instructions see a consistent location stack state.
1805			v := c.locationStack.pushRuntimeValueLocationOnStack()
1806			v.valueType = runtimeValueTypeI32
1807			c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
1808			return nil
1809		}
1810	
1811		var divInst, msubInst, cmpInst asm.Instruction
1812		signedInt := wazeroir.SignedInt(o.B1)
1813		switch signedInt {
1814		case wazeroir.SignedUint32:
1815			divInst = arm64.UDIVW
1816			msubInst = arm64.MSUBW
1817			cmpInst = arm64.CMPW
1818		case wazeroir.SignedUint64:
1819			divInst = arm64.UDIV
1820			msubInst = arm64.MSUB
1821			cmpInst = arm64.CMP
1822		case wazeroir.SignedInt32:
1823			divInst = arm64.SDIVW
1824			msubInst = arm64.MSUBW
1825			cmpInst = arm64.CMPW
1826		case wazeroir.SignedInt64:
1827			divInst = arm64.SDIV
1828			msubInst = arm64.MSUB
1829			cmpInst = arm64.CMP
1830		}
1831	
1832		// Check whether the divisor equals zero.
1833		c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisorReg)
1834	
1835		// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
1836		c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
1837		// Otherwise, we proceed.
1838	
1839		// Temporarily mark them as used so that allocating the result register doesn't evict these values.
1840		c.markRegisterUsed(dividend.register, divisor.register)
1841	
1842		resultReg, err := c.allocateRegister(registerTypeGeneralPurpose)
1843		if err != nil {
1844			return err
1845		}
1846	
1847		// arm64 doesn't have an instruction for rem, so we calculate it with two instructions: UDIV (SDIV for signed) and MSUB.
1848		// This is exactly the same code that Clang emits.
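		// For example, 7 % 3: UDIV produces 2, and MSUB produces 7 - 3*2 = 1.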
1849		// [input: x0=dividend, x1=divisor]
1850		// >> UDIV x2, x0, x1
1851		// >> MSUB x3, x2, x1, x0
1852		// [result: x2=quotient, x3=remainder]
1853		//
1854		c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg)
1855		// resultReg = dividendReg - (divisorReg * resultReg)
1856		c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg)
1857	
1858		c.markRegisterUnused(dividend.register, divisor.register)
1859		c.pushRuntimeValueLocationOnRegister(resultReg, dividend.valueType)
1860		return nil
1861	}
1862	
1863	// compileAnd implements compiler.compileAnd for the arm64 architecture.
1864	func (c *arm64Compiler) compileAnd(o *wazeroir.UnionOperation) error {
1865		x1, x2, err := c.popTwoValuesOnRegisters()
1866		if err != nil {
1867			return err
1868		}
1869	
1870		// If either of the x1 or x2 registers is zero,
1871		// the result will always be zero.
1872		if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1873			c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
1874			return nil
1875		}
1876	
1877		// At this point, neither x1 nor x2 is on the zero register (both such cases returned above),
1878		// so x1's register is a valid destination; the zero-register check below is defensive.
1879		destinationReg := x1.register
1880		if isZeroRegister(x1.register) {
1881			destinationReg = x2.register
1882		}
1883	
1884		var inst asm.Instruction
1885		unsignedInt := wazeroir.UnsignedInt(o.B1)
1886		switch unsignedInt {
1887		case wazeroir.UnsignedInt32:
1888			inst = arm64.ANDW
1889		case wazeroir.UnsignedInt64:
1890			inst = arm64.AND
1891		}
1892	
1893		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1894		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1895		return nil
1896	}
1897	
1898	// compileOr implements compiler.compileOr for the arm64 architecture.
1899	func (c *arm64Compiler) compileOr(o *wazeroir.UnionOperation) error {
1900		x1, x2, err := c.popTwoValuesOnRegisters()
1901		if err != nil {
1902			return err
1903		}
1904	
1905		if isZeroRegister(x1.register) {
1906			c.pushRuntimeValueLocationOnRegister(x2.register, x2.valueType)
1907			return nil
1908		}
1909		if isZeroRegister(x2.register) {
1910			c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1911			return nil
1912		}
1913	
1914		var inst asm.Instruction
1915		unsignedInt := wazeroir.UnsignedInt(o.B1)
1916		switch unsignedInt {
1917		case wazeroir.UnsignedInt32:
1918			inst = arm64.ORRW
1919		case wazeroir.UnsignedInt64:
1920			inst = arm64.ORR
1921		}
1922	
1923		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
1924		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1925		return nil
1926	}
1927	
1928	// compileXor implements compiler.compileXor for the arm64 architecture.
1929	func (c *arm64Compiler) compileXor(o *wazeroir.UnionOperation) error {
1930		x1, x2, err := c.popTwoValuesOnRegisters()
1931		if err != nil {
1932			return err
1933		}
1934	
1935		// Choose a non-zero register as the destination when possible, since writes to the
1936		// zero register are discarded (if both operands are zero, 0 ^ 0 == 0 is what the zero register reads anyway).
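	// For example, when x1 is on the zero register, EOR computes 0 ^ x2 == x2
	// into x2's register, so reusing x2's register as the destination is correct.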
1937		destinationReg := x1.register
1938		if isZeroRegister(x1.register) {
1939			destinationReg = x2.register
1940		}
1941	
1942		var inst asm.Instruction
1943		unsignedInt := wazeroir.UnsignedInt(o.B1)
1944		switch unsignedInt {
1945		case wazeroir.UnsignedInt32:
1946			inst = arm64.EORW
1947		case wazeroir.UnsignedInt64:
1948			inst = arm64.EOR
1949		}
1950	
1951		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1952		c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType)
1953		return nil
1954	}
1955	
1956	// compileShl implements compiler.compileShl for the arm64 architecture.
1957	func (c *arm64Compiler) compileShl(o *wazeroir.UnionOperation) error {
1958		x1, x2, err := c.popTwoValuesOnRegisters()
1959		if err != nil {
1960			return err
1961		}
1962	
1963		if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1964			c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1965			return nil
1966		}
1967	
1968		var inst asm.Instruction
1969		unsignedInt := wazeroir.UnsignedInt(o.B1)
1970		switch unsignedInt {
1971		case wazeroir.UnsignedInt32:
1972			inst = arm64.LSLW
1973		case wazeroir.UnsignedInt64:
1974			inst = arm64.LSL
1975		}
1976	
1977		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
1978		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1979		return nil
1980	}
1981	
1982	// compileShr implements compiler.compileShr for the arm64 architecture.
1983	func (c *arm64Compiler) compileShr(o *wazeroir.UnionOperation) error {
1984		x1, x2, err := c.popTwoValuesOnRegisters()
1985		if err != nil {
1986			return err
1987		}
1988	
1989		if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1990			c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1991			return nil
1992		}
1993	
1994		var inst asm.Instruction
1995		signedInt := wazeroir.SignedInt(o.B1)
1996		switch signedInt {
1997		case wazeroir.SignedInt32:
1998			inst = arm64.ASRW
1999		case wazeroir.SignedInt64:
2000			inst = arm64.ASR
2001		case wazeroir.SignedUint32:
2002			inst = arm64.LSRW
2003		case wazeroir.SignedUint64:
2004			inst = arm64.LSR
2005		}
2006	
2007		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
2008		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2009		return nil
2010	}
2011	
2012	// compileRotl implements compiler.compileRotl for the arm64 architecture.
2013	func (c *arm64Compiler) compileRotl(o *wazeroir.UnionOperation) error {
2014		x1, x2, err := c.popTwoValuesOnRegisters()
2015		if err != nil {
2016			return err
2017		}
2018	
2019		if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
2020			c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2021			return nil
2022		}
2023	
2024		var inst, neginst asm.Instruction
2025		unsignedInt := wazeroir.UnsignedInt(o.B1)
2026		switch unsignedInt {
2027		case wazeroir.UnsignedInt32:
2028			inst = arm64.RORW
2029			neginst = arm64.NEGW
2030		case wazeroir.UnsignedInt64:
2031			inst = arm64.ROR
2032			neginst = arm64.NEG
2033		}
2034	
2035		// arm64 doesn't have a rotate-left instruction, so we negate the shift amount and
2036		// rotate right instead, matching the assembly Go emits for bits.RotateLeft.
2037		c.assembler.CompileRegisterToRegister(neginst, x2.register, x2.register)
2038	
2039		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
2040		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2041		return nil
2042	}
2043	
2044	// compileRotr implements compiler.compileRotr for the arm64 architecture.
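// Unlike rotate-left above, rotate-right maps directly onto arm64's ROR/RORW,
// so the shift amount can be used as-is without negation.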
2045	func (c *arm64Compiler) compileRotr(o *wazeroir.UnionOperation) error {
2046		x1, x2, err := c.popTwoValuesOnRegisters()
2047		if err != nil {
2048			return err
2049		}
2050	
2051		if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
2052			c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2053			return nil
2054		}
2055	
2056		var inst asm.Instruction
2057		unsignedInt := wazeroir.UnsignedInt(o.B1)
2058		switch unsignedInt {
2059		case wazeroir.UnsignedInt32:
2060			inst = arm64.RORW
2061		case wazeroir.UnsignedInt64:
2062			inst = arm64.ROR
2063		}
2064	
2065		c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
2066		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2067		return nil
2068	}
2069	
2070	// compileAbs implements compiler.compileAbs for the arm64 architecture.
2071	func (c *arm64Compiler) compileAbs(o *wazeroir.UnionOperation) error {
2072		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2073			return c.compileSimpleUnop(arm64.FABSS, runtimeValueTypeF32)
2074		} else {
2075			return c.compileSimpleUnop(arm64.FABSD, runtimeValueTypeF64)
2076		}
2077	}
2078	
2079	// compileNeg implements compiler.compileNeg for the arm64 architecture.
2080	func (c *arm64Compiler) compileNeg(o *wazeroir.UnionOperation) error {
2081		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2082			return c.compileSimpleUnop(arm64.FNEGS, runtimeValueTypeF32)
2083		} else {
2084			return c.compileSimpleUnop(arm64.FNEGD, runtimeValueTypeF64)
2085		}
2086	}
2087	
2088	// compileCeil implements compiler.compileCeil for the arm64 architecture.
2089	func (c *arm64Compiler) compileCeil(o *wazeroir.UnionOperation) error {
2090		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2091			return c.compileSimpleUnop(arm64.FRINTPS, runtimeValueTypeF32)
2092		} else {
2093			return c.compileSimpleUnop(arm64.FRINTPD, runtimeValueTypeF64)
2094		}
2095	}
2096	
2097	// compileFloor implements compiler.compileFloor for the arm64 architecture.
2098	func (c *arm64Compiler) compileFloor(o *wazeroir.UnionOperation) error {
2099		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2100			return c.compileSimpleUnop(arm64.FRINTMS, runtimeValueTypeF32)
2101		} else {
2102			return c.compileSimpleUnop(arm64.FRINTMD, runtimeValueTypeF64)
2103		}
2104	}
2105	
2106	// compileTrunc implements compiler.compileTrunc for the arm64 architecture.
2107	func (c *arm64Compiler) compileTrunc(o *wazeroir.UnionOperation) error {
2108		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2109			return c.compileSimpleUnop(arm64.FRINTZS, runtimeValueTypeF32)
2110		} else {
2111			return c.compileSimpleUnop(arm64.FRINTZD, runtimeValueTypeF64)
2112		}
2113	}
2114	
2115	// compileNearest implements compiler.compileNearest for the arm64 architecture.
2116	func (c *arm64Compiler) compileNearest(o *wazeroir.UnionOperation) error {
2117		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2118			return c.compileSimpleUnop(arm64.FRINTNS, runtimeValueTypeF32)
2119		} else {
2120			return c.compileSimpleUnop(arm64.FRINTND, runtimeValueTypeF64)
2121		}
2122	}
2123	
2124	// compileSqrt implements compiler.compileSqrt for the arm64 architecture.
2125	func (c *arm64Compiler) compileSqrt(o *wazeroir.UnionOperation) error {
2126		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2127			return c.compileSimpleUnop(arm64.FSQRTS, runtimeValueTypeF32)
2128		} else {
2129			return c.compileSimpleUnop(arm64.FSQRTD, runtimeValueTypeF64)
2130		}
2131	}
2132	
2133	// compileMin implements compiler.compileMin for the arm64 architecture.
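// Note: FMIN/FMAX propagate NaN and order -0 below +0, which matches Wasm's
// f32/f64 min/max semantics; the FMINNM/FMAXNM variants, which prefer the
// numeric operand over a NaN, would be incorrect here.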
2134	func (c *arm64Compiler) compileMin(o *wazeroir.UnionOperation) error {
2135		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2136			return c.compileSimpleFloatBinop(arm64.FMINS)
2137		} else {
2138			return c.compileSimpleFloatBinop(arm64.FMIND)
2139		}
2140	}
2141	
2142	// compileMax implements compiler.compileMax for the arm64 architecture.
2143	func (c *arm64Compiler) compileMax(o *wazeroir.UnionOperation) error {
2144		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2145			return c.compileSimpleFloatBinop(arm64.FMAXS)
2146		} else {
2147			return c.compileSimpleFloatBinop(arm64.FMAXD)
2148		}
2149	}
2150	
2151	func (c *arm64Compiler) compileSimpleFloatBinop(inst asm.Instruction) error {
2152		x1, x2, err := c.popTwoValuesOnRegisters()
2153		if err != nil {
2154			return err
2155		}
2156		c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
2157		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2158		return nil
2159	}
2160	
2161	// compileCopysign implements compiler.compileCopysign for the arm64 architecture.
2162	func (c *arm64Compiler) compileCopysign(o *wazeroir.UnionOperation) error {
2163		x1, x2, err := c.popTwoValuesOnRegisters()
2164		if err != nil {
2165			return err
2166		}
2167	
2168		var ldr asm.Instruction
2169		var minValueOffsetInVM int64
2170		if wazeroir.Float(o.B1) == wazeroir.Float32 {
2171			ldr = arm64.FLDRS
2172			minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
2173		} else {
2174			ldr = arm64.FLDRD
2175			minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
2176		}
2177	
2178		c.markRegisterUsed(x1.register, x2.register)
2179		freg, err := c.allocateRegister(registerTypeVector)
2180		if err != nil {
2181			return err
2182		}
2183	
2184		// This is exactly the same code emitted by GCC for "__builtin_copysign":
2185		//
2186		//	mov x0, -9223372036854775808
2187		//	fmov d2, x0
2188		//	vbit v0.8b, v1.8b, v2.8b
2189		//
2190		// Here, instead of a mov, we load the sign-bit mask into freg from ce.minimum{32,64}BitSignedInt.
2191		c.assembler.CompileMemoryToRegister(
2192			ldr,
2193			arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
2194			freg,
2195		)
2196	
2197		// VBIT inserts each bit from the first operand into the destination if the corresponding bit of the second operand is 1,
2198		// otherwise it leaves the destination bit unchanged.
2199		// See https://developer.arm.com/documentation/dui0801/g/Advanced-SIMD-Instructions--32-bit-/VBIT
2200		//
2201		// "vbit vreg.8b, x2vreg.8b, x1vreg.8b" == "insert the sign (topmost) bit of x2 into x1".
2202		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VBIT,
2203			freg, x2.register, x1.register, arm64.VectorArrangement16B)
2204	
2205		c.markRegisterUnused(x2.register)
2206		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2207		return nil
2208	}
2209	
2210	// compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture.
2211	func (c *arm64Compiler) compileI32WrapFromI64() error {
2212		return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI32)
2213	}
2214	
2215	// compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture.
2216	func (c *arm64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) error {
2217		// Clear the floating point status register (FPSR).
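		// Clearing it first ensures that the invalid-operation flag read back after
		// the conversion can only have been set by the FCVTZ* instruction below.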
2218		c.assembler.CompileRegisterToRegister(arm64.MSR, arm64.RegRZR, arm64.RegFPSR)
2219	
2220		var vt runtimeValueType
2221		var convinst asm.Instruction
2222		inputType := wazeroir.Float(o.B1)
2223		outputType := wazeroir.SignedInt(o.B2)
2224		nonTrapping := o.B3
2225	
2226		is32bitFloat := inputType == wazeroir.Float32
2227		if is32bitFloat && outputType == wazeroir.SignedInt32 {
2228			convinst = arm64.FCVTZSSW
2229			vt = runtimeValueTypeI32
2230		} else if is32bitFloat && outputType == wazeroir.SignedInt64 {
2231			convinst = arm64.FCVTZSS
2232			vt = runtimeValueTypeI64
2233		} else if !is32bitFloat && outputType == wazeroir.SignedInt32 {
2234			convinst = arm64.FCVTZSDW
2235			vt = runtimeValueTypeI32
2236		} else if !is32bitFloat && outputType == wazeroir.SignedInt64 {
2237			convinst = arm64.FCVTZSD
2238			vt = runtimeValueTypeI64
2239		} else if is32bitFloat && outputType == wazeroir.SignedUint32 {
2240			convinst = arm64.FCVTZUSW
2241			vt = runtimeValueTypeI32
2242		} else if is32bitFloat && outputType == wazeroir.SignedUint64 {
2243			convinst = arm64.FCVTZUS
2244			vt = runtimeValueTypeI64
2245		} else if !is32bitFloat && outputType == wazeroir.SignedUint32 {
2246			convinst = arm64.FCVTZUDW
2247			vt = runtimeValueTypeI32
2248		} else if !is32bitFloat && outputType == wazeroir.SignedUint64 {
2249			convinst = arm64.FCVTZUD
2250			vt = runtimeValueTypeI64
2251		}
2252	
2253		source, err := c.popValueOnRegister()
2254		if err != nil {
2255			return err
2256		}
2257		sourceReg := source.register
2258	
2259		destinationReg, err := c.allocateRegister(registerTypeGeneralPurpose)
2260		if err != nil {
2261			return err
2262		}
2263	
2264		c.assembler.CompileRegisterToRegister(convinst, sourceReg, destinationReg)
2265		c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
2266	
2267		if !nonTrapping {
2268			// Read the floating point status register value into a general purpose register
2269			// so that we can check whether the conversion was invalid.
2270			c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.RegFPSR, arm64ReservedRegisterForTemporary)
2271			// Check whether the conversion was invalid by comparing the status with 1 (bit 0 of FPSR is the invalid operation flag).
2272			// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
2273			c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1)
2274	
2275			brOK := c.assembler.CompileJump(arm64.BCONDNE)
2276	
2277			// If so, exit the execution with an error that depends on whether the source value is NaN.
2278			var floatcmp asm.Instruction
2279			if is32bitFloat {
2280				floatcmp = arm64.FCMPS
2281			} else {
2282				floatcmp = arm64.FCMPD
2283			}
2284			c.assembler.CompileTwoRegistersToNone(floatcmp, sourceReg, sourceReg)
2285			// The VS flag is set if at least one of the FCMP operands is NaN.
2286			// https://developer.arm.com/documentation/dui0801/g/Condition-Codes/Comparison-of-condition-code-meanings-in-integer-and-floating-point-code
2287			// If the source value is not NaN, the cause was integer overflow.
2288			c.compileMaybeExitFromNativeCode(arm64.BCONDVS, nativeCallStatusIntegerOverflow)
2289	
2290			// Otherwise, the conversion was invalid because it attempted to convert NaN to an integer.
2291			c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
2292	
2293			// If the status check passed, execution continues from here.
2294			c.assembler.SetJumpTargetOnNext(brOK)
2295		}
2296		return nil
2297	}
2298	
2299	// compileFConvertFromI implements compiler.compileFConvertFromI for the arm64 architecture.
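// Note: integer-to-float conversion is total (it may round, but it cannot trap),
// so unlike compileITruncFromF above, no FPSR check is emitted.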
2300	func (c *arm64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) error {
2301		var convinst asm.Instruction
2302		inputType := wazeroir.SignedInt(o.B1)
2303		outputType := wazeroir.Float(o.B2)
2304	
2305		if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 {
2306			convinst = arm64.SCVTFWS
2307		} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 {
2308			convinst = arm64.SCVTFS
2309		} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 {
2310			convinst = arm64.SCVTFWD
2311		} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 {
2312			convinst = arm64.SCVTFD
2313		} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 {
2314			convinst = arm64.UCVTFWS
2315		} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 {
2316			convinst = arm64.UCVTFS
2317		} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 {
2318			convinst = arm64.UCVTFWD
2319		} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 {
2320			convinst = arm64.UCVTFD
2321		}
2322	
2323		var vt runtimeValueType
2324		if outputType == wazeroir.Float32 {
2325			vt = runtimeValueTypeF32
2326		} else {
2327			vt = runtimeValueTypeF64
2328		}
2329		return c.compileSimpleConversion(convinst, registerTypeVector, vt)
2330	}
2331	
2332	// compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the arm64 architecture.
2333	func (c *arm64Compiler) compileF32DemoteFromF64() error {
2334		return c.compileSimpleUnop(arm64.FCVTDS, runtimeValueTypeF32)
2335	}
2336	
2337	// compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the arm64 architecture.
2338	func (c *arm64Compiler) compileF64PromoteFromF32() error {
2339		return c.compileSimpleUnop(arm64.FCVTSD, runtimeValueTypeF64)
2340	}
2341	
2342	// compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the arm64 architecture.
2343	func (c *arm64Compiler) compileI32ReinterpretFromF32() error {
2344		if peek := c.locationStack.peek(); peek.onStack() {
2345			// If the value is on the stack, this is a no-op: reinterpretation only changes the tracked value type.
2346			peek.valueType = runtimeValueTypeI32
2347			return nil
2348		}
2349		return c.compileSimpleConversion(arm64.FMOVS, registerTypeGeneralPurpose, runtimeValueTypeI32)
2350	}
2351	
2352	// compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the arm64 architecture.
2353	func (c *arm64Compiler) compileI64ReinterpretFromF64() error {
2354		if peek := c.locationStack.peek(); peek.onStack() {
2355			// If the value is on the stack, this is a no-op: reinterpretation only changes the tracked value type.
2356			peek.valueType = runtimeValueTypeI64
2357			return nil
2358		}
2359		return c.compileSimpleConversion(arm64.FMOVD, registerTypeGeneralPurpose, runtimeValueTypeI64)
2360	}
2361	
2362	// compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the arm64 architecture.
2363	func (c *arm64Compiler) compileF32ReinterpretFromI32() error {
2364		if peek := c.locationStack.peek(); peek.onStack() {
2365			// If the value is on the stack, this is a no-op: reinterpretation only changes the tracked value type.
2366			peek.valueType = runtimeValueTypeF32
2367			return nil
2368		}
2369		return c.compileSimpleConversion(arm64.FMOVS, registerTypeVector, runtimeValueTypeF32)
2370	}
2371	
2372	// compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the arm64 architecture.
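// As with the other reinterpret operations above, this is a pure bit copy:
// a value already spilled to the stack only has its tracked type changed,
// while a register-resident value takes a single FMOV between the register files.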
2373	func (c *arm64Compiler) compileF64ReinterpretFromI64() error {
2374		if peek := c.locationStack.peek(); peek.onStack() {
2375			// If the value is on the stack, this is a no-op: reinterpretation only changes the tracked value type.
2376			peek.valueType = runtimeValueTypeF64
2377			return nil
2378		}
2379		return c.compileSimpleConversion(arm64.FMOVD, registerTypeVector, runtimeValueTypeF64)
2380	}
2381	
2382	func (c *arm64Compiler) compileSimpleConversion(inst asm.Instruction, destinationRegType registerType, resultRuntimeValueType runtimeValueType) error {
2383		source, err := c.popValueOnRegister()
2384		if err != nil {
2385			return err
2386		}
2387	
2388		destinationReg, err := c.allocateRegister(destinationRegType)
2389		if err != nil {
2390			return err
2391		}
2392	
2393		c.assembler.CompileRegisterToRegister(inst, source.register, destinationReg)
2394		c.pushRuntimeValueLocationOnRegister(destinationReg, resultRuntimeValueType)
2395		return nil
2396	}
2397	
2398	// compileExtend implements compiler.compileExtend for the arm64 architecture.
2399	func (c *arm64Compiler) compileExtend(o *wazeroir.UnionOperation) error {
2400		signed := o.B1 != 0
2401		if signed {
2402			return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
2403		} else {
2404			return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI64)
2405		}
2406	}
2407	
2408	// compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the arm64 architecture.
2409	func (c *arm64Compiler) compileSignExtend32From8() error {
2410		return c.compileSimpleUnop(arm64.SXTBW, runtimeValueTypeI32)
2411	}
2412	
2413	// compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the arm64 architecture.
2414	func (c *arm64Compiler) compileSignExtend32From16() error {
2415		return c.compileSimpleUnop(arm64.SXTHW, runtimeValueTypeI32)
2416	}
2417	
2418	// compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the arm64 architecture.
2419	func (c *arm64Compiler) compileSignExtend64From8() error {
2420		return c.compileSimpleUnop(arm64.SXTB, runtimeValueTypeI64)
2421	}
2422	
2423	// compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the arm64 architecture.
2424	func (c *arm64Compiler) compileSignExtend64From16() error {
2425		return c.compileSimpleUnop(arm64.SXTH, runtimeValueTypeI64)
2426	}
2427	
2428	// compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the arm64 architecture.
2429	func (c *arm64Compiler) compileSignExtend64From32() error {
2430		return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
2431	}
2432	
2433	func (c *arm64Compiler) compileSimpleUnop(inst asm.Instruction, resultRuntimeValueType runtimeValueType) error {
2434		v, err := c.popValueOnRegister()
2435		if err != nil {
2436			return err
2437		}
2438		reg := v.register
2439		c.assembler.CompileRegisterToRegister(inst, reg, reg)
2440		c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType)
2441		return nil
2442	}
2443	
2444	// compileEq implements compiler.compileEq for the arm64 architecture.
2445	func (c *arm64Compiler) compileEq(o *wazeroir.UnionOperation) error {
2446		return c.emitEqOrNe(true, wazeroir.UnsignedType(o.B1))
2447	}
2448	
2449	// compileNe implements compiler.compileNe for the arm64 architecture.
2450	func (c *arm64Compiler) compileNe(o *wazeroir.UnionOperation) error {
2451		return c.emitEqOrNe(false, wazeroir.UnsignedType(o.B1))
2452	}
2453	
2454	// emitEqOrNe implements compiler.compileEq and compiler.compileNe for the arm64 architecture.
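// Note: the comparison result is not materialized into 0/1 here; it is pushed as a
// conditional-register location and converted into an actual register value
// (presumably via CSET) only if and when a later operation needs it.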
2455	func (c *arm64Compiler) emitEqOrNe(isEq bool, unsignedType wazeroir.UnsignedType) error {
2456		x1, x2, err := c.popTwoValuesOnRegisters()
2457		if err != nil {
2458			return err
2459		}
2460	
2461		var inst asm.Instruction
2462		switch unsignedType {
2463		case wazeroir.UnsignedTypeI32:
2464			inst = arm64.CMPW
2465		case wazeroir.UnsignedTypeI64:
2466			inst = arm64.CMP
2467		case wazeroir.UnsignedTypeF32:
2468			inst = arm64.FCMPS
2469		case wazeroir.UnsignedTypeF64:
2470			inst = arm64.FCMPD
2471		}
2472	
2473		c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
2474	
2475		// Push the comparison result as a conditional register value.
2476		cond := arm64.CondNE
2477		if isEq {
2478			cond = arm64.CondEQ
2479		}
2480		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(cond)
2481		return nil
2482	}
2483	
2484	// compileEqz implements compiler.compileEqz for the arm64 architecture.
2485	func (c *arm64Compiler) compileEqz(o *wazeroir.UnionOperation) error {
2486		x1, err := c.popValueOnRegister()
2487		if err != nil {
2488			return err
2489		}
2490	
2491		var inst asm.Instruction
2492		unsignedInt := wazeroir.UnsignedInt(o.B1)
2493		switch unsignedInt {
2494		case wazeroir.UnsignedInt32:
2495			inst = arm64.CMPW
2496		case wazeroir.UnsignedInt64:
2497			inst = arm64.CMP
2498		}
2499	
2500		c.assembler.CompileTwoRegistersToNone(inst, arm64.RegRZR, x1.register)
2501	
2502		// Push the comparison result as a conditional register value.
2503		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
2504		return nil
2505	}
2506	
2507	// compileLt implements compiler.compileLt for the arm64 architecture.
2508	func (c *arm64Compiler) compileLt(o *wazeroir.UnionOperation) error {
2509		x1, x2, err := c.popTwoValuesOnRegisters()
2510		if err != nil {
2511			return err
2512		}
2513	
2514		var inst asm.Instruction
2515		var conditionalRegister asm.ConditionalRegisterState
2516		signedType := wazeroir.SignedType(o.B1)
2517		switch signedType {
2518		case wazeroir.SignedTypeUint32:
2519			inst = arm64.CMPW
2520			conditionalRegister = arm64.CondLO
2521		case wazeroir.SignedTypeUint64:
2522			inst = arm64.CMP
2523			conditionalRegister = arm64.CondLO
2524		case wazeroir.SignedTypeInt32:
2525			inst = arm64.CMPW
2526			conditionalRegister = arm64.CondLT
2527		case wazeroir.SignedTypeInt64:
2528			inst = arm64.CMP
2529			conditionalRegister = arm64.CondLT
2530		case wazeroir.SignedTypeFloat32:
2531			inst = arm64.FCMPS
2532			conditionalRegister = arm64.CondMI
2533		case wazeroir.SignedTypeFloat64:
2534			inst = arm64.FCMPD
2535			conditionalRegister = arm64.CondMI
2536		}
2537	
2538		c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
2539	
2540		// Push the comparison result as a conditional register value.
2541		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
2542		return nil
2543	}
2544	
2545	// compileGt implements compiler.compileGt for the arm64 architecture.
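// Condition-code selection mirrors compileLt above: HI for unsigned, GT for signed.
// For floats, GT on the FCMP flags is NaN-safe: an unordered comparison fails the
// condition, yielding the false result Wasm requires for comparisons involving NaN.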
2546	func (c *arm64Compiler) compileGt(o *wazeroir.UnionOperation) error {
2547		x1, x2, err := c.popTwoValuesOnRegisters()
2548		if err != nil {
2549			return err
2550		}
2551	
2552		var inst asm.Instruction
2553		var conditionalRegister asm.ConditionalRegisterState
2554		signedType := wazeroir.SignedType(o.B1)
2555		switch signedType {
2556		case wazeroir.SignedTypeUint32:
2557			inst = arm64.CMPW
2558			conditionalRegister = arm64.CondHI
2559		case wazeroir.SignedTypeUint64:
2560			inst = arm64.CMP
2561			conditionalRegister = arm64.CondHI
2562		case wazeroir.SignedTypeInt32:
2563			inst = arm64.CMPW
2564			conditionalRegister = arm64.CondGT
2565		case wazeroir.SignedTypeInt64:
2566			inst = arm64.CMP
2567			conditionalRegister = arm64.CondGT
2568		case wazeroir.SignedTypeFloat32:
2569			inst = arm64.FCMPS
2570			conditionalRegister = arm64.CondGT
2571		case wazeroir.SignedTypeFloat64:
2572			inst = arm64.FCMPD
2573			conditionalRegister = arm64.CondGT
2574		}
2575	
2576		c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
2577	
2578		// Push the comparison result as a conditional register value.
2579		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
2580		return nil
2581	}
2582	
2583	// compileLe implements compiler.compileLe for the arm64 architecture.
2584	func (c *arm64Compiler) compileLe(o *wazeroir.UnionOperation) error {
2585		x1, x2, err := c.popTwoValuesOnRegisters()
2586		if err != nil {
2587			return err
2588		}
2589	
2590		var inst asm.Instruction
2591		var conditionalRegister asm.ConditionalRegisterState
2592		signedType := wazeroir.SignedType(o.B1)
2593		switch signedType {
2594		case wazeroir.SignedTypeUint32:
2595			inst = arm64.CMPW
2596			conditionalRegister = arm64.CondLS
2597		case wazeroir.SignedTypeUint64:
2598			inst = arm64.CMP
2599			conditionalRegister = arm64.CondLS
2600		case wazeroir.SignedTypeInt32:
2601			inst = arm64.CMPW
2602			conditionalRegister = arm64.CondLE
2603		case wazeroir.SignedTypeInt64:
2604			inst = arm64.CMP
2605			conditionalRegister = arm64.CondLE
2606		case wazeroir.SignedTypeFloat32:
2607			inst = arm64.FCMPS
2608			conditionalRegister = arm64.CondLS
2609		case wazeroir.SignedTypeFloat64:
2610			inst = arm64.FCMPD
2611			conditionalRegister = arm64.CondLS
2612		}
2613	
2614		c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
2615	
2616		// Push the comparison result as a conditional register value.
2617		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
2618		return nil
2619	}
2620	
2621	// compileGe implements compiler.compileGe for the arm64 architecture.
2622	func (c *arm64Compiler) compileGe(o *wazeroir.UnionOperation) error {
2623		x1, x2, err := c.popTwoValuesOnRegisters()
2624		if err != nil {
2625			return err
2626		}
2627	
2628		var inst asm.Instruction
2629		var conditionalRegister asm.ConditionalRegisterState
2630		signedType := wazeroir.SignedType(o.B1)
2631		switch signedType {
2632		case wazeroir.SignedTypeUint32:
2633			inst = arm64.CMPW
2634			conditionalRegister = arm64.CondHS
2635		case wazeroir.SignedTypeUint64:
2636			inst = arm64.CMP
2637			conditionalRegister = arm64.CondHS
2638		case wazeroir.SignedTypeInt32:
2639			inst = arm64.CMPW
2640			conditionalRegister = arm64.CondGE
2641		case wazeroir.SignedTypeInt64:
2642			inst = arm64.CMP
2643			conditionalRegister = arm64.CondGE
2644		case wazeroir.SignedTypeFloat32:
2645			inst = arm64.FCMPS
2646			conditionalRegister = arm64.CondGE
2647		case wazeroir.SignedTypeFloat64:
2648			inst = arm64.FCMPD
2649			conditionalRegister = arm64.CondGE
2650		}
2651	
2652		c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)
2653	
2654		// Push the comparison result as a conditional register value.
2655		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
2656		return nil
2657	}
2658	
2659	// compileLoad implements compiler.compileLoad for the arm64 architecture.
2660	func (c *arm64Compiler) compileLoad(o *wazeroir.UnionOperation) error {
2661		var (
2662			isFloat           bool
2663			loadInst          asm.Instruction
2664			targetSizeInBytes int64
2665			vt                runtimeValueType
2666		)
2667	
2668		unsignedType := wazeroir.UnsignedType(o.B1)
2669		offset := uint32(o.U2)
2670	
2671		switch unsignedType {
2672		case wazeroir.UnsignedTypeI32:
2673			loadInst = arm64.LDRW
2674			targetSizeInBytes = 32 / 8
2675			vt = runtimeValueTypeI32
2676		case wazeroir.UnsignedTypeI64:
2677			loadInst = arm64.LDRD
2678			targetSizeInBytes = 64 / 8
2679			vt = runtimeValueTypeI64
2680		case wazeroir.UnsignedTypeF32:
2681			loadInst = arm64.FLDRS
2682			isFloat = true
2683			targetSizeInBytes = 32 / 8
2684			vt = runtimeValueTypeF32
2685		case wazeroir.UnsignedTypeF64:
2686			loadInst = arm64.FLDRD
2687			isFloat = true
2688			targetSizeInBytes = 64 / 8
2689			vt = runtimeValueTypeF64
2690		}
2691		return c.compileLoadImpl(offset, loadInst, targetSizeInBytes, isFloat, vt)
2692	}
2693	
2694	// compileLoad8 implements compiler.compileLoad8 for the arm64 architecture.
2695	func (c *arm64Compiler) compileLoad8(o *wazeroir.UnionOperation) error {
2696		var loadInst asm.Instruction
2697		var vt runtimeValueType
2698	
2699		signedInt := wazeroir.SignedInt(o.B1)
2700		offset := uint32(o.U2)
2701	
2702		switch signedInt {
2703		case wazeroir.SignedInt32:
2704			loadInst = arm64.LDRSBW
2705			vt = runtimeValueTypeI32
2706		case wazeroir.SignedInt64:
2707			loadInst = arm64.LDRSBD
2708			vt = runtimeValueTypeI64
2709		case wazeroir.SignedUint32:
2710			loadInst = arm64.LDRB
2711			vt = runtimeValueTypeI32
2712		case wazeroir.SignedUint64:
2713			loadInst = arm64.LDRB
2714			vt = runtimeValueTypeI64
2715		}
2716		return c.compileLoadImpl(offset, loadInst, 1, false, vt)
2717	}
2718	
2719	// compileLoad16 implements compiler.compileLoad16 for the arm64 architecture.
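// As in compileLoad8 above, the signed cases use sign-extending loads (LDRSH into
// a 32- or 64-bit destination), while the unsigned cases use the zero-extending LDRH.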
2720	func (c *arm64Compiler) compileLoad16(o *wazeroir.UnionOperation) error {
2721		var loadInst asm.Instruction
2722		var vt runtimeValueType
2723	
2724		signedInt := wazeroir.SignedInt(o.B1)
2725		offset := uint32(o.U2)
2726	
2727		switch signedInt {
2728		case wazeroir.SignedInt32:
2729			loadInst = arm64.LDRSHW
2730			vt = runtimeValueTypeI32
2731		case wazeroir.SignedInt64:
2732			loadInst = arm64.LDRSHD
2733			vt = runtimeValueTypeI64
2734		case wazeroir.SignedUint32:
2735			loadInst = arm64.LDRH
2736			vt = runtimeValueTypeI32
2737		case wazeroir.SignedUint64:
2738			loadInst = arm64.LDRH
2739			vt = runtimeValueTypeI64
2740		}
2741		return c.compileLoadImpl(offset, loadInst, 16/8, false, vt)
2742	}
2743	
2744	// compileLoad32 implements compiler.compileLoad32 for the arm64 architecture.
2745	func (c *arm64Compiler) compileLoad32(o *wazeroir.UnionOperation) error {
2746		var loadInst asm.Instruction
2747		signed := o.B1 == 1
2748		offset := uint32(o.U2)
2749	
2750		if signed {
2751			loadInst = arm64.LDRSW
2752		} else {
2753			loadInst = arm64.LDRW
2754		}
2755		return c.compileLoadImpl(offset, loadInst, 32/8, false, runtimeValueTypeI64)
2756	}
2757	
2758	// compileLoadImpl implements the compileLoad* variants for the arm64 architecture.
2759	func (c *arm64Compiler) compileLoadImpl(offsetArg uint32, loadInst asm.Instruction,
2760		targetSizeInBytes int64, isFloat bool, resultRuntimeValueType runtimeValueType,
2761	) error {
2762		offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2763		if err != nil {
2764			return err
2765		}
2766	
2767		resultRegister := offsetReg
2768		if isFloat {
2769			resultRegister, err = c.allocateRegister(registerTypeVector)
2770			if err != nil {
2771				return err
2772			}
2773		}
2774	
2775		// "resultRegister = [arm64ReservedRegisterForMemory + offsetReg]"
2776		// In other words, "resultRegister = memory.Buffer[offset: offset+targetSizeInBytes]"
2777		c.assembler.CompileMemoryWithRegisterOffsetToRegister(
2778			loadInst,
2779			arm64ReservedRegisterForMemory, offsetReg,
2780			resultRegister,
2781		)
2782	
2783		c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
2784		return nil
2785	}
2786	
2787	// compileStore implements compiler.compileStore for the arm64 architecture.
2788	func (c *arm64Compiler) compileStore(o *wazeroir.UnionOperation) error {
2789		var movInst asm.Instruction
2790		var targetSizeInBytes int64
2791		unsignedType := wazeroir.UnsignedType(o.B1)
2792		offset := uint32(o.U2)
2793		switch unsignedType {
2794		case wazeroir.UnsignedTypeI32:
2795			movInst = arm64.STRW
2796			targetSizeInBytes = 32 / 8
2797		case wazeroir.UnsignedTypeI64:
2798			movInst = arm64.STRD
2799			targetSizeInBytes = 64 / 8
2800		case wazeroir.UnsignedTypeF32:
2801			movInst = arm64.FSTRS
2802			targetSizeInBytes = 32 / 8
2803		case wazeroir.UnsignedTypeF64:
2804			movInst = arm64.FSTRD
2805			targetSizeInBytes = 64 / 8
2806		}
2807		return c.compileStoreImpl(offset, movInst, targetSizeInBytes)
2808	}
2809	
2810	// compileStore8 implements compiler.compileStore8 for the arm64 architecture.
2811	func (c *arm64Compiler) compileStore8(o *wazeroir.UnionOperation) error {
2812		return c.compileStoreImpl(uint32(o.U2), arm64.STRB, 1)
2813	}
2814	
2815	// compileStore16 implements compiler.compileStore16 for the arm64 architecture.
2816	func (c *arm64Compiler) compileStore16(o *wazeroir.UnionOperation) error {
2817		return c.compileStoreImpl(uint32(o.U2), arm64.STRH, 16/8)
2818	}
2819	
2820	// compileStore32 implements compiler.compileStore32 for the arm64 architecture.
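// Note: Wasm's narrowing stores keep only the low bits of the operand, so
// STRW/STRH/STRB need no explicit masking: each stores just the low 32/16/8
// bits of the register.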
2821	func (c *arm64Compiler) compileStore32(o *wazeroir.UnionOperation) error {
2822		return c.compileStoreImpl(uint32(o.U2), arm64.STRW, 32/8)
2823	}
2824	
2825	// compileStoreImpl implements the compileStore* variants for the arm64 architecture.
2826	func (c *arm64Compiler) compileStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error {
2827		val, err := c.popValueOnRegister()
2828		if err != nil {
2829			return err
2830		}
2831		// Mark the register as temporarily used, since compileMemoryAccessOffsetSetup might allocate a register.
2832		c.markRegisterUsed(val.register)
2833	
2834		offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2835		if err != nil {
2836			return err
2837		}
2838	
2839		// "[arm64ReservedRegisterForMemory + offsetReg] = val.register"
2840		// In other words, "memory.Buffer[offset: offset+targetSizeInBytes] = val.register"
2841		c.assembler.CompileRegisterToMemoryWithRegisterOffset(
2842			storeInst, val.register,
2843			arm64ReservedRegisterForMemory, offsetReg,
2844		)
2845	
2846		c.markRegisterUnused(val.register)
2847		return nil
2848	}
2849	
2850	// compileMemoryAccessOffsetSetup pops the top value from the stack (called "base"), stores "base + offsetArg"
2851	// into a register, and returns the stored register. We call the result "offset" because we access the memory
2852	// as memory.Buffer[offset: offset+targetSizeInBytes].
2853	//
2854	// Note: this also emits the instructions to check for out-of-bounds memory access.
2855	// In other words, if offset+targetSizeInBytes exceeds the memory size, the code exits with the nativeCallStatusCodeMemoryOutOfBounds status.
2856	func (c *arm64Compiler) compileMemoryAccessOffsetSetup(offsetArg uint32, targetSizeInBytes int64) (offsetRegister asm.Register, err error) {
2857		base, err := c.popValueOnRegister()
2858		if err != nil {
2859			return 0, err
2860		}
2861	
2862		offsetRegister = base.register
2863		if isZeroRegister(base.register) {
2864			offsetRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
2865			if err != nil {
2866				return
2867			}
2868			c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetRegister)
2869		}
2870	
2871		if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxUint32 {
2872			// "offsetRegister = base + offsetArg + targetSizeInBytes"
2873			c.assembler.CompileConstToRegister(arm64.ADD, offsetConst, offsetRegister)
2874		} else {
2875			// If the offset constant is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
2876			c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
2877			return
2878		}
2879	
2880		// "arm64ReservedRegisterForTemporary = len(memory.Buffer)"
2881		c.assembler.CompileMemoryToRegister(arm64.LDRD,
2882			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
2883			arm64ReservedRegisterForTemporary)
2884	
2885		// Check if offsetRegister (= base+offsetArg+targetSizeInBytes) > len(memory.Buffer).
2886		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, offsetRegister)
2887	
2888		// If offsetRegister (= base+offsetArg+targetSizeInBytes) exceeds the memory length,
2889		// we exit the function with nativeCallStatusCodeMemoryOutOfBounds.
2890		c.compileMaybeExitFromNativeCode(arm64.BCONDLS, nativeCallStatusCodeMemoryOutOfBounds)
2891	
2892		// Otherwise, we subtract targetSizeInBytes from offsetRegister.
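	// This restores offsetRegister to base+offsetArg, the first byte of the access;
	// the comparison above already guaranteed that the whole range fits within len(memory.Buffer).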
2893		c.assembler.CompileConstToRegister(arm64.SUB, targetSizeInBytes, offsetRegister)
2894		return offsetRegister, nil
2895	}
2896	
2897	// compileMemoryGrow implements compiler.compileMemoryGrow for the arm64 architecture.
2898	func (c *arm64Compiler) compileMemoryGrow() error {
2899		if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2900			return err
2901		}
2902	
2903		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexMemoryGrow); err != nil {
2904			return err
2905		}
2906	
2907		// After the call returns, we re-initialize the reserved registers just like in the preamble of functions.
2908		c.compileReservedStackBasePointerRegisterInitialization()
2909		c.compileReservedMemoryRegisterInitialization()
2910		return nil
2911	}
2912	
2913	// compileMemorySize implements compiler.compileMemorySize for the arm64 architecture.
2914	func (c *arm64Compiler) compileMemorySize() error {
2915		if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2916			return err
2917		}
2918	
2919		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
2920		if err != nil {
2921			return err
2922		}
2923	
2924		// "reg = len(memory.Buffer)"
2925		c.assembler.CompileMemoryToRegister(
2926			arm64.LDRD,
2927			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
2928			reg,
2929		)
2930	
2931		// memory.size returns the size in pages, so we divide the byte length by the page size.
2932		// "reg = reg >> wasm.MemoryPageSizeInBits (== reg / wasm.MemoryPageSize)"
2933		c.assembler.CompileConstToRegister(
2934			arm64.LSR,
2935			wasm.MemoryPageSizeInBits,
2936			reg,
2937		)
2938	
2939		c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
2940		return nil
2941	}
2942	
2943	// compileCallGoFunction adds instructions to call a Go function; for built-in functions,
2944	// the builtinFunction index identifies the target. compilerStatus is set before making the call,
2945	// and it should be either nativeCallStatusCodeCallBuiltInFunction or nativeCallStatusCodeCallGoHostFunction.
2946	func (c *arm64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode, builtinFunction wasm.Index) error {
2947		// Release all the registers, as our calling convention treats every register as caller-saved.
2948		if err := c.compileReleaseAllRegistersToStack(); err != nil {
2949			return err
2950		}
2951	
2952		if compilerStatus == nativeCallStatusCodeCallBuiltInFunction {
2953			// Set the target built-in function index.
2954			// "tmp = $index"
2955			c.assembler.CompileConstToRegister(
2956				arm64.MOVD,
2957				int64(builtinFunction),
2958				arm64ReservedRegisterForTemporary,
2959			)
2960			// "[arm64ReservedRegisterForCallEngine + callEngineExitContextBuiltinFunctionCallIndexOffset] = tmp"
2961			// In other words, "ce.builtinFunctionCallIndex = tmp (== $index)"
2962			c.assembler.CompileRegisterToMemory(
2963				arm64.STRW,
2964				arm64ReservedRegisterForTemporary,
2965				arm64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset,
2966			)
2967		}
2968	
2969		c.compileExitFromNativeCode(compilerStatus)
2970		return nil
2971	}
2972	
2973	// compileConstI32 implements compiler.compileConstI32 for the arm64 architecture.
2974	func (c *arm64Compiler) compileConstI32(o *wazeroir.UnionOperation) error {
2975		return c.compileIntConstant(true, o.U1)
2976	}
2977	
2978	// compileConstI64 implements compiler.compileConstI64 for the arm64 architecture.
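// Like compileConstI32 above, this defers to compileIntConstant below, which
// special-cases a zero constant by pushing the zero register instead of loading anything.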
2979	func (c *arm64Compiler) compileConstI64(o *wazeroir.UnionOperation) error {
2980		return c.compileIntConstant(false, o.U1)
2981	}
2982	
2983	// compileIntConstant adds instructions to load an integer constant.
2984	// is32bit is true if the constant is originally a 32-bit value, false otherwise.
2985	// value holds the constant to load (zero-extended in the 32-bit case).
2986	func (c *arm64Compiler) compileIntConstant(is32bit bool, value uint64) error {
2987		if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2988			return err
2989		}
2990	
2991		var inst asm.Instruction
2992		var vt runtimeValueType
2993		if is32bit {
2994			inst = arm64.MOVW
2995			vt = runtimeValueTypeI32
2996		} else {
2997			inst = arm64.MOVD
2998			vt = runtimeValueTypeI64
2999		}
3000	
3001		if value == 0 {
3002			c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, vt)
3003		} else {
3004			// Take a register to load the value.
3005			reg, err := c.allocateRegister(registerTypeGeneralPurpose)
3006			if err != nil {
3007				return err
3008			}
3009	
3010			c.assembler.CompileConstToRegister(inst, int64(value), reg)
3011	
3012			c.pushRuntimeValueLocationOnRegister(reg, vt)
3013		}
3014		return nil
3015	}
3016	
3017	// compileConstF32 implements compiler.compileConstF32 for the arm64 architecture.
3018	func (c *arm64Compiler) compileConstF32(o *wazeroir.UnionOperation) error {
3019		return c.compileFloatConstant(true, o.U1 /*uint64(math.Float32bits(o.Value))*/)
3020	}
3021	
3022	// compileConstF64 implements compiler.compileConstF64 for the arm64 architecture.
3023	func (c *arm64Compiler) compileConstF64(o *wazeroir.UnionOperation) error {
3024		return c.compileFloatConstant(false, o.U1 /*math.Float64bits(o.Value)*/)
3025	}
3026	
3027	// compileFloatConstant adds instructions to load a float constant.
3028	// is32bit is true if the constant is originally a 32-bit value, false otherwise.
3029	// value holds the bit representation of the float constant to load (zero-extended in the 32-bit case).
3030	func (c *arm64Compiler) compileFloatConstant(is32bit bool, value uint64) error {
3031		if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3032			return err
3033		}
3034	
3035		// Take a register to load the value.
3036		reg, err := c.allocateRegister(registerTypeVector)
3037		if err != nil {
3038			return err
3039		}
3040	
3041		tmpReg := arm64.RegRZR
3042		if value != 0 {
3043			tmpReg = arm64ReservedRegisterForTemporary
3044			var inst asm.Instruction
3045			if is32bit {
3046				inst = arm64.MOVW
3047			} else {
3048				inst = arm64.MOVD
3049			}
3050			c.assembler.CompileConstToRegister(inst, int64(value), tmpReg)
3051		}
3052	
3053		// Use the FMOV instruction to move the value from the integer register into the float one.
3054		var inst asm.Instruction
3055		var vt runtimeValueType
3056		if is32bit {
3057			vt = runtimeValueTypeF32
3058			inst = arm64.FMOVS
3059		} else {
3060			vt = runtimeValueTypeF64
3061			inst = arm64.FMOVD
3062		}
3063		c.assembler.CompileRegisterToRegister(inst, tmpReg, reg)
3064	
3065		c.pushRuntimeValueLocationOnRegister(reg, vt)
3066		return nil
3067	}
3068	
3069	// compileMemoryInit implements compiler.compileMemoryInit for the arm64 architecture.
3070	func (c *arm64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error {
3071		dataIndex := uint32(o.U1)
3072		return c.compileInitImpl(false, dataIndex, 0)
3073	}
3074	
3075	// compileInitImpl implements compileTableInit and compileMemoryInit.
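// Both operations are bounds-checked up front on the source (data/element instance)
// and the destination (memory/table) before any copying begins, so an out-of-range
// access traps without partial writes.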
// compileInitImpl implements compileTableInit and compileMemoryInit.
//
// TODO: the compiled code in this function should be reused and compiled at once as
// the code is independent of any module.
func (c *arm64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
    outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
    if isTable {
        outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
    }

    copySize, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    c.markRegisterUsed(copySize.register)

    sourceOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(sourceOffset.register) {
        sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
    }
    c.markRegisterUsed(sourceOffset.register)

    destinationOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(destinationOffset.register) {
        destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
    }
    c.markRegisterUsed(destinationOffset.register)

    tableInstanceAddressReg := asm.NilRegister
    if isTable {
        tableInstanceAddressReg, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.markRegisterUsed(tableInstanceAddressReg)
    }

    if !isZeroRegister(copySize.register) {
        // sourceOffset += size.
        c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
        // destinationOffset += size.
        c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
    }

    instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    if isTable {
        c.compileLoadElemInstanceAddress(index, instanceAddr)
    } else {
        c.compileLoadDataInstanceAddress(index, instanceAddr)
    }

    // Check the source bounds: both DataInstance and ElementInstance store their length at offset 8.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        instanceAddr, 8,
        arm64ReservedRegisterForTemporary)

    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
    // If sourceOffset is out of bounds, raise the error.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

    // Otherwise, check the destination bounds as well.
    if isTable {
        // arm64ReservedRegisterForTemporary = &tables[0]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
            arm64ReservedRegisterForTemporary)
        // tableInstanceAddressReg = [arm64ReservedRegisterForTemporary + tableIndex*8]
        //                         = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
        //                         = tables[tableIndex]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
            tableInstanceAddressReg)
        // arm64ReservedRegisterForTemporary = [tableInstanceAddressReg+tableInstanceTableLenOffset] = len(tables[tableIndex])
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            tableInstanceAddressReg, tableInstanceTableLenOffset,
            arm64ReservedRegisterForTemporary)
    } else {
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
            arm64ReservedRegisterForTemporary)
    }

    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
    // If destinationOffset is out of bounds, raise the error.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

    // Otherwise, we are ready to copy the value from source to destination.
    if !isZeroRegister(copySize.register) {
        // If the size equals zero, we can skip the entire instructions below.
        c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
        skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)

        var ldr, str asm.Instruction
        var movSize int64
        if isTable {
            ldr, str = arm64.LDRD, arm64.STRD
            movSize = 8

            // arm64ReservedRegisterForTemporary = &Table[0]
            c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg,
                tableInstanceTableOffset, arm64ReservedRegisterForTemporary)
            // destinationOffset = (destinationOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
            c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                destinationOffset.register, pointerSizeLog2,
                arm64ReservedRegisterForTemporary, destinationOffset.register)

            // arm64ReservedRegisterForTemporary = &ElementInstance.References[0]
            c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
            // sourceOffset = (sourceOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
            c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                sourceOffset.register, pointerSizeLog2,
                arm64ReservedRegisterForTemporary, sourceOffset.register)

            // copySize = copySize << pointerSizeLog2
            c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
        } else {
            ldr, str = arm64.LDRB, arm64.STRB
            movSize = 1

            // destinationOffset += memory buffer's absolute address.
            c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)

            // sourceOffset += data buffer's absolute address.
            c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
            c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, sourceOffset.register)
        }

        // Negate the counter.
        c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)

        beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

        // arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
        c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
            sourceOffset.register, copySize.register,
            arm64ReservedRegisterForTemporary)
        // [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
        c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
            arm64ReservedRegisterForTemporary,
            destinationOffset.register, copySize.register,
        )

        // Advance the negated counter by movSize, and continue the loop while it is still negative.
        c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
        c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)

        c.assembler.SetJumpTargetOnNext(skipCopyJump)
    }

    c.markRegisterUnused(copySize.register, sourceOffset.register,
        destinationOffset.register, instanceAddr, tableInstanceAddressReg)
    return nil
}

// compileDataDrop implements compiler.compileDataDrop for the arm64 architecture.
func (c *arm64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    dataIndex := uint32(o.U1)
    c.compileLoadDataInstanceAddress(dataIndex, tmp)

    // Clears the content of DataInstance[o.DataIndex] (== []byte type).
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
    return nil
}

func (c *arm64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) {
    // dst = dataIndex * dataInstanceStructSize
    c.assembler.CompileConstToRegister(arm64.MOVD, int64(dataIndex)*dataInstanceStructSize, dst)

    // arm64ReservedRegisterForTemporary = &moduleInstance.DataInstances[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
        arm64ReservedRegisterForTemporary,
    )

    // dst = arm64ReservedRegisterForTemporary + dst
    //     = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize
    //     = &moduleInstance.DataInstances[dataIndex]
    c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
}

// compileMemoryCopy implements compiler.compileMemoryCopy for the arm64 architecture.
func (c *arm64Compiler) compileMemoryCopy() error {
    return c.compileCopyImpl(false, 0, 0)
}
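// The copy loops in compileInitImpl above and compileCopyImpl below iterate with a
// negated counter: the offsets are first advanced past the end of the region, the
// counter is negated, and the loop runs it back up to zero. A minimal Go sketch of
// the same control flow (hypothetical names, illustration only):
//
//    n := int64(size)
//    srcEnd, dstEnd := srcOff+n, dstOff+n // offsets advanced by size
//    for i := -n; i < 0; i++ {
//        mem[dstEnd+i] = mem[srcEnd+i] // ldr/str with register offset i
//    }
//
// This keeps the loop body to one load, one store, and a single ADDS whose flags
// drive the backward branch (B.MI).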
// compileCopyImpl implements compileTableCopy and compileMemoryCopy.
//
// TODO: the compiled code in this function should be reused and compiled at once as
// the code is independent of any module.
func (c *arm64Compiler) compileCopyImpl(isTable bool, srcTableIndex, dstTableIndex uint32) error {
    outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
    if isTable {
        outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
    }

    copySize, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    c.markRegisterUsed(copySize.register)

    sourceOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(sourceOffset.register) {
        sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
    }
    c.markRegisterUsed(sourceOffset.register)

    destinationOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(destinationOffset.register) {
        destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
    }
    c.markRegisterUsed(destinationOffset.register)

    if !isZeroRegister(copySize.register) {
        // sourceOffset += size.
        c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
        // destinationOffset += size.
        c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
    }

    if isTable {
        // arm64ReservedRegisterForTemporary = &tables[0]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
            arm64ReservedRegisterForTemporary)
        // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + srcTableIndex*8]
        //                                   = [&tables[0] + srcTableIndex*sizeOf(*tableInstance)]
        //                                   = tables[srcTableIndex]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
            arm64ReservedRegisterForTemporary)
        // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[srcTableIndex])
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
            arm64ReservedRegisterForTemporary)
    } else {
        // arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
            arm64ReservedRegisterForTemporary)
    }

    // Check len >= sourceOffset.
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
    // If not, raise the out of bounds access error.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

    // Otherwise, check len >= destinationOffset.
    if isTable {
        // arm64ReservedRegisterForTemporary = &tables[0]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
            arm64ReservedRegisterForTemporary)
        // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + dstTableIndex*8]
        //                                   = [&tables[0] + dstTableIndex*sizeOf(*tableInstance)]
        //                                   = tables[dstTableIndex]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
            arm64ReservedRegisterForTemporary)
        // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[dstTableIndex])
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
            arm64ReservedRegisterForTemporary)
    }

    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
    // If not, raise the out of bounds access error.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

    // Otherwise, we are ready to copy the value from source to destination.
    var ldr, str asm.Instruction
    var movSize int64
    if isTable {
        ldr, str = arm64.LDRD, arm64.STRD
        movSize = 8
    } else {
        ldr, str = arm64.LDRB, arm64.STRB
        movSize = 1
    }

    // If the size equals zero, we can skip the entire instructions below.
    if !isZeroRegister(copySize.register) {
        c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
        skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)

        // If source offset < destination offset: for (i = size-1; i >= 0; i--) dst[i] = src[i];
        c.assembler.CompileTwoRegistersToNone(arm64.CMP, sourceOffset.register, destinationOffset.register)
        destLowerThanSourceJump := c.assembler.CompileJump(arm64.BCONDLS)
        var endJump asm.Node
        {
            // sourceOffset -= size.
            c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, sourceOffset.register)
            // destinationOffset -= size.
            c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, destinationOffset.register)

            if isTable {
                // arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
                c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
                    callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
                    arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
                    arm64ReservedRegisterForTemporary)
                // destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
                c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                    destinationOffset.register, pointerSizeLog2,
                    arm64ReservedRegisterForTemporary, destinationOffset.register)

                // arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0]
                c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
                    callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
                    arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
                    arm64ReservedRegisterForTemporary)
                // sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
                c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                    sourceOffset.register, pointerSizeLog2,
                    arm64ReservedRegisterForTemporary, sourceOffset.register)

                // copySize = copySize << pointerSizeLog2 as each element is 8 bytes and we copy one by one.
                c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
            } else {
                // sourceOffset += memory buffer's absolute address.
                c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
                // destinationOffset += memory buffer's absolute address.
                c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
            }

            beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

            // size -= movSize
            c.assembler.CompileConstToRegister(arm64.SUBS, movSize, copySize.register)

            // arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
            c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
                sourceOffset.register, copySize.register,
                arm64ReservedRegisterForTemporary)
            // [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
            c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
                arm64ReservedRegisterForTemporary,
                destinationOffset.register, copySize.register,
            )

            // If the value in copySize.register is not zero, continue the loop.
            c.assembler.CompileJump(arm64.BCONDNE).AssignJumpTarget(beginCopyLoop)

            // Otherwise, exit the loop.
            endJump = c.assembler.CompileJump(arm64.B)
        }

        // Else (destination offset <= source offset): for (i = 0; i < size; i++) dst[i] = src[i];
        c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump)
        {
            if isTable {
                // arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
                c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
                    callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
                    arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
                    arm64ReservedRegisterForTemporary)
                // destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
                c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                    destinationOffset.register, pointerSizeLog2,
                    arm64ReservedRegisterForTemporary, destinationOffset.register)

                // arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0]
                c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
                    callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
                    arm64ReservedRegisterForTemporary)
                c.assembler.CompileMemoryToRegister(arm64.LDRD,
                    arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
                    arm64ReservedRegisterForTemporary)
                // sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
                c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                    sourceOffset.register, pointerSizeLog2,
                    arm64ReservedRegisterForTemporary, sourceOffset.register)

                // copySize = copySize << pointerSizeLog2 as each element is 8 bytes and we copy one by one.
                c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
            } else {
                // sourceOffset += memory buffer's absolute address.
                c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
                // destinationOffset += memory buffer's absolute address.
                c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
            }

            // Negate the counter.
            c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)

            beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

            // arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
            c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
                sourceOffset.register, copySize.register,
                arm64ReservedRegisterForTemporary)
            // [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
            c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
                arm64ReservedRegisterForTemporary,
                destinationOffset.register, copySize.register,
            )

            // size += movSize
            c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
            c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
        }
        c.assembler.SetJumpTargetOnNext(skipCopyJump)
        c.assembler.SetJumpTargetOnNext(endJump)
    }

    // Mark all the operand registers as unused.
    c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register)

    return nil
}
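// Choosing the copy direction matters because the source and destination ranges may
// overlap. A runnable Go sketch of the rule compileCopyImpl implements (illustration
// only, plain slices instead of linear memory):
//
//    func overlapSafeCopy(mem []byte, dst, src, n int) {
//        if src < dst {
//            for i := n - 1; i >= 0; i-- { // copy backwards so the tail isn't clobbered
//                mem[dst+i] = mem[src+i]
//            }
//        } else {
//            for i := 0; i < n; i++ { // forward copy is safe when dst <= src
//                mem[dst+i] = mem[src+i]
//            }
//        }
//    }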
// compileMemoryFill implements compiler.compileMemoryFill for the arm64 architecture.
func (c *arm64Compiler) compileMemoryFill() error {
    return c.compileFillImpl(false, 0)
}

// compileFillImpl implements compileTableFill and compileMemoryFill.
//
// TODO: the compiled code in this function should be reused and compiled at once as
// the code is independent of any module.
func (c *arm64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error {
    outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
    if isTable {
        outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
    }

    fillSize, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    c.markRegisterUsed(fillSize.register)

    value, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    c.markRegisterUsed(value.register)

    destinationOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(destinationOffset.register) {
        destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
    }
    c.markRegisterUsed(destinationOffset.register)

    // destinationOffset += size.
    c.assembler.CompileRegisterToRegister(arm64.ADD, fillSize.register, destinationOffset.register)

    if isTable {
        // arm64ReservedRegisterForTemporary = &tables[0]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
            arm64ReservedRegisterForTemporary)
        // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + tableIndex*8]
        //                                   = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
        //                                   = tables[tableIndex]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
            arm64ReservedRegisterForTemporary)
        // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[tableIndex])
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
            arm64ReservedRegisterForTemporary)
    } else {
        // arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
            arm64ReservedRegisterForTemporary)
    }

    // Check len >= destinationOffset.
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)

    // If not, raise the runtime error.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

    // Otherwise, we are ready to fill the destination region with the value.
    // If the size equals zero, we can skip the entire instructions below.
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, fillSize.register)
    skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)

    // destinationOffset -= size.
    c.assembler.CompileRegisterToRegister(arm64.SUB, fillSize.register, destinationOffset.register)

    var str asm.Instruction
    var movSize int64
    if isTable {
        str = arm64.STRD
        movSize = 8

        // arm64ReservedRegisterForTemporary = &Tables[tableIndex].Table[0]
        c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
            callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
            arm64ReservedRegisterForTemporary)
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
            arm64ReservedRegisterForTemporary)
        // destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[tableIndex].Table[0]
        c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
            destinationOffset.register, pointerSizeLog2,
            arm64ReservedRegisterForTemporary, destinationOffset.register)

        // fillSize = fillSize << pointerSizeLog2 as each element is 8 bytes and we fill one by one.
        c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, fillSize.register)
    } else {
        str = arm64.STRB
        movSize = 1

        // destinationOffset += memory buffer's absolute address.
        c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
    }

    // Naively implement the fill with a loop, writing one element at a time.
    beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

    // size -= movSize
    c.assembler.CompileConstToRegister(arm64.SUBS, movSize, fillSize.register)

    // [destinationOffset + fillSize.register] = value.register.
    c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
        value.register,
        destinationOffset.register, fillSize.register,
    )

    // If the value in fillSize.register is not zero, continue the loop.
    continueJump := c.assembler.CompileJump(arm64.BCONDNE)
    continueJump.AssignJumpTarget(beginCopyLoop)

    // Mark all the operand registers as unused.
    c.markRegisterUnused(fillSize.register, value.register, destinationOffset.register)

    c.assembler.SetJumpTargetOnNext(skipCopyJump)
    return nil
}

// compileTableInit implements compiler.compileTableInit for the arm64 architecture.
func (c *arm64Compiler) compileTableInit(o *wazeroir.UnionOperation) error {
    elemIndex := uint32(o.U1)
    tableIndex := uint32(o.U2)
    return c.compileInitImpl(true, elemIndex, tableIndex)
}

// compileTableCopy implements compiler.compileTableCopy for the arm64 architecture.
func (c *arm64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error {
    return c.compileCopyImpl(true, uint32(o.U1), uint32(o.U2))
}
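// compileFillImpl above emits a memset-style loop that counts the byte (or element)
// offset down to zero, storing the same value at each step. In Go terms (a minimal
// sketch, not the emitted code):
//
//    for i := size; i != 0; {
//        i -= movSize       // SUBS, which also sets the flags
//        mem[dst+i] = value // str with register offset
//    }                      // B.NE loops back while i != 0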
// compileElemDrop implements compiler.compileElemDrop for the arm64 architecture.
func (c *arm64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    elemIndex := uint32(o.U1)
    c.compileLoadElemInstanceAddress(elemIndex, tmp)

    // Clears the content of ElementInstances[o.ElemIndex] (== []interface{} type).
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
    return nil
}

func (c *arm64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) {
    // dst = elemIndex * elementInstanceStructSize
    c.assembler.CompileConstToRegister(arm64.MOVD, int64(elemIndex)*elementInstanceStructSize, dst)

    // arm64ReservedRegisterForTemporary = &moduleInstance.ElementInstances[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
        arm64ReservedRegisterForTemporary,
    )

    // dst = arm64ReservedRegisterForTemporary + dst
    //     = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize
    //     = &moduleInstance.ElementInstances[elemIndex]
    c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
}

// compileRefFunc implements compiler.compileRefFunc for the arm64 architecture.
func (c *arm64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    ref, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }
    // ref = [arm64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset]
    //     = &moduleEngine.functions[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
        ref)

    // ref = ref + int64(o.FunctionIndex)*sizeOf(function)
    //     = &moduleEngine.functions[index]
    functionIndex := int64(o.U1)
    c.assembler.CompileConstToRegister(arm64.ADD,
        functionIndex*functionSize,
        ref,
    )

    c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64)
    return nil
}
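// A funcref is therefore just the address &moduleEngine.functions[index], pushed as
// an opaque i64. The address arithmetic compileRefFunc emits corresponds to this Go
// sketch (hypothetical names, illustration only):
//
//    base := functionsElement0Address          // &functions[0], cached in the module context
//    ref := base + uintptr(index)*functionSize // &functions[index]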
// compileTableGet implements compiler.compileTableGet for the arm64 architecture.
func (c *arm64Compiler) compileTableGet(o *wazeroir.UnionOperation) error {
    ref, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }
    c.markRegisterUsed(ref)

    offset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }

    // arm64ReservedRegisterForTemporary = &tables[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
        arm64ReservedRegisterForTemporary)
    // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
    //                                   = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
    //                                   = [&tables[TableIndex]] = tables[TableIndex].
    tableIndex := int64(o.U1)
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableIndex*8,
        arm64ReservedRegisterForTemporary)

    // Out of bounds check.
    // ref = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
        ref,
    )
    // "cmp ref, offset"
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, ref, offset.register)

    // If the offset exceeds len(table), we exit the execution.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)

    // ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
        ref,
    )

    // ref = (offset << pointerSizeLog2) + ref
    //     = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
    //     = &tables[TableIndex].References[offset]
    c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
        offset.register, pointerSizeLog2, ref, ref)

    // ref = [ref] = tables[TableIndex].References[offset], i.e. the reference's raw pointer loaded as uint64.
    c.assembler.CompileMemoryToRegister(arm64.LDRD, ref, 0, ref)

    c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit at runtime.
    return nil
}

// compileTableSet implements compiler.compileTableSet for the arm64 architecture.
func (c *arm64Compiler) compileTableSet(o *wazeroir.UnionOperation) error {
    ref := c.locationStack.pop()
    if err := c.compileEnsureOnRegister(ref); err != nil {
        return err
    }

    offset := c.locationStack.pop()
    if err := c.compileEnsureOnRegister(offset); err != nil {
        return err
    }

    tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    // arm64ReservedRegisterForTemporary = &tables[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
        arm64ReservedRegisterForTemporary)
    // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
    //                                   = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
    //                                   = tables[TableIndex]
    tableIndex := int64(o.U1)
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableIndex*8,
        arm64ReservedRegisterForTemporary)

    // Out of bounds check.
    // tmp = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
        tmp,
    )
    // "cmp tmp, offset"
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp, offset.register)

    // If the offset exceeds len(table), we exit the execution.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)

    // tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
        tmp,
    )

    // tmp = (offset << pointerSizeLog2) + tmp
    //     = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
    //     = &tables[TableIndex].References[offset]
    c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, tmp, tmp)

    // Set the reference's raw pointer.
    c.assembler.CompileRegisterToMemory(arm64.STRD, ref.register, tmp, 0)

    c.markRegisterUnused(offset.register, ref.register, tmp)
    return nil
}
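// Both accessors above guard the access with the same compare-and-trap pattern.
// In Go terms (a minimal sketch of the emitted CMP + B.LO sequence):
//
//    if offset >= uint64(len(table)) { // unsigned comparison, as with B.LO
//        trap(nativeCallStatusCodeInvalidTableAccess)
//    }
//    raw := table[offset] // only reached when the index is in bounds
//
// where trap stands for the exit-from-native-code path described earlier.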
// compileTableGrow implements compiler.compileTableGrow for the arm64 architecture.
func (c *arm64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    // Pushes the table index.
    tableIndex := o.U1
    if err := c.compileIntConstant(true, tableIndex); err != nil {
        return err
    }

    // Just like memory.grow, table.grow cannot be done in assembly as it involves allocation in Go.
    // Therefore, we call out to the builtin function for this purpose.
    if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexTableGrow); err != nil {
        return err
    }

    // TableGrow consumes three values (table index, number of items, initial value).
    for i := 0; i < 3; i++ {
        c.locationStack.pop()
    }

    // Then, the previous length was pushed as the result.
    v := c.locationStack.pushRuntimeValueLocationOnStack()
    v.valueType = runtimeValueTypeI32

    // After return, we re-initialize the reserved registers just like in the preamble of functions.
    c.compileReservedStackBasePointerRegisterInitialization()
    c.compileReservedMemoryRegisterInitialization()
    return nil
}

// compileTableSize implements compiler.compileTableSize for the arm64 architecture.
func (c *arm64Compiler) compileTableSize(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }
    result, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }
    c.markRegisterUsed(result)

    // arm64ReservedRegisterForTemporary = &tables[0]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
        arm64ReservedRegisterForTemporary)
    // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
    //                                   = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
    //                                   = [&tables[TableIndex]] = tables[TableIndex].
    tableIndex := int64(o.U1)
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableIndex*8,
        arm64ReservedRegisterForTemporary)

    // result = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
        result,
    )

    c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
    return nil
}

// compileTableFill implements compiler.compileTableFill for the arm64 architecture.
func (c *arm64Compiler) compileTableFill(o *wazeroir.UnionOperation) error {
    tableIndex := uint32(o.U1)
    return c.compileFillImpl(true, tableIndex)
}
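// For reference, the wasm-level behavior delegated to the TableGrow builtin can be
// sketched in Go roughly as below (illustration only; the real builtin lives on the
// Go side of the call and also handles the failure case):
//
//    func tableGrow(refs []uintptr, init uintptr, delta uint32) ([]uintptr, int32) {
//        prevLen := int32(len(refs))
//        for i := uint32(0); i < delta; i++ {
//            refs = append(refs, init) // may reallocate, which is why this runs in Go
//        }
//        return refs, prevLen // table.grow returns the previous length
//    }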
// popTwoValuesOnRegisters pops two values from the location stack, ensures
// these two values are located on registers, and marks them unused.
//
// TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
// but the name seems awkward.
func (c *arm64Compiler) popTwoValuesOnRegisters() (x1, x2 *runtimeValueLocation, err error) {
    x2 = c.locationStack.pop()
    if err = c.compileEnsureOnRegister(x2); err != nil {
        return
    }

    x1 = c.locationStack.pop()
    if err = c.compileEnsureOnRegister(x1); err != nil {
        return
    }

    c.markRegisterUnused(x2.register)
    c.markRegisterUnused(x1.register)
    return
}

// popValueOnRegister pops one value from the location stack, ensures
// that it is located on a register, and marks it unused.
//
// TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
// but the name seems awkward.
func (c *arm64Compiler) popValueOnRegister() (v *runtimeValueLocation, err error) {
    v = c.locationStack.pop()
    if err = c.compileEnsureOnRegister(v); err != nil {
        return
    }

    c.markRegisterUnused(v.register)
    return
}

// compileEnsureOnRegister emits instructions to ensure that a value is located on a register.
func (c *arm64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
    if loc.onStack() {
        reg, err := c.allocateRegister(loc.getRegisterType())
        if err != nil {
            return err
        }

        // Record that the value holds the register and the register is marked used.
        loc.setRegister(reg)
        c.markRegisterUsed(reg)

        c.compileLoadValueOnStackToRegister(loc)
    } else if loc.onConditionalRegister() {
        err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
    }
    return
}

// maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
// to a general purpose register if the value is located on a conditional register.
//
// This is usually called at the beginning of methods on the compiler interface where we
// possibly compile instructions without saving the conditional register value.
// Any compile* function that does not call this must instead save the conditional
// value to the stack or a register by invoking compileEnsureOnRegister for the top value.
func (c *arm64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
    if c.locationStack.sp > 0 {
        if loc := c.locationStack.peek(); loc.onConditionalRegister() {
            err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
        }
    }
    return
}

// compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value
// to a general purpose register.
func (c *arm64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
    reg, err := c.allocateRegister(loc.getRegisterType())
    if err != nil {
        return err
    }

    c.markRegisterUsed(reg)
    c.assembler.CompileConditionalRegisterSet(loc.conditionalRegister, reg)

    // Record that now the value is located on a general purpose register.
    loc.setRegister(reg)
    return nil
}
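// Materializing a conditional register is the CSET-style idiom: turn the current
// flags into a 0/1 value in a general purpose register. The semantics of what
// CompileConditionalRegisterSet produces, sketched in Go (not the encoding):
//
//    v := uint64(0)
//    if conditionHolds { // e.g. the EQ/NE/LO/... condition chosen by the comparison
//        v = 1
//    }
//    // v now lives in a normal register and survives later flag-clobbering instructions.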
// compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for arm64.
func (c *arm64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) {
    switch loc.valueType {
    case runtimeValueTypeI32:
        c.assembler.CompileMemoryToRegister(arm64.LDRW, arm64ReservedRegisterForStackBasePointerAddress,
            int64(loc.stackPointer)*8, loc.register)
    case runtimeValueTypeI64:
        c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForStackBasePointerAddress,
            int64(loc.stackPointer)*8, loc.register)
    case runtimeValueTypeF32:
        c.assembler.CompileMemoryToRegister(arm64.FLDRS, arm64ReservedRegisterForStackBasePointerAddress,
            int64(loc.stackPointer)*8, loc.register)
    case runtimeValueTypeF64:
        c.assembler.CompileMemoryToRegister(arm64.FLDRD, arm64ReservedRegisterForStackBasePointerAddress,
            int64(loc.stackPointer)*8, loc.register)
    case runtimeValueTypeV128Lo:
        c.assembler.CompileMemoryToVectorRegister(arm64.VMOV,
            arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register,
            arm64.VectorArrangementQ)
        // The higher 64-bits are loaded as well by the Q-arrangement move above.
        hi := &c.locationStack.stack[loc.stackPointer+1]
        hi.setRegister(loc.register)
    case runtimeValueTypeV128Hi:
        panic("BUG: V128Hi must be loaded to a register along with V128Lo")
    }
}

// allocateRegister implements compiler.allocateRegister for arm64.
func (c *arm64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
    var ok bool
    // Try to get an unused register.
    reg, ok = c.locationStack.takeFreeRegister(t)
    if ok {
        return
    }

    // If not found, we have to steal a register.
    stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
    if !ok {
        err = fmt.Errorf("cannot steal register")
        return
    }

    // Release the steal target register value onto the stack location.
    reg = stealTarget.register
    c.compileReleaseRegisterToStack(stealTarget)
    return
}

// compileReleaseAllRegistersToStack adds instructions to store all the values located on
// either general purpose or conditional registers onto the memory stack.
// See compileReleaseRegisterToStack.
func (c *arm64Compiler) compileReleaseAllRegistersToStack() (err error) {
    for i := uint64(0); i < c.locationStack.sp; i++ {
        if loc := &c.locationStack.stack[i]; loc.onRegister() {
            c.compileReleaseRegisterToStack(loc)
        } else if loc.onConditionalRegister() {
            if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
                return
            }
            c.compileReleaseRegisterToStack(loc)
        }
    }
    return
}
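// allocateRegister above implements a simple two-step policy: prefer a free register,
// otherwise spill ("steal") one that currently backs a stack value. Sketched in Go
// (hypothetical helper names mirroring the location stack API):
//
//    reg, ok := takeFreeRegister(t)
//    if !ok {
//        victim := pickUsedRegisterValue(t) // some value currently held in a register
//        storeToMemoryStack(victim)         // what compileReleaseRegisterToStack emits
//        reg = victim.register              // now safe to reuse
//    }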
// compileReleaseRegisterToStack adds an instruction to write the value of a register back to the memory stack region.
func (c *arm64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) {
    switch loc.valueType {
    case runtimeValueTypeI32:
        c.assembler.CompileRegisterToMemory(arm64.STRW, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
    case runtimeValueTypeI64:
        c.assembler.CompileRegisterToMemory(arm64.STRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
    case runtimeValueTypeF32:
        c.assembler.CompileRegisterToMemory(arm64.FSTRS, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
    case runtimeValueTypeF64:
        c.assembler.CompileRegisterToMemory(arm64.FSTRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
    case runtimeValueTypeV128Lo:
        c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
            loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
            arm64.VectorArrangementQ)
        // The higher 64-bits are released as well by the Q-arrangement move above.
        hi := &c.locationStack.stack[loc.stackPointer+1]
        c.locationStack.releaseRegister(hi)
    case runtimeValueTypeV128Hi:
        panic("BUG: V128Hi must be released to the stack along with V128Lo")
    default:
        panic("BUG")
    }

    // Mark the register as free.
    c.locationStack.releaseRegister(loc)
}

// compileReservedStackBasePointerRegisterInitialization adds instructions to initialize arm64ReservedRegisterForStackBasePointerAddress
// so that it points to the absolute address of the stack base for this function.
func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() {
    // First, load the address of the first element in the value stack into arm64ReservedRegisterForStackBasePointerAddress temporarily.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset,
        arm64ReservedRegisterForStackBasePointerAddress)

    // Next, we load the stack base pointer in bytes (ce.stackBasePointerInBytes) into arm64ReservedRegisterForTemporary.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
        arm64ReservedRegisterForTemporary)

    // Finally, we calculate "arm64ReservedRegisterForStackBasePointerAddress + arm64ReservedRegisterForTemporary",
    // i.e. the address of the first stack element plus the base pointer in bytes.
    c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForStackBasePointerAddress)
}

func (c *arm64Compiler) compileReservedMemoryRegisterInitialization() {
    if c.ir.HasMemory || c.ir.UsesMemory {
        // "arm64ReservedRegisterForMemory = ce.MemoryElement0Address"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
            arm64ReservedRegisterForMemory,
        )
    }
}
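// The address computed by compileReservedStackBasePointerRegisterInitialization is a
// plain base-plus-offset sum. In Go terms (a sketch with hypothetical names):
//
//    stackBase := stackElement0Address + stackBasePointerInBytes
//    // afterwards, value i of the current frame lives at stackBase + 8*i,
//    // which is exactly how compileLoadValueOnStackToRegister addresses it.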
// compileModuleContextInitialization adds instructions to initialize ce.moduleContext's fields based on
// ce.moduleContext.ModuleInstanceAddress.
// This is called in two cases: in the function preamble, and on the return from (non-Go) function calls.
func (c *arm64Compiler) compileModuleContextInitialization() error {
    tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
    if !found {
        panic("BUG: all the registers should be free at this point")
    }
    c.markRegisterUsed(tmpX)
    tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
    if !found {
        panic("BUG: all the registers should be free at this point")
    }
    c.markRegisterUsed(tmpY)

    // "tmpX = ce.ModuleInstanceAddress"
    c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, tmpX)

    // If the module instance address stays the same, we can skip the entire code below.
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64CallingConventionModuleInstanceAddressRegister, tmpX)
    brIfModuleUnchanged := c.assembler.CompileJump(arm64.BCONDEQ)

    // Otherwise, update the moduleEngine.moduleContext.ModuleInstanceAddress.
    c.assembler.CompileRegisterToMemory(arm64.STRD,
        arm64CallingConventionModuleInstanceAddressRegister,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset,
    )

    // Also, we have to update the following fields:
    // * callEngine.moduleContext.globalElement0Address
    // * callEngine.moduleContext.memoryElement0Address
    // * callEngine.moduleContext.memorySliceLen
    // * callEngine.moduleContext.memoryInstance
    // * callEngine.moduleContext.tableElement0Address
    // * callEngine.moduleContext.tableSliceLen
    // * callEngine.moduleContext.functionsElement0Address
    // * callEngine.moduleContext.typeIDsElement0Address
    // * callEngine.moduleContext.dataInstancesElement0Address
    // * callEngine.moduleContext.elementInstancesElement0Address

    // Update globalElement0Address.
    //
    // Note: if there's a global.get or global.set instruction in the function, the existence of the globals
    // is ensured by function validation at the module instantiation phase, and that's why it is ok to
    // skip the initialization if the module's globals slice is empty.
    if len(c.ir.Globals) > 0 {
        // "tmpX = &moduleInstance.Globals[0]"
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset,
            tmpX,
        )

        // "ce.GlobalElement0Address = tmpX (== &moduleInstance.Globals[0])"
        c.assembler.CompileRegisterToMemory(
            arm64.STRD, tmpX,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
        )
    }

    // Update memoryElement0Address and memorySliceLen.
    //
    // Note: if there's a memory instruction in the function, the memory instance must be non-nil.
    // That is ensured by function validation at the module instantiation phase, and that's
    // why it is ok to skip the initialization if the module's memory instance is nil.
    if c.ir.HasMemory {
        // "tmpX = moduleInstance.Memory"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
            tmpX,
        )

        // First, set ce.memoryInstance.
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpX,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
        )

        // Next, we write the memory length into ce.MemorySliceLen.
        //
        // "tmpY = [tmpX + memoryInstanceBufferLenOffset] (== len(memory.Buffer))"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            tmpX, memoryInstanceBufferLenOffset,
            tmpY,
        )
        // "ce.MemorySliceLen = tmpY".
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpY,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
        )

        // Finally, we write ce.memoryElement0Address.
        //
        // "tmpY = *tmpX (== &memory.Buffer[0])"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            tmpX, memoryInstanceBufferOffset,
            tmpY,
        )
        // "ce.memoryElement0Address = tmpY".
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpY,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
        )
    }

    // Update tableElement0Address, tableSliceLen and typeIDsElement0Address.
    //
    // Note: if there's a table instruction in the function, the existence of the table
    // is ensured by function validation at the module instantiation phase, and that's
    // why it is ok to skip the initialization if the module's table doesn't exist.
    if c.ir.HasTable {
        // "tmpX = &tables[0] (type of **wasm.Table)"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTablesOffset,
            tmpX,
        )

        // Update ce.tableElement0Address.
        // "ce.tableElement0Address = tmpX".
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpX,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
        )

        // Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpX)
        c.assembler.CompileRegisterToMemory(arm64.STRD,
            tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
    }

    // Update callEngine.moduleContext.functionsElement0Address.
    {
        // "tmpX = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
        //
        // Go's interface is laid out in memory as two quad words as struct {tab, data uintptr}
        // where tab points to the interface table, and data points to the actual
        // implementation of the interface. In this case, we extract the "data" pointer as *moduleEngine.
        // See the following references for detail:
        // * https://research.swtch.com/interfaces
        // * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset,
            tmpX,
        )

        // "tmpY = [tmpX + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            tmpX, moduleEngineFunctionsOffset,
            tmpY,
        )

        // "callEngine.moduleContext.functionsElement0Address = tmpY".
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpY,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
        )
    }

    // Update dataInstancesElement0Address.
    if c.ir.HasDataInstances {
        // "tmpX = &moduleInstance.DataInstances[0]"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
            tmpX,
        )
        // "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpX,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
        )
    }

    // Update callEngine.moduleContext.elementInstancesElement0Address.
    if c.ir.HasElementInstances {
        // "tmpX = &moduleInstance.ElementInstances[0]"
        c.assembler.CompileMemoryToRegister(
            arm64.LDRD,
            arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
            tmpX,
        )
        // "callEngine.moduleContext.elementInstancesElement0Address = tmpX".
        c.assembler.CompileRegisterToMemory(
            arm64.STRD,
            tmpX,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
        )
    }

    c.assembler.SetJumpTargetOnNext(brIfModuleUnchanged)
    c.markRegisterUnused(tmpX, tmpY)
    return nil
}
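// The whole routine above is a cache refresh guarded by a pointer comparison. Its
// shape in plain Go (hypothetical field names, illustration only):
//
//    if ce.moduleContext.moduleInstance != currentModuleInstance { // CMP + B.EQ skip
//        ce.moduleContext.moduleInstance = currentModuleInstance
//        ce.moduleContext.globalElement0Address = &m.Globals[0]
//        ce.moduleContext.memoryElement0Address = &m.Memory.Buffer[0]
//        // ... and so on for tables, functions, data and element instances,
//        // so the hot paths can reach each with a single load off the call engine register.
//    }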