github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/compiler/impl_arm64.go

// This file implements the compiler for the arm64 target.
// Please refer to https://developer.arm.com/documentation/102374/latest/
// if unfamiliar with arm64 instructions and semantics.
package compiler

import (
	"bytes"
	"errors"
	"fmt"
	"math"

	"github.com/bananabytelabs/wazero/internal/asm"
	"github.com/bananabytelabs/wazero/internal/asm/arm64"
	"github.com/bananabytelabs/wazero/internal/wasm"
	"github.com/bananabytelabs/wazero/internal/wazeroir"
)

type arm64Compiler struct {
	assembler arm64.Assembler
	ir        *wazeroir.CompilationResult
	// locationStack holds the state of the wazeroir virtual stack,
	// where each item is either placed in a register or on the actual memory stack.
	locationStack *runtimeValueLocationStack
	// labels maps a label (e.g. ".L1_then") to *arm64LabelInfo.
	labels [wazeroir.LabelKindNum][]arm64LabelInfo
	// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
	stackPointerCeil uint64
	// assignStackPointerCeilNeeded holds an asm.Node whose AssignSourceConstant must be called with the determined stack pointer ceiling.
	assignStackPointerCeilNeeded asm.Node
	compiledTrapTargets          [nativeCallStatusModuleClosed]asm.Node
	withListener                 bool
	typ                          *wasm.FunctionType
	br                           *bytes.Reader
	// locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
	// we cache it here, and reset and set it to .locationStack in the Init method.
	locationStackForEntrypoint runtimeValueLocationStack
	// frameIDMax tracks the maximum value of the frame id per function.
	frameIDMax int
	brTableTmp []runtimeValueLocation
}

func newArm64Compiler() compiler {
	return &arm64Compiler{
		assembler:                  arm64.NewAssembler(arm64ReservedRegisterForTemporary),
		locationStackForEntrypoint: newRuntimeValueLocationStack(),
		br:                         bytes.NewReader(nil),
	}
}

// Init implements compiler.Init.
func (c *arm64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
	c.assembler.Reset()
	c.locationStackForEntrypoint.reset()
	c.resetLabels()

	*c = arm64Compiler{
		ir:                         ir,
		withListener:               withListener,
		typ:                        typ,
		assembler:                  c.assembler,
		labels:                     c.labels,
		br:                         c.br,
		brTableTmp:                 c.brTableTmp,
		locationStackForEntrypoint: c.locationStackForEntrypoint,
	}

	// Reuse the initial location stack for the compilation of subsequent functions.
	c.locationStack = &c.locationStackForEntrypoint
}

// resetLabels resets the existing content in arm64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *arm64Compiler) resetLabels() {
	for i := range c.labels {
		for j := range c.labels[i] {
			if j > c.frameIDMax {
				// Only need to reset up to the maximum frame id. This makes the compilation faster for large binaries.
				break
			}
			l := &c.labels[i][j]
			l.initialInstruction = nil
			l.stackInitialized = false
			l.initialStack.reset()
		}
	}
}

var (
	arm64UnreservedVectorRegisters = []asm.Register{
		arm64.RegV0, arm64.RegV1, arm64.RegV2, arm64.RegV3,
		arm64.RegV4, arm64.RegV5, arm64.RegV6, arm64.RegV7, arm64.RegV8,
		arm64.RegV9, arm64.RegV10, arm64.RegV11, arm64.RegV12, arm64.RegV13,
		arm64.RegV14, arm64.RegV15, arm64.RegV16, arm64.RegV17, arm64.RegV18,
		arm64.RegV19, arm64.RegV20, arm64.RegV21, arm64.RegV22, arm64.RegV23,
		arm64.RegV24, arm64.RegV25, arm64.RegV26, arm64.RegV27, arm64.RegV28,
		arm64.RegV29, arm64.RegV30, arm64.RegV31,
	}

	// Note (see the arm64 section in https://go.dev/doc/asm):
	// * RegR18 is reserved as a platform register, and we don't use it in Compiler.
	// * RegR28 is reserved for Goroutines by the Go runtime, and we don't use it in Compiler.
	arm64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint
		arm64.RegR3, arm64.RegR4, arm64.RegR5, arm64.RegR6, arm64.RegR7, arm64.RegR8,
		arm64.RegR9, arm64.RegR10, arm64.RegR11, arm64.RegR12, arm64.RegR13,
		arm64.RegR14, arm64.RegR15, arm64.RegR16, arm64.RegR17, arm64.RegR19,
		arm64.RegR20, arm64.RegR21, arm64.RegR22, arm64.RegR23, arm64.RegR24,
		arm64.RegR25, arm64.RegR26, arm64.RegR29, arm64.RegR30,
	}
)

const (
	// arm64ReservedRegisterForCallEngine holds the pointer to the callEngine instance (i.e. *callEngine as uintptr).
	arm64ReservedRegisterForCallEngine = arm64.RegR0
	// arm64ReservedRegisterForStackBasePointerAddress holds the stack base pointer's address (callEngine.stackBasePointer) in the current function call.
	arm64ReservedRegisterForStackBasePointerAddress = arm64.RegR1
	// arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
	arm64ReservedRegisterForMemory = arm64.RegR2
	// arm64ReservedRegisterForTemporary is a temporary register which is available at any point of execution,
	// but its content shouldn't be assumed to live beyond a single operation.
	// Note: we choose R27 as that is the temporary register used in Go's assembler.
	arm64ReservedRegisterForTemporary = arm64.RegR27
)

var arm64CallingConventionModuleInstanceAddressRegister = arm64.RegR29

const (
	// arm64CallEngineArchContextCompilerCallReturnAddressOffset is the offset of archContext.nativeCallReturnAddress in callEngine.
	arm64CallEngineArchContextCompilerCallReturnAddressOffset = 144
	// arm64CallEngineArchContextMinimum32BitSignedIntOffset is the offset of archContext.minimum32BitSignedIntAddress in callEngine.
	arm64CallEngineArchContextMinimum32BitSignedIntOffset = 152
	// arm64CallEngineArchContextMinimum64BitSignedIntOffset is the offset of archContext.minimum64BitSignedIntAddress in callEngine.
	arm64CallEngineArchContextMinimum64BitSignedIntOffset = 160
)

func isZeroRegister(r asm.Register) bool {
	return r == arm64.RegRZR
}

// compileNOP implements compiler.compileNOP for the arm64 architecture.
func (c *arm64Compiler) compileNOP() asm.Node {
	return c.assembler.CompileStandAlone(arm64.NOP)
}

// compile implements compiler.compile for the arm64 architecture.
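//
// Note that the stack pointer ceiling is only fully known at this point, after every
// operation has been compiled: compileMaybeGrowStack emits a placeholder constant
// (math.MaxInt32) for it, and assignStackPointerCeil below patches that constant
// before Assemble runs.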
func (c *arm64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) {
	// c.stackPointerCeil tracks the stack pointer ceiling (max seen) value across all runtimeValueLocationStack(s)
	// used for all labels (via setLocationStack), excluding the current one.
	// Hence, we check here if the final block's maximum exceeds the current c.stackPointerCeil.
	stackPointerCeil = c.stackPointerCeil
	if stackPointerCeil < c.locationStack.stackPointerCeil {
		stackPointerCeil = c.locationStack.stackPointerCeil
	}

	// Now that the ceiling of the stack pointer is determined, we invoke the callback.
	// Note: this must be called before Assemble() below.
	c.assignStackPointerCeil(stackPointerCeil)

	err = c.assembler.Assemble(buf)
	return
}

// arm64LabelInfo holds the wazeroir-label-specific information within a function.
type arm64LabelInfo struct {
	// initialInstruction is the initial instruction for this label so that other blocks can branch into it.
	initialInstruction asm.Node
	// initialStack is the initial value location stack from which we start compiling this label.
	initialStack     runtimeValueLocationStack
	stackInitialized bool
}

// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the arm64 architecture.
func (c *arm64Compiler) assignStackPointerCeil(ceil uint64) {
	if c.assignStackPointerCeilNeeded != nil {
		c.assignStackPointerCeilNeeded.AssignSourceConstant(int64(ceil) << 3)
	}
}

func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo {
	kind := label.Kind()
	frames := c.labels[kind]
	frameID := label.FrameID()
	if c.frameIDMax < frameID {
		c.frameIDMax = frameID
	}
	// If the frameID is not allocated yet, expand the slice so that frameID becomes
	// a valid index, reducing allocations in subsequent compilations.
	if diff := frameID - len(frames) + 1; diff > 0 {
		for i := 0; i < diff; i++ {
			frames = append(frames, arm64LabelInfo{initialStack: newRuntimeValueLocationStack()})
		}
		c.labels[kind] = frames
	}
	return &frames[frameID]
}

// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
	return c.locationStack
}

// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
	ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
	c.markRegisterUsed(reg)
	return
}

// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
	lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
	c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
	c.markRegisterUsed(reg)
	return
}

func (c *arm64Compiler) markRegisterUsed(regs ...asm.Register) {
	for _, reg := range regs {
		if !isZeroRegister(reg) && reg != asm.NilRegister {
			c.locationStack.markRegisterUsed(reg)
		}
	}
}

func (c *arm64Compiler) markRegisterUnused(regs ...asm.Register) {
	for _, reg := range regs {
		if !isZeroRegister(reg) && reg != asm.NilRegister {
			c.locationStack.markRegisterUnused(reg)
		}
	}
}

func (c *arm64Compiler) String() (ret string) { return c.locationStack.String() }

// compilePreamble implements compiler.compilePreamble for the arm64 architecture.
func (c *arm64Compiler) compilePreamble() error {
	c.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
	defer c.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

	c.locationStack.init(c.typ)

	// Check if it's necessary to grow the value stack before entering the function body.
	if err := c.compileMaybeGrowStack(); err != nil {
		return err
	}

	if err := c.compileModuleContextInitialization(); err != nil {
		return err
	}

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// We must initialize the stack base pointer register so that we can manipulate the stack properly.
	c.compileReservedStackBasePointerRegisterInitialization()

	c.compileReservedMemoryRegisterInitialization()

	return nil
}

// compileMaybeGrowStack adds instructions to check whether the value stack needs to grow,
// and if so, makes a builtin function call to do so. These instructions are emitted in the
// function's preamble.
func (c *arm64Compiler) compileMaybeGrowStack() error {
	tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	c.markRegisterUsed(tmpX)
	tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	c.markRegisterUsed(tmpY)

	// "tmpX = len(ce.stack)"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset,
		tmpX,
	)

	// "tmpY = ce.stackBasePointer"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		tmpY,
	)

	// "tmpX = tmpX - tmpY", in other words "tmpX = len(ce.stack) - ce.stackBasePointer"
	c.assembler.CompileRegisterToRegister(
		arm64.SUB,
		tmpY,
		tmpX,
	)

	// "tmpY = stackPointerCeil"
	loadStackPointerCeil := c.assembler.CompileConstToRegister(
		arm64.MOVD,
		math.MaxInt32,
		tmpY,
	)
	// At this point of compilation, we don't know the value of the stack pointer ceiling,
	// so we lazily resolve the value later.
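	// (The math.MaxInt32 above is only a placeholder wide enough for any eventual
	// constant; assignStackPointerCeil rewrites it via AssignSourceConstant with the
	// real ceiling shifted left by 3, converting the value-slot count into bytes.)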
	c.assignStackPointerCeilNeeded = loadStackPointerCeil

	// Compare tmpX (len(ce.stack) - ce.stackBasePointer) and tmpY (the stack pointer ceiling).
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmpX, tmpY)

	// If ceil > stackLen - stack base pointer, we need to grow the stack by calling the builtin Go function.
	brIfStackOK := c.assembler.CompileJump(arm64.BCONDLS)
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexGrowStack); err != nil {
		return err
	}

	// Otherwise, skip calling it.
	c.assembler.SetJumpTargetOnNext(brIfStackOK)

	c.markRegisterUnused(tmpX, tmpY)
	return nil
}

// compileReturnFunction emits instructions to return from the current function frame.
// If the current frame is the bottom one, the code goes back to the Go code with the nativeCallStatusCodeReturned status.
// Otherwise, we branch into the caller's return address.
func (c *arm64Compiler) compileReturnFunction() error {
	// Release all the registers as our calling convention requires caller-save.
	if err := c.compileReleaseAllRegistersToStack(); err != nil {
		return err
	}

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerAfter); err != nil {
			return err
		}
		// After return, we re-initialize the stack base pointer as that is used to return to the caller below.
		c.compileReservedStackBasePointerRegisterInitialization()
	}

	// arm64CallingConventionModuleInstanceAddressRegister holds the module instance's address,
	// so mark it used so that it won't be picked as a free register.
	c.locationStack.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
	defer c.locationStack.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

	returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ)

	// A zero return address means that we return from the execution.
	returnAddress.setRegister(arm64ReservedRegisterForTemporary)
	c.compileLoadValueOnStackToRegister(returnAddress)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, arm64.RegRZR)

	// If the address equals zero, exit back to the Go code with the Returned status;
	// otherwise, skip the exit and, below, br into the caller's return address.
	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeReturned)

	// Alias for readability.
	tmp := arm64CallingConventionModuleInstanceAddressRegister

	// First, restore the stackContext.stackBasePointerInBytesOffset from callerStackBasePointerInBytes.
	callerStackBasePointerInBytes.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
	c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

	// Next, restore moduleContext.fn from callerFunction.
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

	// Also, we have to put the target function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister.
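	// Note that tmp aliases arm64CallingConventionModuleInstanceAddressRegister and currently
	// holds the caller's *function, so the load below overwrites it with that function's
	// *wasm.ModuleInstance (read from functionModuleInstanceOffset).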
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		tmp, functionModuleInstanceOffset,
		arm64CallingConventionModuleInstanceAddressRegister)

	c.assembler.CompileJumpToRegister(arm64.B, returnAddress.register)
	return nil
}

func (c *arm64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) {
	skip := c.assembler.CompileJump(skipCondition)
	c.compileExitFromNativeCode(status)
	c.assembler.SetJumpTargetOnNext(skip)
}

// compileExitFromNativeCode adds instructions to give the control back to ce.exec with the given status code.
func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) {
	if target := c.compiledTrapTargets[status]; target != nil {
		c.assembler.CompileJump(arm64.B).AssignJumpTarget(target)
		return
	}

	switch status {
	case nativeCallStatusCodeReturned:
		// Save the target for reuse.
		c.compiledTrapTargets[status] = c.compileNOP()
	case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction:
		// Read the return address, and write it to callEngine.exitContext.returnAddress.
		c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET)
		c.assembler.CompileRegisterToMemory(
			arm64.STRD, arm64ReservedRegisterForTemporary,
			arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
		)
	default:
		if c.ir.IROperationSourceOffsetsInWasmBinary != nil {
			// In this case, the execution traps, and we want the top frame's source position in the stack trace.
			// We store the instruction address into callEngine.returnAddress.
			c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.STRD)
			c.assembler.CompileRegisterToMemory(
				arm64.STRD, arm64ReservedRegisterForTemporary,
				arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
			)
		} else {
			// We won't use the source position, so just save the target for reuse.
			c.compiledTrapTargets[status] = c.compileNOP()
		}
	}

	// Write the current stack pointer to ce.stackPointer.
	c.assembler.CompileConstToRegister(arm64.MOVD, int64(c.locationStack.sp), arm64ReservedRegisterForTemporary)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine,
		callEngineStackContextStackPointerOffset)

	// Write the status to callEngine.exitContext.statusCode.
	if status != 0 {
		c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary)
		c.assembler.CompileRegisterToMemory(arm64.STRW, arm64ReservedRegisterForTemporary,
			arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
	} else {
		// If the status == 0, we use the zero register to store zero.
		c.assembler.CompileRegisterToMemory(arm64.STRW, arm64.RegRZR,
			arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
	}

	// The return address to the Go code is stored in archContext.compilerReturnAddress, which
	// is embedded in ce. We load the value into the temporary register, and then
	// invoke RET with that register.
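	// As a sketch (illustrative pseudo-assembly, with R0 being arm64ReservedRegisterForCallEngine
	// and R27 being arm64ReservedRegisterForTemporary):
	//
	//	LDR R27, [R0, #arm64CallEngineArchContextCompilerCallReturnAddressOffset]
	//	RET (R27)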
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, arm64CallEngineArchContextCompilerCallReturnAddressOffset,
		arm64ReservedRegisterForTemporary)

	c.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary)
}

// compileGoDefinedHostFunction implements compiler.compileGoDefinedHostFunction for the arm64 architecture.
func (c *arm64Compiler) compileGoDefinedHostFunction() error {
	// First we must update the location stack to reflect the number of host function inputs.
	c.locationStack.init(c.typ)

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction,
			builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// The host function needs access to the caller's module instance, and the caller's information is stored in the stack
	// (as described in the doc of callEngine.stack). Here, we get the caller's *function from the stack,
	// and save its module instance in callEngine.exitContext.callerModuleInstance so we can pass it to the host function
	// without sacrificing performance.
	c.compileReservedStackBasePointerRegisterInitialization()
	// Alias for readability.
	tmp := arm64CallingConventionModuleInstanceAddressRegister
	// Get the location of the callerFunction (*function) in the stack, which depends on the signature.
	_, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
	// Load the value into the tmp register: tmp = &function{..}
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	// tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...}
	c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, functionModuleInstanceOffset, tmp)
	// Store it into callEngine.exitContext.callerModuleInstance.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		tmp,
		arm64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset)
	// Reset the state of the callerFunction value location so that we won't mess up the subsequent code generation below.
	c.locationStack.releaseRegister(callerFunction)

	if err := c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction, 0); err != nil {
		return err
	}

	// Initialize the reserved stack base pointer which is used to retrieve the call frame stack.
	c.compileReservedStackBasePointerRegisterInitialization()

	// A Go function can change the module state in arbitrary ways, so we have to force
	// the callEngine.moduleContext initialization on the function return. To do so,
	// we zero out callEngine.moduleInstance.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		arm64.RegRZR,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)

	return c.compileReturnFunction()
}

// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
	if c.stackPointerCeil < c.locationStack.stackPointerCeil {
		c.stackPointerCeil = c.locationStack.stackPointerCeil
	}
	c.locationStack = newStack
}

// compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the arm64 architecture.
func (c *arm64Compiler) compileBuiltinFunctionCheckExitCode() error {
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexCheckExitCode); err != nil {
		return err
	}

	// After return, we re-initialize the reserved registers just like in the preamble of functions.
	c.compileReservedStackBasePointerRegisterInitialization()
	c.compileReservedMemoryRegisterInitialization()
	return nil
}

// compileLabel implements compiler.compileLabel for the arm64 architecture.
func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel bool) {
	labelKey := wazeroir.Label(o.U1)
	labelInfo := c.label(labelKey)

	// If initialStack is not set, that means this label has never been reached.
	if !labelInfo.stackInitialized {
		skipThisLabel = true
		return
	}

	if labelBegin := labelInfo.initialInstruction; labelBegin == nil {
		// We use NOP as the beginning of instructions in a label.
		// This should eventually be optimized out by the assembler.
		labelInfo.initialInstruction = c.assembler.CompileStandAlone(arm64.NOP)
	} else {
		c.assembler.Add(labelBegin)
	}

	// Set the initial stack.
	c.setLocationStack(&labelInfo.initialStack)
	return false
}

// compileUnreachable implements compiler.compileUnreachable for the arm64 architecture.
func (c *arm64Compiler) compileUnreachable() error {
	c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
	return nil
}

// compileSet implements compiler.compileSet for the arm64 architecture.
func (c *arm64Compiler) compileSet(o *wazeroir.UnionOperation) error {
	depth := int(o.U1)
	isTargetVector := o.B3

	setTargetIndex := int(c.locationStack.sp) - 1 - depth

	if isTargetVector {
		_ = c.locationStack.pop()
	}
	v := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	targetLocation := &c.locationStack.stack[setTargetIndex]
	if targetLocation.onRegister() {
		// We no longer need the register previously used by the target location.
		c.markRegisterUnused(targetLocation.register)
	}

	reg := v.register
	targetLocation.setRegister(reg)
	targetLocation.valueType = v.valueType
	if isTargetVector {
		hi := &c.locationStack.stack[setTargetIndex+1]
		hi.setRegister(reg)
	}
	return nil
}

// compileGlobalGet implements compiler.compileGlobalGet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	index := uint32(o.U1)

	wasmValueType := c.ir.Globals[index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128
	// Get the address of globals[index] into globalAddressReg.
	globalAddressReg, err := c.compileReadGlobalAddress(index)
	if err != nil {
		return err
	}

	if isV128 {
		resultReg, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.assembler.CompileConstToRegister(arm64.ADD, globalInstanceValueOffset, globalAddressReg)
		c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, globalAddressReg, 0,
			resultReg, arm64.VectorArrangementQ)

		c.pushVectorRuntimeValueLocationOnRegister(resultReg)
	} else {
		ldr := arm64.NOP
		var result asm.Register
		var vt runtimeValueType
		switch wasmValueType {
		case wasm.ValueTypeI32:
			ldr = arm64.LDRW
			vt = runtimeValueTypeI32
			result = globalAddressReg
		case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
			ldr = arm64.LDRD
			vt = runtimeValueTypeI64
			result = globalAddressReg
		case wasm.ValueTypeF32:
			result, err = c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}
			ldr = arm64.FLDRS
			vt = runtimeValueTypeF32
		case wasm.ValueTypeF64:
			result, err = c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}
			ldr = arm64.FLDRD
			vt = runtimeValueTypeF64
		}

		// "result = [globalAddressReg + globalInstanceValueOffset] (== globals[index].Val)"
		c.assembler.CompileMemoryToRegister(
			ldr,
			globalAddressReg, globalInstanceValueOffset,
			result,
		)

		c.pushRuntimeValueLocationOnRegister(result, vt)
	}
	return nil
}

// compileGlobalSet implements compiler.compileGlobalSet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error {
	index := uint32(o.U1)

	wasmValueType := c.ir.Globals[index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128

	var val *runtimeValueLocation
	if isV128 {
		val = c.locationStack.popV128()
	} else {
		val = c.locationStack.pop()
	}
	if err := c.compileEnsureOnRegister(val); err != nil {
		return err
	}

	globalInstanceAddressRegister, err := c.compileReadGlobalAddress(index)
	if err != nil {
		return err
	}

	if isV128 {
		c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
			val.register, globalInstanceAddressRegister, globalInstanceValueOffset,
			arm64.VectorArrangementQ)
	} else {
		var str asm.Instruction
		switch c.ir.Globals[index].ValType {
		case wasm.ValueTypeI32:
			str = arm64.STRW
		case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
			str = arm64.STRD
		case wasm.ValueTypeF32:
			str = arm64.FSTRS
		case wasm.ValueTypeF64:
			str = arm64.FSTRD
		}

		// At this point "globalInstanceAddressRegister = globals[index]".
		// Therefore, this means "globals[index].Val = val.register".
		c.assembler.CompileRegisterToMemory(
			str,
			val.register,
			globalInstanceAddressRegister, globalInstanceValueOffset,
		)
	}

	c.markRegisterUnused(val.register)
	return nil
}

// compileReadGlobalAddress adds instructions to store the absolute address of the global instance at globalIndex into a register.
func (c *arm64Compiler) compileReadGlobalAddress(globalIndex uint32) (destinationRegister asm.Register, err error) {
	// TODO: rethink the type used to store `globals []*GlobalInstance`.
	// If we use `[]GlobalInstance` instead, we could reduce one MOV instruction here.
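	// The three instructions below compute, in pseudo-code:
	//
	//	destinationRegister = *(&globals[0] + globalIndex*8) // == globals[globalIndex]
	//
	// i.e. one MOVD for the scaled index, one LDRD for &globals[0], and one
	// register-offset LDRD for the dereference.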

	destinationRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return
	}

	// "destinationRegister = globalIndex * 8"
	c.assembler.CompileConstToRegister(
		// globalIndex is an index into []*GlobalInstance, therefore
		// we have to multiply it by the size of *GlobalInstance == the pointer size == 8.
		arm64.MOVD, int64(globalIndex)*8, destinationRegister,
	)

	// "arm64ReservedRegisterForTemporary = &globals[0]"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
		arm64ReservedRegisterForTemporary,
	)

	// "destinationRegister = [arm64ReservedRegisterForTemporary + destinationRegister] (== globals[globalIndex])"
	c.assembler.CompileMemoryWithRegisterOffsetToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForTemporary, destinationRegister,
		destinationRegister,
	)
	return
}

// compileBr implements compiler.compileBr for the arm64 architecture.
func (c *arm64Compiler) compileBr(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	return c.compileBranchInto(wazeroir.Label(o.U1))
}

// compileBrIf implements compiler.compileBrIf for the arm64 architecture.
func (c *arm64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
	cond := c.locationStack.pop()

	var conditionalBR asm.Node
	if cond.onConditionalRegister() {
		// If the cond is on a conditional register, it corresponds to one of the "condition codes":
		// https://developer.arm.com/documentation/dui0801/a/Condition-Codes/Condition-code-suffixes
		// Here we represent the condition codes by using the arm64.Cond** values, and that means the
		// conditional jump can be performed with the corresponding arm64.BCOND** instruction.
		// For example, if we have arm64.CondEQ on cond, that means we performed compileEq right before
		// this compileBrIf, and the BrIf can be achieved by arm64.BCONDEQ.
		var brInst asm.Instruction
		switch cond.conditionalRegister {
		case arm64.CondEQ:
			brInst = arm64.BCONDEQ
		case arm64.CondNE:
			brInst = arm64.BCONDNE
		case arm64.CondHS:
			brInst = arm64.BCONDHS
		case arm64.CondLO:
			brInst = arm64.BCONDLO
		case arm64.CondMI:
			brInst = arm64.BCONDMI
		case arm64.CondHI:
			brInst = arm64.BCONDHI
		case arm64.CondLS:
			brInst = arm64.BCONDLS
		case arm64.CondGE:
			brInst = arm64.BCONDGE
		case arm64.CondLT:
			brInst = arm64.BCONDLT
		case arm64.CondGT:
			brInst = arm64.BCONDGT
		case arm64.CondLE:
			brInst = arm64.BCONDLE
		default:
			// BUG: This means that we set cond.conditionalRegister somewhere in this file
			// to a value not covered by the switch above. That shouldn't happen.
			return fmt.Errorf("unsupported condition for br_if: %v", cond.conditionalRegister)
		}
		conditionalBR = c.assembler.CompileJump(brInst)
	} else {
		// If the value is not on a conditional register, we compare the value with the zero register,
		// and then do the conditional BR if the value doesn't equal zero.
		if err := c.compileEnsureOnRegister(cond); err != nil {
			return err
		}
		// Compare the value with the zero register. Note that the value is ensured to be i32 by the function validation phase,
		// so we use CMPW (32-bit compare) here.
		c.assembler.CompileTwoRegistersToNone(arm64.CMPW, cond.register, arm64.RegRZR)

		conditionalBR = c.assembler.CompileJump(arm64.BCONDNE)

		c.markRegisterUnused(cond.register)
	}

	// Emit the code for branching into the else branch.
	elseTarget := wazeroir.Label(o.U2)
	if err := c.compileBranchInto(elseTarget); err != nil {
		return err
	}
	// We branch into here from the original conditional BR (conditionalBR).
	c.assembler.SetJumpTargetOnNext(conditionalBR)
	thenTarget := wazeroir.Label(o.U1)
	if err := compileDropRange(c, o.U3); err != nil {
		return err
	}
	return c.compileBranchInto(thenTarget)
}

func (c *arm64Compiler) compileBranchInto(target wazeroir.Label) error {
	if target.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		if c.ir.LabelCallers[target] > 1 {
			// We can only reuse the register state when there's a single call-site.
			// If there are multiple call-sites, release the existing values on registers to the stack
			// to get a consistent value location state at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		targetLabel := c.label(target)
		if !targetLabel.stackInitialized {
			targetLabel.initialStack.cloneFrom(*c.locationStack)
			targetLabel.stackInitialized = true
		}

		br := c.assembler.CompileJump(arm64.B)
		c.assignBranchTarget(target, br)
		return nil
	}
}

// assignBranchTarget assigns the given label's initial instruction to the destination of br.
func (c *arm64Compiler) assignBranchTarget(label wazeroir.Label, br asm.Node) {
	target := c.label(label)

	targetInst := target.initialInstruction
	if targetInst == nil {
		// If the label isn't compiled yet, allocate the NOP node, and set it as the initial instruction.
		targetInst = c.assembler.AllocateNOP()
		target.initialInstruction = targetInst
	}

	br.AssignJumpTarget(targetInst)
}

// compileBrTable implements compiler.compileBrTable for the arm64 architecture.
func (c *arm64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
	// If the operation only consists of the default target, we branch into it and return early.
	if len(o.Us) == 2 {
		loc := c.locationStack.pop()
		if loc.onRegister() {
			c.markRegisterUnused(loc.register)
		}
		if err := compileDropRange(c, o.Us[1]); err != nil {
			return err
		}
		return c.compileBranchInto(wazeroir.Label(o.Us[0]))
	}

	index := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(index); err != nil {
		return err
	}

	if isZeroRegister(index.register) {
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		index.setRegister(reg)
		c.markRegisterUsed(reg)

		// Zero the value on the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
	}

	tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// Load the branch table's length.
	// "tmpReg = len(o.Targets)"
	c.assembler.CompileConstToRegister(arm64.MOVW, int64(len(o.Us)/2-1), tmpReg)
	// Compare the index with the length.
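	// Net effect of the three instructions below: if the index is in range, the BCONDLO
	// jumps over the MOVW; otherwise the MOVW clamps the index to the last jump table
	// slot, which holds the default target.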
	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register)
	// If the index exceeds the length, we branch into the default target (corresponding to the len(o.Targets) index).
	brDefaultIndex := c.assembler.CompileJump(arm64.BCONDLO)
	c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register)
	c.assembler.SetJumpTargetOnNext(brDefaultIndex)

	// We prepare the asm.StaticConst which holds the offset of
	// each target's first instruction (incl. default)
	// relative to the beginning of the label tables.
	//
	// For example, if we have targets=[L0, L1] and default=L_DEFAULT,
	// we emit the code like this at [Emit the code for each target and default branch] below.
	//
	// L0:
	//  0x123001: XXXX, ...
	//  .....
	// L1:
	//  0x123005: YYY, ...
	//  .....
	// L_DEFAULT:
	//  0x123009: ZZZ, ...
	//
	// then offsetData becomes like [0x0, 0x4, 0x8].
	// By using this offset list, we can jump into the label for the index by
	// "jmp offsetData[index]+0x123001", and "0x123001" can be acquired by the ADR instruction.
	//
	// Note: We store each offset as a 32-bit unsigned integer in 4 consecutive bytes. So more precisely,
	// the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
	//
	// Note: this is similar to how GCC implements switch statements in C.
	offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2)))

	// "tmpReg = &offsetData[0]"
	c.assembler.CompileStaticConstToRegister(arm64.ADR, offsetData, tmpReg)

	// "index.register = tmpReg + (index.register << 2) (== &offsetData[index])"
	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register)

	// "index.register = *index.register (== offsetData[index])"
	c.assembler.CompileMemoryToRegister(arm64.LDRW, index.register, 0, index.register)

	// Now we read the address of the beginning of the jump table.
	// In the above example, this corresponds to reading the address of 0x123001.
	c.assembler.CompileReadInstructionAddress(tmpReg, arm64.B)

	// Now we have the address of L0 in the tmp register, and the offset to the target label in index.register.
	// So we can achieve the br_table jump by adding them and jumping into the resulting address.
	c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register)

	c.assembler.CompileJumpToRegister(arm64.B, index.register)

	// We no longer need the index's register, so mark it unused.
	c.markRegisterUnused(index.register)

	// [Emit the code for each target and default branch]
	labelInitialInstructions := make([]asm.Node, len(o.Us)/2)

	// Since we might end up having different stack states in each branch,
	// we need to save the initial stack state here, and use the same initial state
	// for each iteration.
	initialLocationStack := c.getSavedTemporaryLocationStack()

	for i := range labelInitialInstructions {
		// Emit the initial instruction of each target where
		// we use NOP as we don't yet know the next instruction in each label.
		init := c.assembler.CompileStandAlone(arm64.NOP)
		labelInitialInstructions[i] = init

		targetLabel := wazeroir.Label(o.Us[i*2])
		targetToDrop := o.Us[i*2+1]
		if err = compileDropRange(c, targetToDrop); err != nil {
			return err
		}
		if err = c.compileBranchInto(targetLabel); err != nil {
			return err
		}
		// After the iteration, reset the stack's state with initialLocationStack.
		c.locationStack.cloneFrom(initialLocationStack)
	}

	c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
	return nil
}

func (c *arm64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
	initialLocationStack := *c.locationStack // Take a copy!
	// Use c.brTableTmp for the underlying stack so that we can reduce the allocations.
	if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
		c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
	}
	copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
	initialLocationStack.stack = c.brTableTmp
	return initialLocationStack
}

// compileCall implements compiler.compileCall for the arm64 architecture.
func (c *arm64Compiler) compileCall(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	functionIndex := o.U1

	tp := &c.ir.Types[c.ir.Functions[functionIndex]]

	targetFunctionAddressReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(targetFunctionAddressReg)
	defer c.markRegisterUnused(targetFunctionAddressReg)

	// Set targetFunctionAddressReg to point at the target *function in ce.functions.
	//
	// First, we read the address of the first item of the ce.functions slice (= &ce.functions[0])
	// into targetFunctionAddressReg.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
		targetFunctionAddressReg)

	// Then, advance it by functionIndex * functionSize so that it points at &ce.functions[functionIndex].
	c.assembler.CompileConstToRegister(
		arm64.ADD,
		int64(functionIndex)*functionSize,
		targetFunctionAddressReg)

	return c.compileCallImpl(targetFunctionAddressReg, tp)
}

// compileCallImpl implements compiler.compileCall and compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallImpl(targetFunctionAddressRegister asm.Register, functype *wasm.FunctionType) error {
	// Release all the registers as our calling convention requires caller-save.
	if err := c.compileReleaseAllRegistersToStack(); err != nil {
		return err
	}

	tmp, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !ok {
		panic("BUG: cannot take a free register")
	}

	// The stack should look like:
	//
	//                  reserved slots for results (if len(results) > len(args))
	//                      |         |
	//    ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
	//      |                       |                                                        |
	//      |             callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
	//      |
	// nextStackBasePointerOffset
	//
	// where callFrame is used to return to this currently executed function.
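	//
	// For illustration (numbers not from the source): for a callee of type
	// (param i64 i64) (result i64) with c.locationStack.sp == 5, we get
	// nextStackBasePointerOffset == 5 - 2 == 3, i.e. the callee's stack base
	// starts exactly where its arguments begin.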

	nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)

	callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)

	// Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		tmp)
	callFrameStackBasePointerInBytesLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)

	// Set callEngine.stackContext.stackBasePointer for the next function.
	c.assembler.CompileConstToRegister(arm64.ADD, nextStackBasePointerOffset<<3, tmp)
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		tmp,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

	// Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset,
		tmp)
	callFrameFunctionLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameFunctionLoc)

	// Set callEngine.moduleContext.fn to the next *function.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		targetFunctionAddressRegister,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

	// Write the return address into callFrameReturnAddressLoc.
	c.assembler.CompileReadInstructionAddress(tmp, arm64.B)
	callFrameReturnAddressLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameReturnAddressLoc)

	if targetFunctionAddressRegister == arm64CallingConventionModuleInstanceAddressRegister {
		// In this case we must move the value on targetFunctionAddressRegister to another register, otherwise
		// the address (the jump target below) will be modified and result in a segfault.
		// See #526.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, targetFunctionAddressRegister, tmp)
		targetFunctionAddressRegister = tmp
	}

	// Also, we have to put the code's moduleInstance address into arm64CallingConventionModuleInstanceAddressRegister.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		targetFunctionAddressRegister, functionModuleInstanceOffset,
		arm64CallingConventionModuleInstanceAddressRegister,
	)

	// Then, br into the target function's initial address.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		targetFunctionAddressRegister, functionCodeInitialAddressOffset,
		targetFunctionAddressRegister)

	c.assembler.CompileJumpToRegister(arm64.B, targetFunctionAddressRegister)

	// We consumed the function parameters, the call frame stack and reserved slots during the call.
	c.locationStack.sp = uint64(nextStackBasePointerOffset)

	// Also, the function results were pushed by the call.
	for _, t := range functype.Results {
		loc := c.locationStack.pushRuntimeValueLocationOnStack()
		switch t {
		case wasm.ValueTypeI32:
			loc.valueType = runtimeValueTypeI32
		case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref:
			loc.valueType = runtimeValueTypeI64
		case wasm.ValueTypeF32:
			loc.valueType = runtimeValueTypeF32
		case wasm.ValueTypeF64:
			loc.valueType = runtimeValueTypeF64
		case wasm.ValueTypeV128:
			loc.valueType = runtimeValueTypeV128Lo
			hi := c.locationStack.pushRuntimeValueLocationOnStack()
			hi.valueType = runtimeValueTypeV128Hi
		}
	}

	if err := c.compileModuleContextInitialization(); err != nil {
		return err
	}

	// On the function return, we re-initialize the state for this function.
	c.compileReservedStackBasePointerRegisterInitialization()

	c.compileReservedMemoryRegisterInitialization()
	return nil
}

// compileCallIndirect implements compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) (err error) {
	offset := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(offset); err != nil {
		return err
	}
	typeIndex := o.U1
	tableIndex := o.U2

	offsetReg := offset.register
	if isZeroRegister(offsetReg) {
		offsetReg, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.markRegisterUsed(offsetReg)

		// Zero the value on the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetReg)
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(tmp)

	tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(tmp2)

	// First, we need to check that the offset doesn't exceed the length of the table.
	// "tmp = &Tables[0]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		tmp,
	)
	// "tmp = [tmp + TableIndex*8] (= [&Tables[0] + TableIndex*sizeOf(*tableInstance)] = Tables[tableIndex])"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		tmp, int64(tableIndex)*8,
		tmp,
	)
	// "tmp2 = [tmp + tableInstanceTableLenOffset] (= len(Tables[tableIndex]))"
	c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, tableInstanceTableLenOffset, tmp2)

	// "cmp tmp2, offset"
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp2, offsetReg)

	// If it exceeds len(table), we trap.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
	// Otherwise, we proceed to do the function type check.

	// We need to obtain the absolute address of the table element.
	// "tmp = &Tables[tableIndex].table[0]"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		tmp, tableInstanceTableOffset,
		tmp,
	)
	// "offset = tmp + (offset << pointerSizeLog2) (== &table[offset])"
	// Here we left-shift by 3 in order to get the offset in bytes,
	// since the table element type is uintptr, which is 8 bytes.
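	// (pointerSizeLog2 is 3 on 64-bit targets, so the shift multiplies the offset by 8.)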
	c.assembler.CompileLeftShiftedRegisterToRegister(
		arm64.ADD,
		offsetReg, pointerSizeLog2,
		tmp,
		offsetReg,
	)

	// "offset = (*offset) (== table[offset])"
	c.assembler.CompileMemoryToRegister(arm64.LDRD, offsetReg, 0, offsetReg)

	// Check if the value of table[offset] equals zero, meaning that the target element is uninitialized.
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, offsetReg)

	// Skipped if the target is initialized.
	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeInvalidTableAccess)

	// Next we check that the type matches, i.e. table[offset].source.TypeID == targetFunctionType.
	// "tmp = table[offset].typeID"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		offsetReg, functionTypeIDOffset,
		tmp,
	)
	// "tmp2 = ModuleInstance.TypeIDs[index]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
		tmp2)
	c.assembler.CompileMemoryToRegister(arm64.LDRW, tmp2, int64(typeIndex)*4, tmp2)

	// Compare these two values, and if they are equal, we are ready to make the function call.
	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmp, tmp2)
	// Skipped if the type matches.
	c.compileMaybeExitFromNativeCode(arm64.BCONDEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall)

	targetFunctionType := &c.ir.Types[typeIndex]
	if err := c.compileCallImpl(offsetReg, targetFunctionType); err != nil {
		return err
	}

	// The offset register should be marked as unused as we consumed it in the function call.
	c.markRegisterUnused(offsetReg, tmp, tmp2)
	return nil
}

// compileDrop implements compiler.compileDrop for the arm64 architecture.
func (c *arm64Compiler) compileDrop(o *wazeroir.UnionOperation) error {
	return compileDropRange(c, o.U1)
}

func (c *arm64Compiler) compileSelectV128Impl(selectorRegister asm.Register) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, selectorRegister)
	brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

	// In this branch, we select the value of x2, so we move the value into x1.register so that
	// we can have the result in x1.register regardless of the selection.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
		x2.register, x2.register, x1.register, arm64.VectorArrangement16B)

	c.assembler.SetJumpTargetOnNext(brIfNotZero)

	// As noted, the result exists in x1.register regardless of the selector.
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	// Plus, x2.register is no longer used.
	c.markRegisterUnused(x2.register)
	return nil
}

// compileSelect implements compiler.compileSelect for the arm64 architecture.
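// Semantically, select pops the i32 condition cv, then x2 and x1, and pushes
// x1 if cv != 0, or x2 otherwise.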
func (c *arm64Compiler) compileSelect(o *wazeroir.UnionOperation) error {
	cv, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	isTargetVector := o.B3
	if isTargetVector {
		return c.compileSelectV128Impl(cv.register)
	}

	c.markRegisterUsed(cv.register)

	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
		// If both values are zero, the result is always zero.
		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
		c.markRegisterUnused(cv.register)
		return nil
	}

	// In the following, we emit the code so that x1's register contains the chosen value
	// no matter which of the original x1 or x2 is selected.
	//
	// If x1 is currently on the zero register, we cannot place the result there because
	// a MOV into arm64.RegRZR is discarded and any read of it yields zero regardless of the value.
	// So we explicitly assign a general purpose register to x1 here.
	if isZeroRegister(x1.register) {
		// Mark x2's and cv's registers as used so they won't be chosen.
		c.markRegisterUsed(x2.register)
		// Pick a non-zero register for x1.
		x1Reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		x1.setRegister(x1Reg)
		// And zero out the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, x1Reg)
	}

	// At this point, x1 is on a non-zero register, and x2 is on either a general purpose or the zero register.

	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, cv.register)
	brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

	// If cv == 0, we move the value of x2 to x1.register.

	switch x1.valueType {
	case runtimeValueTypeI32:
		// TODO: use 32-bit mov
		c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
	case runtimeValueTypeI64:
		c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
	case runtimeValueTypeF32:
		// TODO: use 32-bit mov
		c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
	case runtimeValueTypeF64:
		c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
	default:
		return errors.New("TODO: implement vector type select")
	}

	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)

	// Otherwise, nothing to do for select.
	c.assembler.SetJumpTargetOnNext(brIfNotZero)

	// Only x1.register is reused.
	c.markRegisterUnused(cv.register, x2.register)
	return nil
}

// compilePick implements compiler.compilePick for the arm64 architecture.
func (c *arm64Compiler) compilePick(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	depth := o.U1
	isTargetVector := o.B3

	pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)]
	pickedRegister, err := c.allocateRegister(pickTarget.getRegisterType())
	if err != nil {
		return err
	}

	if pickTarget.onRegister() { // Copy the value to the pickedRegister.
		switch pickTarget.valueType {
		case runtimeValueTypeI32:
			c.assembler.CompileRegisterToRegister(arm64.MOVW, pickTarget.register, pickedRegister)
		case runtimeValueTypeI64:
			c.assembler.CompileRegisterToRegister(arm64.MOVD, pickTarget.register, pickedRegister)
		case runtimeValueTypeF32:
			c.assembler.CompileRegisterToRegister(arm64.FMOVS, pickTarget.register, pickedRegister)
		case runtimeValueTypeF64:
			c.assembler.CompileRegisterToRegister(arm64.FMOVD, pickTarget.register, pickedRegister)
		case runtimeValueTypeV128Lo:
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				pickTarget.register, pickTarget.register, pickedRegister, arm64.VectorArrangement16B)
		case runtimeValueTypeV128Hi:
			panic("BUG") // since the pick target must point to the lower 64 bits of a vector.
		}
	} else if pickTarget.onStack() {
		// Temporarily assign a register to the pick target, and then load the value.
		pickTarget.setRegister(pickedRegister)
		c.compileLoadValueOnStackToRegister(pickTarget)

		// After the load, we revert the register assignment to the pick target.
		pickTarget.setRegister(asm.NilRegister)
		if isTargetVector {
			hi := &c.locationStack.stack[pickTarget.stackPointer+1]
			hi.setRegister(asm.NilRegister)
		}
	}

	// Now we have the value of the target on the pickedRegister,
	// so push the location.
	c.pushRuntimeValueLocationOnRegister(pickedRegister, pickTarget.valueType)
	if isTargetVector {
		c.pushRuntimeValueLocationOnRegister(pickedRegister, runtimeValueTypeV128Hi)
	}
	return nil
}

// compileAdd implements compiler.compileAdd for the arm64 architecture.
func (c *arm64Compiler) compileAdd(o *wazeroir.UnionOperation) error {
	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// Addition can be a nop if one of the operands is zero.
	if isZeroRegister(x1.register) {
		c.pushRuntimeValueLocationOnRegister(x2.register, x1.valueType)
		return nil
	} else if isZeroRegister(x2.register) {
		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
		return nil
	}

	var inst asm.Instruction
	unsignedType := wazeroir.UnsignedType(o.B1)
	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		inst = arm64.ADDW
	case wazeroir.UnsignedTypeI64:
		inst = arm64.ADD
	case wazeroir.UnsignedTypeF32:
		inst = arm64.FADDS
	case wazeroir.UnsignedTypeF64:
		inst = arm64.FADDD
	}

	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
	// The result is placed on the register for x1, so record it.
	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
	return nil
}

// compileSub implements compiler.compileSub for the arm64 architecture.
func (c *arm64Compiler) compileSub(o *wazeroir.UnionOperation) error {
	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// If both registers are zero, this can be a nop and we just push the zero register.
	if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
		return nil
	}

	// At this point, at least one of the x1 or x2 registers is non-zero.
	// Choose the non-zero register as the destination.
1452 	destinationReg := x1.register
1453 	if isZeroRegister(x1.register) {
1454 		destinationReg = x2.register
1455 	}
1456 
1457 	var inst asm.Instruction
1458 	var vt runtimeValueType
1459 	unsignedType := wazeroir.UnsignedType(o.B1)
1460 	switch unsignedType {
1461 	case wazeroir.UnsignedTypeI32:
1462 		inst = arm64.SUBW
1463 		vt = runtimeValueTypeI32
1464 	case wazeroir.UnsignedTypeI64:
1465 		inst = arm64.SUB
1466 		vt = runtimeValueTypeI64
1467 	case wazeroir.UnsignedTypeF32:
1468 		inst = arm64.FSUBS
1469 		vt = runtimeValueTypeF32
1470 	case wazeroir.UnsignedTypeF64:
1471 		inst = arm64.FSUBD
1472 		vt = runtimeValueTypeF64
1473 	}
1474 
1475 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1476 	c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
1477 	return nil
1478 }
1479 
1480 // compileMul implements compiler.compileMul for the arm64 architecture.
1481 func (c *arm64Compiler) compileMul(o *wazeroir.UnionOperation) error {
1482 	x1, x2, err := c.popTwoValuesOnRegisters()
1483 	if err != nil {
1484 		return err
1485 	}
1486 
1487 	// Multiplication always results in zero if either operand is zero, so we just push the zero register.
1488 	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1489 		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
1490 		return nil
1491 	}
1492 
1493 	var inst asm.Instruction
1494 	var vt runtimeValueType
1495 	unsignedType := wazeroir.UnsignedType(o.B1)
1496 	switch unsignedType {
1497 	case wazeroir.UnsignedTypeI32:
1498 		inst = arm64.MULW
1499 		vt = runtimeValueTypeI32
1500 	case wazeroir.UnsignedTypeI64:
1501 		inst = arm64.MUL
1502 		vt = runtimeValueTypeI64
1503 	case wazeroir.UnsignedTypeF32:
1504 		inst = arm64.FMULS
1505 		vt = runtimeValueTypeF32
1506 	case wazeroir.UnsignedTypeF64:
1507 		inst = arm64.FMULD
1508 		vt = runtimeValueTypeF64
1509 	}
1510 
1511 	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
1512 	// The result is placed on a register for x1, so record it.
1513 	c.pushRuntimeValueLocationOnRegister(x1.register, vt)
1514 	return nil
1515 }
1516 
1517 // compileClz implements compiler.compileClz for the arm64 architecture.
1518 func (c *arm64Compiler) compileClz(o *wazeroir.UnionOperation) error {
1519 	v, err := c.popValueOnRegister()
1520 	if err != nil {
1521 		return err
1522 	}
1523 
1524 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1525 	if isZeroRegister(v.register) {
1526 		// If the target is the zero register, the result is always 32 (or 64 for the 64-bit case),
1527 		// so we allocate a register and load the constant into it.
1528 		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
1529 		if err != nil {
1530 			return err
1531 		}
1532 		var vt runtimeValueType
1533 		if unsignedInt == wazeroir.UnsignedInt32 {
1534 			vt = runtimeValueTypeI32
1535 			c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
1536 		} else {
1537 			vt = runtimeValueTypeI64
1538 			c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
1539 		}
1540 		c.pushRuntimeValueLocationOnRegister(reg, vt)
1541 		return nil
1542 	}
1543 
1544 	reg := v.register
1545 	var vt runtimeValueType
1546 	if unsignedInt == wazeroir.UnsignedInt32 {
1547 		vt = runtimeValueTypeI32
1548 		c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
1549 	} else {
1550 		vt = runtimeValueTypeI64
1551 		c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
1552 	}
1553 	c.pushRuntimeValueLocationOnRegister(reg, vt)
1554 	return nil
1555 }
1556 
1557 // compileCtz implements compiler.compileCtz for the arm64 architecture.
1558 func (c *arm64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
1559 	v, err := c.popValueOnRegister()
1560 	if err != nil {
1561 		return err
1562 	}
1563 
1564 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1565 	reg := v.register
1566 	if isZeroRegister(reg) {
1567 		// If the target is the zero register, the result is always 32 (or 64 for the 64-bit case),
1568 		// so we allocate a register and load the constant into it.
1569 		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
1570 		if err != nil {
1571 			return err
1572 		}
1573 		var vt runtimeValueType
1574 		if unsignedInt == wazeroir.UnsignedInt32 {
1575 			vt = runtimeValueTypeI32
1576 			c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
1577 		} else {
1578 			vt = runtimeValueTypeI64
1579 			c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
1580 		}
1581 		c.pushRuntimeValueLocationOnRegister(reg, vt)
1582 		return nil
1583 	}
1584 
1585 	// Since arm64 doesn't have an instruction that directly counts trailing zeros,
1586 	// we reverse the bits first and then do CLZ, which is exactly how
1587 	// GCC implements __builtin_ctz for arm64.
1588 	var vt runtimeValueType
1589 	if unsignedInt == wazeroir.UnsignedInt32 {
1590 		vt = runtimeValueTypeI32
1591 		c.assembler.CompileRegisterToRegister(arm64.RBITW, reg, reg)
1592 		c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
1593 	} else {
1594 		vt = runtimeValueTypeI64
1595 		c.assembler.CompileRegisterToRegister(arm64.RBIT, reg, reg)
1596 		c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
1597 	}
1598 	c.pushRuntimeValueLocationOnRegister(reg, vt)
1599 	return nil
1600 }
1601 
1602 // compilePopcnt implements compiler.compilePopcnt for the arm64 architecture.
1603 func (c *arm64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error {
1604 	v, err := c.popValueOnRegister()
1605 	if err != nil {
1606 		return err
1607 	}
1608 
1609 	reg := v.register
1610 	if isZeroRegister(reg) {
1611 		c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
1612 		return nil
1613 	}
1614 
1615 	freg, err := c.allocateRegister(registerTypeVector)
1616 	if err != nil {
1617 		return err
1618 	}
1619 
1620 	// arm64 doesn't have an instruction for population count on a scalar register,
1621 	// so we use the vector one (VCNT).
1622 	// This is exactly how the official Go compiler implements bits.OnesCount.
1623 	// For example, "func() int { return bits.OnesCount(10) }" is compiled as
1624 	//
1625 	//	MOVD $10, R0 ;; Load 10.
1626 	//	FMOVD R0, F0
1627 	//	VCNT V0.B8, V0.B8
1628 	//	UADDLV V0.B8, V0
1629 	//
1630 	var movInst asm.Instruction
1631 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1632 	if unsignedInt == wazeroir.UnsignedInt32 {
1633 		movInst = arm64.FMOVS
1634 	} else {
1635 		movInst = arm64.FMOVD
1636 	}
1637 	c.assembler.CompileRegisterToRegister(movInst, reg, freg)
1638 	c.assembler.CompileVectorRegisterToVectorRegister(arm64.VCNT, freg, freg,
1639 		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
1640 	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UADDLV, freg, freg, arm64.VectorArrangement8B,
1641 		arm64.VectorIndexNone, arm64.VectorIndexNone)
1642 
1643 	c.assembler.CompileRegisterToRegister(movInst, freg, reg)
1644 
1645 	c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
1646 	return nil
1647 }
1648 
1649 // compileDiv implements compiler.compileDiv for the arm64 architecture.
1650 func (c *arm64Compiler) compileDiv(o *wazeroir.UnionOperation) error {
1651 	dividend, divisor, err := c.popTwoValuesOnRegisters()
1652 	if err != nil {
1653 		return err
1654 	}
1655 
1656 	signedType := wazeroir.SignedType(o.B1)
1657 
1658 	// If the divisor is the zero register, the division always traps, so we exit from the function deterministically.
1659 	if isZeroRegister(divisor.register) {
1660 		// Push a dummy value so that subsequent instructions see a consistent location stack state.
1661 		v := c.locationStack.pushRuntimeValueLocationOnStack()
1662 		switch signedType {
1663 		case wazeroir.SignedTypeInt32, wazeroir.SignedTypeUint32:
1664 			v.valueType = runtimeValueTypeI32
1665 		case wazeroir.SignedTypeUint64, wazeroir.SignedTypeInt64:
1666 			v.valueType = runtimeValueTypeI64
1667 		}
1668 		c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
1669 		return nil
1670 	}
1671 
1672 	var inst asm.Instruction
1673 	var vt runtimeValueType
1674 	switch signedType {
1675 	case wazeroir.SignedTypeUint32:
1676 		inst = arm64.UDIVW
1677 		if err := c.compileIntegerDivPrecheck(true, false, dividend.register, divisor.register); err != nil {
1678 			return err
1679 		}
1680 		vt = runtimeValueTypeI32
1681 	case wazeroir.SignedTypeUint64:
1682 		if err := c.compileIntegerDivPrecheck(false, false, dividend.register, divisor.register); err != nil {
1683 			return err
1684 		}
1685 		inst = arm64.UDIV
1686 		vt = runtimeValueTypeI64
1687 	case wazeroir.SignedTypeInt32:
1688 		if err := c.compileIntegerDivPrecheck(true, true, dividend.register, divisor.register); err != nil {
1689 			return err
1690 		}
1691 		inst = arm64.SDIVW
1692 		vt = runtimeValueTypeI32
1693 	case wazeroir.SignedTypeInt64:
1694 		if err := c.compileIntegerDivPrecheck(false, true, dividend.register, divisor.register); err != nil {
1695 			return err
1696 		}
1697 		inst = arm64.SDIV
1698 		vt = runtimeValueTypeI64
1699 	case wazeroir.SignedTypeFloat32:
1700 		inst = arm64.FDIVS
1701 		vt = runtimeValueTypeF32
1702 	case wazeroir.SignedTypeFloat64:
1703 		inst = arm64.FDIVD
1704 		vt = runtimeValueTypeF64
1705 	}
1706 
1707 	c.assembler.CompileRegisterToRegister(inst, divisor.register, dividend.register)
1708 
1709 	c.pushRuntimeValueLocationOnRegister(dividend.register, vt)
1710 	return nil
1711 }
1712 
1713 // compileIntegerDivPrecheck adds instructions to check that the divisor and dividend are valid for the division operation.
1714 // First, it checks whether the divisor equals zero and, if so, exits the function.
1715 // Additionally, for signed divisions, it checks whether the division would overflow.
1716 func (c *arm64Compiler) compileIntegerDivPrecheck(is32Bit, isSigned bool, dividend, divisor asm.Register) error {
1717 	// We check whether the divisor equals zero.
1718 	var cmpInst, movInst, loadInst asm.Instruction
1719 	var minValueOffsetInVM int64
1720 	if is32Bit {
1721 		cmpInst = arm64.CMPW
1722 		movInst = arm64.MOVW
1723 		loadInst = arm64.LDRW
1724 		minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
1725 	} else {
1726 		cmpInst = arm64.CMP
1727 		movInst = arm64.MOVD
1728 		loadInst = arm64.LDRD
1729 		minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
1730 	}
1731 	c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisor)
1732 
1733 	// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
1734 	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
1735 	// Otherwise, we proceed.
1736 
1737 	// If the operation is a signed integer division, we have to do an additional overflow check.
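	// Note: the only signed integer division that overflows is "math.MinInt32 / -1"
	// (and the 64-bit equivalent), since the true quotient 2147483648 is not
	// representable in int32. WebAssembly requires a trap in that case; for
	// comparison, the Go specification instead defines the quotient to wrap around
	// to the most negative value, e.g.:
	//
	//	var x int32 = math.MinInt32
	//	_ = x / -1 // evaluates to math.MinInt32 in Go, but must trap in Wasm.
	//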
1738 	if isSigned {
1739 		// For signed division, we need a branch for the "math.MinInt{32,64} / -1"
1740 		// case, which overflows.
1741 
1742 		// First, we compare the divisor with -1.
1743 		c.assembler.CompileConstToRegister(movInst, -1, arm64ReservedRegisterForTemporary)
1744 		c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, divisor)
1745 
1746 		// If they are not equal, we skip the following check.
1747 		brIfDivisorNonMinusOne := c.assembler.CompileJump(arm64.BCONDNE)
1748 
1749 		// Otherwise, we further check if the dividend equals math.MinInt32 or MinInt64.
1750 		c.assembler.CompileMemoryToRegister(
1751 			loadInst,
1752 			arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
1753 			arm64ReservedRegisterForTemporary,
1754 		)
1755 		c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, dividend)
1756 
1757 		// If they are not equal, it is safe to execute the division.
1758 		// Otherwise, we raise the overflow error.
1759 		c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerOverflow)
1760 
1761 		c.assembler.SetJumpTargetOnNext(brIfDivisorNonMinusOne)
1762 	}
1763 	return nil
1764 }
1765 
1766 // compileRem implements compiler.compileRem for the arm64 architecture.
1767 func (c *arm64Compiler) compileRem(o *wazeroir.UnionOperation) error {
1768 	dividend, divisor, err := c.popTwoValuesOnRegisters()
1769 	if err != nil {
1770 		return err
1771 	}
1772 
1773 	dividendReg := dividend.register
1774 	divisorReg := divisor.register
1775 
1776 	// If the divisor is the zero register, the operation always traps, so we exit from the function deterministically.
1777 	if isZeroRegister(divisor.register) {
1778 		// Push a dummy value so that subsequent instructions see a consistent location stack state.
1779 		v := c.locationStack.pushRuntimeValueLocationOnStack()
1780 		v.valueType = runtimeValueTypeI32
1781 		c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
1782 		return nil
1783 	}
1784 
1785 	var divInst, msubInst, cmpInst asm.Instruction
1786 	signedInt := wazeroir.SignedInt(o.B1)
1787 	switch signedInt {
1788 	case wazeroir.SignedUint32:
1789 		divInst = arm64.UDIVW
1790 		msubInst = arm64.MSUBW
1791 		cmpInst = arm64.CMPW
1792 	case wazeroir.SignedUint64:
1793 		divInst = arm64.UDIV
1794 		msubInst = arm64.MSUB
1795 		cmpInst = arm64.CMP
1796 	case wazeroir.SignedInt32:
1797 		divInst = arm64.SDIVW
1798 		msubInst = arm64.MSUBW
1799 		cmpInst = arm64.CMPW
1800 	case wazeroir.SignedInt64:
1801 		divInst = arm64.SDIV
1802 		msubInst = arm64.MSUB
1803 		cmpInst = arm64.CMP
1804 	}
1805 
1806 	// We check whether the divisor equals zero.
1807 	c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisorReg)
1808 
1809 	// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
1810 	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
1811 	// Otherwise, we proceed.
1812 
1813 	// Temporarily mark them as used so we can allocate a result register while keeping these values.
1814 	c.markRegisterUsed(dividend.register, divisor.register)
1815 
1816 	resultReg, err := c.allocateRegister(registerTypeGeneralPurpose)
1817 	if err != nil {
1818 		return err
1819 	}
1820 
1821 	// arm64 doesn't have an instruction for rem, so we calculate it with two instructions: UDIV (SDIV for signed) and MSUB.
1822 	// This is exactly the same code that Clang emits:
1823 	// [input: x0=dividend, x1=divisor]
1824 	// >> UDIV x2, x0, x1
1825 	// >> MSUB x3, x2, x1, x0
1826 	// [result: x2=quotient, x3=remainder]
1827 	//
1828 	c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg)
1829 	// ResultReg = dividendReg - (divisorReg * resultReg)
1830 	c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg)
1831 
1832 	c.markRegisterUnused(dividend.register, divisor.register)
1833 	c.pushRuntimeValueLocationOnRegister(resultReg, dividend.valueType)
1834 	return nil
1835 }
1836 
1837 // compileAnd implements compiler.compileAnd for the arm64 architecture.
1838 func (c *arm64Compiler) compileAnd(o *wazeroir.UnionOperation) error {
1839 	x1, x2, err := c.popTwoValuesOnRegisters()
1840 	if err != nil {
1841 		return err
1842 	}
1843 
1844 	// If either of the registers x1 or x2 is zero,
1845 	// the result will always be zero.
1846 	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1847 		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
1848 		return nil
1849 	}
1850 
1851 	// At this point, at least one of the x1 or x2 registers is non-zero.
1852 	// Choose a non-zero register as the destination.
1853 	destinationReg := x1.register
1854 	if isZeroRegister(x1.register) {
1855 		destinationReg = x2.register
1856 	}
1857 
1858 	var inst asm.Instruction
1859 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1860 	switch unsignedInt {
1861 	case wazeroir.UnsignedInt32:
1862 		inst = arm64.ANDW
1863 	case wazeroir.UnsignedInt64:
1864 		inst = arm64.AND
1865 	}
1866 
1867 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1868 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1869 	return nil
1870 }
1871 
1872 // compileOr implements compiler.compileOr for the arm64 architecture.
1873 func (c *arm64Compiler) compileOr(o *wazeroir.UnionOperation) error {
1874 	x1, x2, err := c.popTwoValuesOnRegisters()
1875 	if err != nil {
1876 		return err
1877 	}
1878 
1879 	if isZeroRegister(x1.register) {
1880 		c.pushRuntimeValueLocationOnRegister(x2.register, x2.valueType)
1881 		return nil
1882 	}
1883 	if isZeroRegister(x2.register) {
1884 		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1885 		return nil
1886 	}
1887 
1888 	var inst asm.Instruction
1889 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1890 	switch unsignedInt {
1891 	case wazeroir.UnsignedInt32:
1892 		inst = arm64.ORRW
1893 	case wazeroir.UnsignedInt64:
1894 		inst = arm64.ORR
1895 	}
1896 
1897 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
1898 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1899 	return nil
1900 }
1901 
1902 // compileXor implements compiler.compileXor for the arm64 architecture.
1903 func (c *arm64Compiler) compileXor(o *wazeroir.UnionOperation) error {
1904 	x1, x2, err := c.popTwoValuesOnRegisters()
1905 	if err != nil {
1906 		return err
1907 	}
1908 
1909 	// Choose a non-zero register as the destination if possible: writes to the zero
1910 	// register are discarded, so we prefer x2's register whenever x1 is the zero register.
1911 	destinationReg := x1.register
1912 	if isZeroRegister(x1.register) {
1913 		destinationReg = x2.register
1914 	}
1915 
1916 	var inst asm.Instruction
1917 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1918 	switch unsignedInt {
1919 	case wazeroir.UnsignedInt32:
1920 		inst = arm64.EORW
1921 	case wazeroir.UnsignedInt64:
1922 		inst = arm64.EOR
1923 	}
1924 
1925 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1926 	c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType)
1927 	return nil
1928 }
1929 
1930 // compileShl implements compiler.compileShl for the arm64 architecture.
1931 func (c *arm64Compiler) compileShl(o *wazeroir.UnionOperation) error {
1932 	x1, x2, err := c.popTwoValuesOnRegisters()
1933 	if err != nil {
1934 		return err
1935 	}
1936 
1937 	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1938 		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1939 		return nil
1940 	}
1941 
1942 	var inst asm.Instruction
1943 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1944 	switch unsignedInt {
1945 	case wazeroir.UnsignedInt32:
1946 		inst = arm64.LSLW
1947 	case wazeroir.UnsignedInt64:
1948 		inst = arm64.LSL
1949 	}
1950 
1951 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
1952 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1953 	return nil
1954 }
1955 
1956 // compileShr implements compiler.compileShr for the arm64 architecture.
1957 func (c *arm64Compiler) compileShr(o *wazeroir.UnionOperation) error {
1958 	x1, x2, err := c.popTwoValuesOnRegisters()
1959 	if err != nil {
1960 		return err
1961 	}
1962 
1963 	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1964 		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1965 		return nil
1966 	}
1967 
1968 	var inst asm.Instruction
1969 	signedInt := wazeroir.SignedInt(o.B1)
1970 	switch signedInt {
1971 	case wazeroir.SignedInt32:
1972 		inst = arm64.ASRW
1973 	case wazeroir.SignedInt64:
1974 		inst = arm64.ASR
1975 	case wazeroir.SignedUint32:
1976 		inst = arm64.LSRW
1977 	case wazeroir.SignedUint64:
1978 		inst = arm64.LSR
1979 	}
1980 
1981 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
1982 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1983 	return nil
1984 }
1985 
1986 // compileRotl implements compiler.compileRotl for the arm64 architecture.
1987 func (c *arm64Compiler) compileRotl(o *wazeroir.UnionOperation) error {
1988 	x1, x2, err := c.popTwoValuesOnRegisters()
1989 	if err != nil {
1990 		return err
1991 	}
1992 
1993 	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1994 		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1995 		return nil
1996 	}
1997 
1998 	var inst, neginst asm.Instruction
1999 	unsignedInt := wazeroir.UnsignedInt(o.B1)
2000 	switch unsignedInt {
2001 	case wazeroir.UnsignedInt32:
2002 		inst = arm64.RORW
2003 		neginst = arm64.NEGW
2004 	case wazeroir.UnsignedInt64:
2005 		inst = arm64.ROR
2006 		neginst = arm64.NEG
2007 	}
2008 
2009 	// arm64 doesn't have a rotate-left instruction, so we negate the shift amount and rotate right instead:
2010 	// rotl(x, n) == rotr(x, -n). This matches the assembly output of bits.RotateLeft.
2011 	c.assembler.CompileRegisterToRegister(neginst, x2.register, x2.register)
2012 
2013 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
2014 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2015 	return nil
2016 }
2017 
2018 // compileRotr implements compiler.compileRotr for the arm64 architecture.
2019 func (c *arm64Compiler) compileRotr(o *wazeroir.UnionOperation) error { 2020 x1, x2, err := c.popTwoValuesOnRegisters() 2021 if err != nil { 2022 return err 2023 } 2024 2025 if isZeroRegister(x1.register) || isZeroRegister(x2.register) { 2026 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2027 return nil 2028 } 2029 2030 var inst asm.Instruction 2031 unsignedInt := wazeroir.UnsignedInt(o.B1) 2032 switch unsignedInt { 2033 case wazeroir.UnsignedInt32: 2034 inst = arm64.RORW 2035 case wazeroir.UnsignedInt64: 2036 inst = arm64.ROR 2037 } 2038 2039 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) 2040 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2041 return nil 2042 } 2043 2044 // compileAbs implements compiler.compileAbs for the arm64 architecture. 2045 func (c *arm64Compiler) compileAbs(o *wazeroir.UnionOperation) error { 2046 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2047 return c.compileSimpleUnop(arm64.FABSS, runtimeValueTypeF32) 2048 } else { 2049 return c.compileSimpleUnop(arm64.FABSD, runtimeValueTypeF64) 2050 } 2051 } 2052 2053 // compileNeg implements compiler.compileNeg for the arm64 architecture. 2054 func (c *arm64Compiler) compileNeg(o *wazeroir.UnionOperation) error { 2055 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2056 return c.compileSimpleUnop(arm64.FNEGS, runtimeValueTypeF32) 2057 } else { 2058 return c.compileSimpleUnop(arm64.FNEGD, runtimeValueTypeF64) 2059 } 2060 } 2061 2062 // compileCeil implements compiler.compileCeil for the arm64 architecture. 2063 func (c *arm64Compiler) compileCeil(o *wazeroir.UnionOperation) error { 2064 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2065 return c.compileSimpleUnop(arm64.FRINTPS, runtimeValueTypeF32) 2066 } else { 2067 return c.compileSimpleUnop(arm64.FRINTPD, runtimeValueTypeF64) 2068 } 2069 } 2070 2071 // compileFloor implements compiler.compileFloor for the arm64 architecture. 2072 func (c *arm64Compiler) compileFloor(o *wazeroir.UnionOperation) error { 2073 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2074 return c.compileSimpleUnop(arm64.FRINTMS, runtimeValueTypeF32) 2075 } else { 2076 return c.compileSimpleUnop(arm64.FRINTMD, runtimeValueTypeF64) 2077 } 2078 } 2079 2080 // compileTrunc implements compiler.compileTrunc for the arm64 architecture. 2081 func (c *arm64Compiler) compileTrunc(o *wazeroir.UnionOperation) error { 2082 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2083 return c.compileSimpleUnop(arm64.FRINTZS, runtimeValueTypeF32) 2084 } else { 2085 return c.compileSimpleUnop(arm64.FRINTZD, runtimeValueTypeF64) 2086 } 2087 } 2088 2089 // compileNearest implements compiler.compileNearest for the arm64 architecture. 2090 func (c *arm64Compiler) compileNearest(o *wazeroir.UnionOperation) error { 2091 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2092 return c.compileSimpleUnop(arm64.FRINTNS, runtimeValueTypeF32) 2093 } else { 2094 return c.compileSimpleUnop(arm64.FRINTND, runtimeValueTypeF64) 2095 } 2096 } 2097 2098 // compileSqrt implements compiler.compileSqrt for the arm64 architecture. 2099 func (c *arm64Compiler) compileSqrt(o *wazeroir.UnionOperation) error { 2100 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2101 return c.compileSimpleUnop(arm64.FSQRTS, runtimeValueTypeF32) 2102 } else { 2103 return c.compileSimpleUnop(arm64.FSQRTD, runtimeValueTypeF64) 2104 } 2105 } 2106 2107 // compileMin implements compiler.compileMin for the arm64 architecture. 
2108 func (c *arm64Compiler) compileMin(o *wazeroir.UnionOperation) error {
2109 	if wazeroir.Float(o.B1) == wazeroir.Float32 {
2110 		return c.compileSimpleFloatBinop(arm64.FMINS)
2111 	} else {
2112 		return c.compileSimpleFloatBinop(arm64.FMIND)
2113 	}
2114 }
2115 
2116 // compileMax implements compiler.compileMax for the arm64 architecture.
2117 func (c *arm64Compiler) compileMax(o *wazeroir.UnionOperation) error {
2118 	if wazeroir.Float(o.B1) == wazeroir.Float32 {
2119 		return c.compileSimpleFloatBinop(arm64.FMAXS)
2120 	} else {
2121 		return c.compileSimpleFloatBinop(arm64.FMAXD)
2122 	}
2123 }
2124 
2125 func (c *arm64Compiler) compileSimpleFloatBinop(inst asm.Instruction) error {
2126 	x1, x2, err := c.popTwoValuesOnRegisters()
2127 	if err != nil {
2128 		return err
2129 	}
2130 	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
2131 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2132 	return nil
2133 }
2134 
2135 // compileCopysign implements compiler.compileCopysign for the arm64 architecture.
2136 func (c *arm64Compiler) compileCopysign(o *wazeroir.UnionOperation) error {
2137 	x1, x2, err := c.popTwoValuesOnRegisters()
2138 	if err != nil {
2139 		return err
2140 	}
2141 
2142 	var ldr asm.Instruction
2143 	var minValueOffsetInVM int64
2144 	if wazeroir.Float(o.B1) == wazeroir.Float32 {
2145 		ldr = arm64.FLDRS
2146 		minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
2147 	} else {
2148 		ldr = arm64.FLDRD
2149 		minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
2150 	}
2151 
2152 	c.markRegisterUsed(x1.register, x2.register)
2153 	freg, err := c.allocateRegister(registerTypeVector)
2154 	if err != nil {
2155 		return err
2156 	}
2157 
2158 	// This is exactly the same code emitted by GCC for "__builtin_copysign":
2159 	//
2160 	//	mov x0, -9223372036854775808
2161 	//	fmov d2, x0
2162 	//	vbit v0.8b, v1.8b, v2.8b
2163 	//
2164 	// "mov freg, -9223372036854775808" -- the sign-bit mask, stored at ce.minimum64BitSignedInt (ce.minimum32BitSignedInt for the 32-bit case).
2165 	c.assembler.CompileMemoryToRegister(
2166 		ldr,
2167 		arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
2168 		freg,
2169 	)
2170 
2171 	// VBIT inserts each bit from the first operand into the destination if the corresponding bit of the second operand is 1,
2172 	// otherwise it leaves the destination bit unchanged.
2173 	// See https://developer.arm.com/documentation/dui0801/g/Advanced-SIMD-Instructions--32-bit-/VBIT
2174 	//
2175 	// "vbit x1vreg.8b, x2vreg.8b, vreg.8b" == "insert the sign bit of x2 into x1, with the mask in vreg".
2176 	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VBIT,
2177 		freg, x2.register, x1.register, arm64.VectorArrangement16B)
2178 
2179 	c.markRegisterUnused(x2.register)
2180 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
2181 	return nil
2182 }
2183 
2184 // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture.
2185 func (c *arm64Compiler) compileI32WrapFromI64() error {
2186 	return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI32)
2187 }
2188 
2189 // compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture.
2190 func (c *arm64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) error {
2191 	// Clear the floating point status register (FPSR).
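	// Note: the FCVTZ* conversions below do not trap in hardware. Instead, an invalid
	// conversion (a NaN input or an out-of-range value) sets the cumulative IOC flag
	// (bit 0) in FPSR. Zeroing FPSR here ensures that the check after the conversion
	// observes only the effect of this conversion.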
2192 	c.assembler.CompileRegisterToRegister(arm64.MSR, arm64.RegRZR, arm64.RegFPSR)
2193 
2194 	var vt runtimeValueType
2195 	var convinst asm.Instruction
2196 	inputType := wazeroir.Float(o.B1)
2197 	outputType := wazeroir.SignedInt(o.B2)
2198 	nonTrapping := o.B3
2199 
2200 	is32bitFloat := inputType == wazeroir.Float32
2201 	if is32bitFloat && outputType == wazeroir.SignedInt32 {
2202 		convinst = arm64.FCVTZSSW
2203 		vt = runtimeValueTypeI32
2204 	} else if is32bitFloat && outputType == wazeroir.SignedInt64 {
2205 		convinst = arm64.FCVTZSS
2206 		vt = runtimeValueTypeI64
2207 	} else if !is32bitFloat && outputType == wazeroir.SignedInt32 {
2208 		convinst = arm64.FCVTZSDW
2209 		vt = runtimeValueTypeI32
2210 	} else if !is32bitFloat && outputType == wazeroir.SignedInt64 {
2211 		convinst = arm64.FCVTZSD
2212 		vt = runtimeValueTypeI64
2213 	} else if is32bitFloat && outputType == wazeroir.SignedUint32 {
2214 		convinst = arm64.FCVTZUSW
2215 		vt = runtimeValueTypeI32
2216 	} else if is32bitFloat && outputType == wazeroir.SignedUint64 {
2217 		convinst = arm64.FCVTZUS
2218 		vt = runtimeValueTypeI64
2219 	} else if !is32bitFloat && outputType == wazeroir.SignedUint32 {
2220 		convinst = arm64.FCVTZUDW
2221 		vt = runtimeValueTypeI32
2222 	} else if !is32bitFloat && outputType == wazeroir.SignedUint64 {
2223 		convinst = arm64.FCVTZUD
2224 		vt = runtimeValueTypeI64
2225 	}
2226 
2227 	source, err := c.popValueOnRegister()
2228 	if err != nil {
2229 		return err
2230 	}
2231 	sourceReg := source.register
2232 
2233 	destinationReg, err := c.allocateRegister(registerTypeGeneralPurpose)
2234 	if err != nil {
2235 		return err
2236 	}
2237 
2238 	c.assembler.CompileRegisterToRegister(convinst, sourceReg, destinationReg)
2239 	c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
2240 
2241 	if !nonTrapping {
2242 		// Obtain the floating point status register value into a general purpose register,
2243 		// so that we can check if the conversion resulted in undefined behavior.
2244 		c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.RegFPSR, arm64ReservedRegisterForTemporary)
2245 		// Check if the conversion was invalid by comparing the IOC flag (bit 0 of FPSR) with 1.
2246 		// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
2247 		c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1)
2248 
2249 		brOK := c.assembler.CompileJump(arm64.BCONDNE)
2250 
2251 		// If so, exit the execution with an error depending on whether or not the source value is NaN.
2252 		var floatcmp asm.Instruction
2253 		if is32bitFloat {
2254 			floatcmp = arm64.FCMPS
2255 		} else {
2256 			floatcmp = arm64.FCMPD
2257 		}
2258 		c.assembler.CompileTwoRegistersToNone(floatcmp, sourceReg, sourceReg)
2259 		// The VS flag is set if at least one of the FCMP operands is NaN.
2260 		// https://developer.arm.com/documentation/dui0801/g/Condition-Codes/Comparison-of-condition-code-meanings-in-integer-and-floating-point-code
2261 		// If the source value is not NaN, the conversion overflowed.
2262 		c.compileMaybeExitFromNativeCode(arm64.BCONDVS, nativeCallStatusIntegerOverflow)
2263 
2264 		// Otherwise, the operation was invalid, as it tried to convert NaN to an integer.
2265 		c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)
2266 
2267 		// If there was no invalid operation, we branch here and continue.
2268 		c.assembler.SetJumpTargetOnNext(brOK)
2269 	}
2270 	return nil
2271 }
2272 
2273 // compileFConvertFromI implements compiler.compileFConvertFromI for the arm64 architecture.
2274 func (c *arm64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) error {
2275 	var convinst asm.Instruction
2276 	inputType := wazeroir.SignedInt(o.B1)
2277 	outputType := wazeroir.Float(o.B2)
2278 
2279 	if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 {
2280 		convinst = arm64.SCVTFWS
2281 	} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 {
2282 		convinst = arm64.SCVTFS
2283 	} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 {
2284 		convinst = arm64.SCVTFWD
2285 	} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 {
2286 		convinst = arm64.SCVTFD
2287 	} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 {
2288 		convinst = arm64.UCVTFWS
2289 	} else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 {
2290 		convinst = arm64.UCVTFS
2291 	} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 {
2292 		convinst = arm64.UCVTFWD
2293 	} else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 {
2294 		convinst = arm64.UCVTFD
2295 	}
2296 
2297 	var vt runtimeValueType
2298 	if outputType == wazeroir.Float32 {
2299 		vt = runtimeValueTypeF32
2300 	} else {
2301 		vt = runtimeValueTypeF64
2302 	}
2303 	return c.compileSimpleConversion(convinst, registerTypeVector, vt)
2304 }
2305 
2306 // compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the arm64 architecture.
2307 func (c *arm64Compiler) compileF32DemoteFromF64() error {
2308 	return c.compileSimpleUnop(arm64.FCVTDS, runtimeValueTypeF32)
2309 }
2310 
2311 // compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the arm64 architecture.
2312 func (c *arm64Compiler) compileF64PromoteFromF32() error {
2313 	return c.compileSimpleUnop(arm64.FCVTSD, runtimeValueTypeF64)
2314 }
2315 
2316 // compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the arm64 architecture.
2317 func (c *arm64Compiler) compileI32ReinterpretFromF32() error {
2318 	if peek := c.locationStack.peek(); peek.onStack() {
2319 		// If the value is on the stack, this is a no-op as there is nothing to do to convert the type.
2320 		peek.valueType = runtimeValueTypeI32
2321 		return nil
2322 	}
2323 	return c.compileSimpleConversion(arm64.FMOVS, registerTypeGeneralPurpose, runtimeValueTypeI32)
2324 }
2325 
2326 // compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the arm64 architecture.
2327 func (c *arm64Compiler) compileI64ReinterpretFromF64() error {
2328 	if peek := c.locationStack.peek(); peek.onStack() {
2329 		// If the value is on the stack, this is a no-op as there is nothing to do to convert the type.
2330 		peek.valueType = runtimeValueTypeI64
2331 		return nil
2332 	}
2333 	return c.compileSimpleConversion(arm64.FMOVD, registerTypeGeneralPurpose, runtimeValueTypeI64)
2334 }
2335 
2336 // compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the arm64 architecture.
2337 func (c *arm64Compiler) compileF32ReinterpretFromI32() error {
2338 	if peek := c.locationStack.peek(); peek.onStack() {
2339 		// If the value is on the stack, this is a no-op as there is nothing to do to convert the type.
2340 		peek.valueType = runtimeValueTypeF32
2341 		return nil
2342 	}
2343 	return c.compileSimpleConversion(arm64.FMOVS, registerTypeVector, runtimeValueTypeF32)
2344 }
2345 
2346 // compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the arm64 architecture.
2347 func (c *arm64Compiler) compileF64ReinterpretFromI64() error {
2348 	if peek := c.locationStack.peek(); peek.onStack() {
2349 		// If the value is on the stack, this is a no-op as there is nothing to do to convert the type.
2350 		peek.valueType = runtimeValueTypeF64
2351 		return nil
2352 	}
2353 	return c.compileSimpleConversion(arm64.FMOVD, registerTypeVector, runtimeValueTypeF64)
2354 }
2355 
2356 func (c *arm64Compiler) compileSimpleConversion(inst asm.Instruction, destinationRegType registerType, resultRuntimeValueType runtimeValueType) error {
2357 	source, err := c.popValueOnRegister()
2358 	if err != nil {
2359 		return err
2360 	}
2361 
2362 	destinationReg, err := c.allocateRegister(destinationRegType)
2363 	if err != nil {
2364 		return err
2365 	}
2366 
2367 	c.assembler.CompileRegisterToRegister(inst, source.register, destinationReg)
2368 	c.pushRuntimeValueLocationOnRegister(destinationReg, resultRuntimeValueType)
2369 	return nil
2370 }
2371 
2372 // compileExtend implements compiler.compileExtend for the arm64 architecture.
2373 func (c *arm64Compiler) compileExtend(o *wazeroir.UnionOperation) error {
2374 	signed := o.B1 != 0
2375 	if signed {
2376 		return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
2377 	} else {
2378 		return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI64)
2379 	}
2380 }
2381 
2382 // compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the arm64 architecture.
2383 func (c *arm64Compiler) compileSignExtend32From8() error {
2384 	return c.compileSimpleUnop(arm64.SXTBW, runtimeValueTypeI32)
2385 }
2386 
2387 // compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the arm64 architecture.
2388 func (c *arm64Compiler) compileSignExtend32From16() error {
2389 	return c.compileSimpleUnop(arm64.SXTHW, runtimeValueTypeI32)
2390 }
2391 
2392 // compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the arm64 architecture.
2393 func (c *arm64Compiler) compileSignExtend64From8() error {
2394 	return c.compileSimpleUnop(arm64.SXTB, runtimeValueTypeI64)
2395 }
2396 
2397 // compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the arm64 architecture.
2398 func (c *arm64Compiler) compileSignExtend64From16() error {
2399 	return c.compileSimpleUnop(arm64.SXTH, runtimeValueTypeI64)
2400 }
2401 
2402 // compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the arm64 architecture.
2403 func (c *arm64Compiler) compileSignExtend64From32() error {
2404 	return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
2405 }
2406 
2407 func (c *arm64Compiler) compileSimpleUnop(inst asm.Instruction, resultRuntimeValueType runtimeValueType) error {
2408 	v, err := c.popValueOnRegister()
2409 	if err != nil {
2410 		return err
2411 	}
2412 	reg := v.register
2413 	c.assembler.CompileRegisterToRegister(inst, reg, reg)
2414 	c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType)
2415 	return nil
2416 }
2417 
2418 // compileEq implements compiler.compileEq for the arm64 architecture.
2419 func (c *arm64Compiler) compileEq(o *wazeroir.UnionOperation) error {
2420 	return c.emitEqOrNe(true, wazeroir.UnsignedType(o.B1))
2421 }
2422 
2423 // compileNe implements compiler.compileNe for the arm64 architecture.
2424 func (c *arm64Compiler) compileNe(o *wazeroir.UnionOperation) error {
2425 	return c.emitEqOrNe(false, wazeroir.UnsignedType(o.B1))
2426 }
2427 
2428 // emitEqOrNe implements compiler.compileEq and compiler.compileNe for the arm64 architecture.
2429 func (c *arm64Compiler) emitEqOrNe(isEq bool, unsignedType wazeroir.UnsignedType) error { 2430 x1, x2, err := c.popTwoValuesOnRegisters() 2431 if err != nil { 2432 return err 2433 } 2434 2435 var inst asm.Instruction 2436 switch unsignedType { 2437 case wazeroir.UnsignedTypeI32: 2438 inst = arm64.CMPW 2439 case wazeroir.UnsignedTypeI64: 2440 inst = arm64.CMP 2441 case wazeroir.UnsignedTypeF32: 2442 inst = arm64.FCMPS 2443 case wazeroir.UnsignedTypeF64: 2444 inst = arm64.FCMPD 2445 } 2446 2447 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2448 2449 // Push the comparison result as a conditional register value. 2450 cond := arm64.CondNE 2451 if isEq { 2452 cond = arm64.CondEQ 2453 } 2454 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(cond) 2455 return nil 2456 } 2457 2458 // compileEqz implements compiler.compileEqz for the arm64 architecture. 2459 func (c *arm64Compiler) compileEqz(o *wazeroir.UnionOperation) error { 2460 x1, err := c.popValueOnRegister() 2461 if err != nil { 2462 return err 2463 } 2464 2465 var inst asm.Instruction 2466 unsignedInt := wazeroir.UnsignedInt(o.B1) 2467 switch unsignedInt { 2468 case wazeroir.UnsignedInt32: 2469 inst = arm64.CMPW 2470 case wazeroir.UnsignedInt64: 2471 inst = arm64.CMP 2472 } 2473 2474 c.assembler.CompileTwoRegistersToNone(inst, arm64.RegRZR, x1.register) 2475 2476 // Push the comparison result as a conditional register value. 2477 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ) 2478 return nil 2479 } 2480 2481 // compileLt implements compiler.compileLt for the arm64 architecture. 2482 func (c *arm64Compiler) compileLt(o *wazeroir.UnionOperation) error { 2483 x1, x2, err := c.popTwoValuesOnRegisters() 2484 if err != nil { 2485 return err 2486 } 2487 2488 var inst asm.Instruction 2489 var conditionalRegister asm.ConditionalRegisterState 2490 signedType := wazeroir.SignedType(o.B1) 2491 switch signedType { 2492 case wazeroir.SignedTypeUint32: 2493 inst = arm64.CMPW 2494 conditionalRegister = arm64.CondLO 2495 case wazeroir.SignedTypeUint64: 2496 inst = arm64.CMP 2497 conditionalRegister = arm64.CondLO 2498 case wazeroir.SignedTypeInt32: 2499 inst = arm64.CMPW 2500 conditionalRegister = arm64.CondLT 2501 case wazeroir.SignedTypeInt64: 2502 inst = arm64.CMP 2503 conditionalRegister = arm64.CondLT 2504 case wazeroir.SignedTypeFloat32: 2505 inst = arm64.FCMPS 2506 conditionalRegister = arm64.CondMI 2507 case wazeroir.SignedTypeFloat64: 2508 inst = arm64.FCMPD 2509 conditionalRegister = arm64.CondMI 2510 } 2511 2512 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2513 2514 // Push the comparison result as a conditional register value. 2515 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2516 return nil 2517 } 2518 2519 // compileGt implements compiler.compileGt for the arm64 architecture. 
2520 func (c *arm64Compiler) compileGt(o *wazeroir.UnionOperation) error { 2521 x1, x2, err := c.popTwoValuesOnRegisters() 2522 if err != nil { 2523 return err 2524 } 2525 2526 var inst asm.Instruction 2527 var conditionalRegister asm.ConditionalRegisterState 2528 signedType := wazeroir.SignedType(o.B1) 2529 switch signedType { 2530 case wazeroir.SignedTypeUint32: 2531 inst = arm64.CMPW 2532 conditionalRegister = arm64.CondHI 2533 case wazeroir.SignedTypeUint64: 2534 inst = arm64.CMP 2535 conditionalRegister = arm64.CondHI 2536 case wazeroir.SignedTypeInt32: 2537 inst = arm64.CMPW 2538 conditionalRegister = arm64.CondGT 2539 case wazeroir.SignedTypeInt64: 2540 inst = arm64.CMP 2541 conditionalRegister = arm64.CondGT 2542 case wazeroir.SignedTypeFloat32: 2543 inst = arm64.FCMPS 2544 conditionalRegister = arm64.CondGT 2545 case wazeroir.SignedTypeFloat64: 2546 inst = arm64.FCMPD 2547 conditionalRegister = arm64.CondGT 2548 } 2549 2550 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2551 2552 // Push the comparison result as a conditional register value. 2553 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2554 return nil 2555 } 2556 2557 // compileLe implements compiler.compileLe for the arm64 architecture. 2558 func (c *arm64Compiler) compileLe(o *wazeroir.UnionOperation) error { 2559 x1, x2, err := c.popTwoValuesOnRegisters() 2560 if err != nil { 2561 return err 2562 } 2563 2564 var inst asm.Instruction 2565 var conditionalRegister asm.ConditionalRegisterState 2566 signedType := wazeroir.SignedType(o.B1) 2567 switch signedType { 2568 case wazeroir.SignedTypeUint32: 2569 inst = arm64.CMPW 2570 conditionalRegister = arm64.CondLS 2571 case wazeroir.SignedTypeUint64: 2572 inst = arm64.CMP 2573 conditionalRegister = arm64.CondLS 2574 case wazeroir.SignedTypeInt32: 2575 inst = arm64.CMPW 2576 conditionalRegister = arm64.CondLE 2577 case wazeroir.SignedTypeInt64: 2578 inst = arm64.CMP 2579 conditionalRegister = arm64.CondLE 2580 case wazeroir.SignedTypeFloat32: 2581 inst = arm64.FCMPS 2582 conditionalRegister = arm64.CondLS 2583 case wazeroir.SignedTypeFloat64: 2584 inst = arm64.FCMPD 2585 conditionalRegister = arm64.CondLS 2586 } 2587 2588 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2589 2590 // Push the comparison result as a conditional register value. 2591 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2592 return nil 2593 } 2594 2595 // compileGe implements compiler.compileGe for the arm64 architecture. 
2596 func (c *arm64Compiler) compileGe(o *wazeroir.UnionOperation) error { 2597 x1, x2, err := c.popTwoValuesOnRegisters() 2598 if err != nil { 2599 return err 2600 } 2601 2602 var inst asm.Instruction 2603 var conditionalRegister asm.ConditionalRegisterState 2604 signedType := wazeroir.SignedType(o.B1) 2605 switch signedType { 2606 case wazeroir.SignedTypeUint32: 2607 inst = arm64.CMPW 2608 conditionalRegister = arm64.CondHS 2609 case wazeroir.SignedTypeUint64: 2610 inst = arm64.CMP 2611 conditionalRegister = arm64.CondHS 2612 case wazeroir.SignedTypeInt32: 2613 inst = arm64.CMPW 2614 conditionalRegister = arm64.CondGE 2615 case wazeroir.SignedTypeInt64: 2616 inst = arm64.CMP 2617 conditionalRegister = arm64.CondGE 2618 case wazeroir.SignedTypeFloat32: 2619 inst = arm64.FCMPS 2620 conditionalRegister = arm64.CondGE 2621 case wazeroir.SignedTypeFloat64: 2622 inst = arm64.FCMPD 2623 conditionalRegister = arm64.CondGE 2624 } 2625 2626 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2627 2628 // Push the comparison result as a conditional register value. 2629 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2630 return nil 2631 } 2632 2633 // compileLoad implements compiler.compileLoad for the arm64 architecture. 2634 func (c *arm64Compiler) compileLoad(o *wazeroir.UnionOperation) error { 2635 var ( 2636 isFloat bool 2637 loadInst asm.Instruction 2638 targetSizeInBytes int64 2639 vt runtimeValueType 2640 ) 2641 2642 unsignedType := wazeroir.UnsignedType(o.B1) 2643 offset := uint32(o.U2) 2644 2645 switch unsignedType { 2646 case wazeroir.UnsignedTypeI32: 2647 loadInst = arm64.LDRW 2648 targetSizeInBytes = 32 / 8 2649 vt = runtimeValueTypeI32 2650 case wazeroir.UnsignedTypeI64: 2651 loadInst = arm64.LDRD 2652 targetSizeInBytes = 64 / 8 2653 vt = runtimeValueTypeI64 2654 case wazeroir.UnsignedTypeF32: 2655 loadInst = arm64.FLDRS 2656 isFloat = true 2657 targetSizeInBytes = 32 / 8 2658 vt = runtimeValueTypeF32 2659 case wazeroir.UnsignedTypeF64: 2660 loadInst = arm64.FLDRD 2661 isFloat = true 2662 targetSizeInBytes = 64 / 8 2663 vt = runtimeValueTypeF64 2664 } 2665 return c.compileLoadImpl(offset, loadInst, targetSizeInBytes, isFloat, vt) 2666 } 2667 2668 // compileLoad8 implements compiler.compileLoad8 for the arm64 architecture. 2669 func (c *arm64Compiler) compileLoad8(o *wazeroir.UnionOperation) error { 2670 var loadInst asm.Instruction 2671 var vt runtimeValueType 2672 2673 signedInt := wazeroir.SignedInt(o.B1) 2674 offset := uint32(o.U2) 2675 2676 switch signedInt { 2677 case wazeroir.SignedInt32: 2678 loadInst = arm64.LDRSBW 2679 vt = runtimeValueTypeI32 2680 case wazeroir.SignedInt64: 2681 loadInst = arm64.LDRSBD 2682 vt = runtimeValueTypeI64 2683 case wazeroir.SignedUint32: 2684 loadInst = arm64.LDRB 2685 vt = runtimeValueTypeI32 2686 case wazeroir.SignedUint64: 2687 loadInst = arm64.LDRB 2688 vt = runtimeValueTypeI64 2689 } 2690 return c.compileLoadImpl(offset, loadInst, 1, false, vt) 2691 } 2692 2693 // compileLoad16 implements compiler.compileLoad16 for the arm64 architecture. 
2694 func (c *arm64Compiler) compileLoad16(o *wazeroir.UnionOperation) error {
2695 	var loadInst asm.Instruction
2696 	var vt runtimeValueType
2697 
2698 	signedInt := wazeroir.SignedInt(o.B1)
2699 	offset := uint32(o.U2)
2700 
2701 	switch signedInt {
2702 	case wazeroir.SignedInt32:
2703 		loadInst = arm64.LDRSHW
2704 		vt = runtimeValueTypeI32
2705 	case wazeroir.SignedInt64:
2706 		loadInst = arm64.LDRSHD
2707 		vt = runtimeValueTypeI64
2708 	case wazeroir.SignedUint32:
2709 		loadInst = arm64.LDRH
2710 		vt = runtimeValueTypeI32
2711 	case wazeroir.SignedUint64:
2712 		loadInst = arm64.LDRH
2713 		vt = runtimeValueTypeI64
2714 	}
2715 	return c.compileLoadImpl(offset, loadInst, 16/8, false, vt)
2716 }
2717 
2718 // compileLoad32 implements compiler.compileLoad32 for the arm64 architecture.
2719 func (c *arm64Compiler) compileLoad32(o *wazeroir.UnionOperation) error {
2720 	var loadInst asm.Instruction
2721 	signed := o.B1 == 1
2722 	offset := uint32(o.U2)
2723 
2724 	if signed {
2725 		loadInst = arm64.LDRSW
2726 	} else {
2727 		loadInst = arm64.LDRW
2728 	}
2729 	return c.compileLoadImpl(offset, loadInst, 32/8, false, runtimeValueTypeI64)
2730 }
2731 
2732 // compileLoadImpl implements the compileLoad* variants for the arm64 architecture.
2733 func (c *arm64Compiler) compileLoadImpl(offsetArg uint32, loadInst asm.Instruction,
2734 	targetSizeInBytes int64, isFloat bool, resultRuntimeValueType runtimeValueType,
2735 ) error {
2736 	offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2737 	if err != nil {
2738 		return err
2739 	}
2740 
2741 	resultRegister := offsetReg
2742 	if isFloat {
2743 		resultRegister, err = c.allocateRegister(registerTypeVector)
2744 		if err != nil {
2745 			return err
2746 		}
2747 	}
2748 
2749 	// "resultRegister = [arm64ReservedRegisterForMemory + offsetReg]"
2750 	// In other words, "resultRegister = memory.Buffer[offset: offset+targetSizeInBytes]"
2751 	c.assembler.CompileMemoryWithRegisterOffsetToRegister(
2752 		loadInst,
2753 		arm64ReservedRegisterForMemory, offsetReg,
2754 		resultRegister,
2755 	)
2756 
2757 	c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
2758 	return nil
2759 }
2760 
2761 // compileStore implements compiler.compileStore for the arm64 architecture.
2762 func (c *arm64Compiler) compileStore(o *wazeroir.UnionOperation) error {
2763 	var movInst asm.Instruction
2764 	var targetSizeInBytes int64
2765 	unsignedType := wazeroir.UnsignedType(o.B1)
2766 	offset := uint32(o.U2)
2767 	switch unsignedType {
2768 	case wazeroir.UnsignedTypeI32:
2769 		movInst = arm64.STRW
2770 		targetSizeInBytes = 32 / 8
2771 	case wazeroir.UnsignedTypeI64:
2772 		movInst = arm64.STRD
2773 		targetSizeInBytes = 64 / 8
2774 	case wazeroir.UnsignedTypeF32:
2775 		movInst = arm64.FSTRS
2776 		targetSizeInBytes = 32 / 8
2777 	case wazeroir.UnsignedTypeF64:
2778 		movInst = arm64.FSTRD
2779 		targetSizeInBytes = 64 / 8
2780 	}
2781 	return c.compileStoreImpl(offset, movInst, targetSizeInBytes)
2782 }
2783 
2784 // compileStore8 implements compiler.compileStore8 for the arm64 architecture.
2785 func (c *arm64Compiler) compileStore8(o *wazeroir.UnionOperation) error {
2786 	return c.compileStoreImpl(uint32(o.U2), arm64.STRB, 1)
2787 }
2788 
2789 // compileStore16 implements compiler.compileStore16 for the arm64 architecture.
2790 func (c *arm64Compiler) compileStore16(o *wazeroir.UnionOperation) error {
2791 	return c.compileStoreImpl(uint32(o.U2), arm64.STRH, 16/8)
2792 }
2793 
2794 // compileStore32 implements compiler.compileStore32 for the arm64 architecture.
2795 func (c *arm64Compiler) compileStore32(o *wazeroir.UnionOperation) error {
2796 	return c.compileStoreImpl(uint32(o.U2), arm64.STRW, 32/8)
2797 }
2798 
2799 // compileStoreImpl implements the compileStore* variants for the arm64 architecture.
2800 func (c *arm64Compiler) compileStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error {
2801 	val, err := c.popValueOnRegister()
2802 	if err != nil {
2803 		return err
2804 	}
2805 	// Mark the register temporarily used, as compileMemoryAccessOffsetSetup might try to allocate a register.
2806 	c.markRegisterUsed(val.register)
2807 
2808 	offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2809 	if err != nil {
2810 		return err
2811 	}
2812 
2813 	// "[arm64ReservedRegisterForMemory + offsetReg] = val.register"
2814 	// In other words, "memory.Buffer[offset: offset+targetSizeInBytes] = val.register"
2815 	c.assembler.CompileRegisterToMemoryWithRegisterOffset(
2816 		storeInst, val.register,
2817 		arm64ReservedRegisterForMemory, offsetReg,
2818 	)
2819 
2820 	c.markRegisterUnused(val.register)
2821 	return nil
2822 }
2823 
2824 // compileMemoryAccessOffsetSetup pops the top value from the stack (called "base"), stores "base + offsetArg"
2825 // into a register, and returns the stored register. We call the result "offset" because we access the memory
2826 // as memory.Buffer[offset: offset+targetSizeInBytes].
2827 //
2828 // Note: this also emits the instructions to check for out-of-bounds memory access.
2829 // In other words, if offset+targetSizeInBytes exceeds the memory size, the code exits with the nativeCallStatusCodeMemoryOutOfBounds status.
2830 func (c *arm64Compiler) compileMemoryAccessOffsetSetup(offsetArg uint32, targetSizeInBytes int64) (offsetRegister asm.Register, err error) {
2831 	base, err := c.popValueOnRegister()
2832 	if err != nil {
2833 		return 0, err
2834 	}
2835 
2836 	offsetRegister = base.register
2837 	if isZeroRegister(base.register) {
2838 		offsetRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
2839 		if err != nil {
2840 			return
2841 		}
2842 		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetRegister)
2843 	}
2844 
2845 	if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxUint32 {
2846 		// "offsetRegister = base + offsetArg + targetSizeInBytes"
2847 		c.assembler.CompileConstToRegister(arm64.ADD, offsetConst, offsetRegister)
2848 	} else {
2849 		// If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
2850 		c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
2851 		return
2852 	}
2853 
2854 	// "arm64ReservedRegisterForTemporary = len(memory.Buffer)"
2855 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
2856 		arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
2857 		arm64ReservedRegisterForTemporary)
2858 
2859 	// Check if offsetRegister(= base+offsetArg+targetSizeInBytes) > len(memory.Buffer).
2860 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, offsetRegister)
2861 
2862 	// If offsetRegister(= base+offsetArg+targetSizeInBytes) exceeds the memory length,
2863 	// we exit the function with nativeCallStatusCodeMemoryOutOfBounds.
2864 	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, nativeCallStatusCodeMemoryOutOfBounds)
2865 
2866 	// Otherwise, we subtract targetSizeInBytes from offsetRegister.
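	// The bounds check above corresponds to the following sketch (illustrative Go,
	// not the emitted code; the names are ours):
	//
	//	ceil := uint64(base) + uint64(offsetArg) + uint64(targetSizeInBytes)
	//	if ceil > uint64(len(memory.Buffer)) {
	//		exitWith(nativeCallStatusCodeMemoryOutOfBounds)
	//	}
	//	offset := ceil - uint64(targetSizeInBytes) // == base + offsetArg
	//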
2867 	c.assembler.CompileConstToRegister(arm64.SUB, targetSizeInBytes, offsetRegister)
2868 	return offsetRegister, nil
2869 }
2870 
2871 // compileMemoryGrow implements compiler.compileMemoryGrow for the arm64 architecture.
2872 func (c *arm64Compiler) compileMemoryGrow() error {
2873 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2874 		return err
2875 	}
2876 
2877 	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexMemoryGrow); err != nil {
2878 		return err
2879 	}
2880 
2881 	// After return, we re-initialize the reserved registers just like in the preamble of functions.
2882 	c.compileReservedStackBasePointerRegisterInitialization()
2883 	c.compileReservedMemoryRegisterInitialization()
2884 	return nil
2885 }
2886 
2887 // compileMemorySize implements compiler.compileMemorySize for the arm64 architecture.
2888 func (c *arm64Compiler) compileMemorySize() error {
2889 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2890 		return err
2891 	}
2892 
2893 	reg, err := c.allocateRegister(registerTypeGeneralPurpose)
2894 	if err != nil {
2895 		return err
2896 	}
2897 
2898 	// "reg = len(memory.Buffer)"
2899 	c.assembler.CompileMemoryToRegister(
2900 		arm64.LDRD,
2901 		arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
2902 		reg,
2903 	)
2904 
2905 	// memory.size returns the memory size in pages, so we have to divide the byte length by the page size.
2906 	// "reg = reg >> wasm.MemoryPageSizeInBits (== reg / wasm.MemoryPageSize)"
2907 	c.assembler.CompileConstToRegister(
2908 		arm64.LSR,
2909 		wasm.MemoryPageSizeInBits,
2910 		reg,
2911 	)
2912 
2913 	c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
2914 	return nil
2915 }
2916 
2917 // compileCallGoFunction adds instructions to call a Go function whose index equals the builtinFunction parameter.
2918 // compilerStatus is set before making the call, and it should be either nativeCallStatusCodeCallBuiltInFunction or
2919 // nativeCallStatusCodeCallGoHostFunction.
2920 func (c *arm64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode, builtinFunction wasm.Index) error {
2921 	// Release all the registers, as our calling convention requires the caller to save them (caller-save).
2922 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
2923 		return err
2924 	}
2925 
2926 	if compilerStatus == nativeCallStatusCodeCallBuiltInFunction {
2927 		// Set the target builtin function index on the call engine.
2928 		// "tmp = $index"
2929 		c.assembler.CompileConstToRegister(
2930 			arm64.MOVD,
2931 			int64(builtinFunction),
2932 			arm64ReservedRegisterForTemporary,
2933 		)
2934 		// "[arm64ReservedRegisterForCallEngine + callEngineExitContextBuiltinFunctionCallIndexOffset] = tmp"
2935 		// In other words, "ce.exitContext.builtinFunctionCallIndex = tmp (== $index)"
2936 		c.assembler.CompileRegisterToMemory(
2937 			arm64.STRW,
2938 			arm64ReservedRegisterForTemporary,
2939 			arm64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset,
2940 		)
2941 	}
2942 
2943 	c.compileExitFromNativeCode(compilerStatus)
2944 	return nil
2945 }
2946 
2947 // compileConstI32 implements compiler.compileConstI32 for the arm64 architecture.
2948 func (c *arm64Compiler) compileConstI32(o *wazeroir.UnionOperation) error {
2949 	return c.compileIntConstant(true, o.U1)
2950 }
2951 
2952 // compileConstI64 implements compiler.compileConstI64 for the arm64 architecture.
2953 func (c *arm64Compiler) compileConstI64(o *wazeroir.UnionOperation) error {
2954 	return c.compileIntConstant(false, o.U1)
2955 }
2956 
2957 // compileIntConstant adds instructions to load an integer constant.
2958 // is32bit is true if the target value is originally a 32-bit const, false otherwise.
2959 // value holds the constant to load (zero-extended in the 32-bit case).
2960 func (c *arm64Compiler) compileIntConstant(is32bit bool, value uint64) error {
2961 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2962 		return err
2963 	}
2964 
2965 	var inst asm.Instruction
2966 	var vt runtimeValueType
2967 	if is32bit {
2968 		inst = arm64.MOVW
2969 		vt = runtimeValueTypeI32
2970 	} else {
2971 		inst = arm64.MOVD
2972 		vt = runtimeValueTypeI64
2973 	}
2974 
2975 	if value == 0 {
2976 		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, vt)
2977 	} else {
2978 		// Take a register to load the value.
2979 		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
2980 		if err != nil {
2981 			return err
2982 		}
2983 
2984 		c.assembler.CompileConstToRegister(inst, int64(value), reg)
2985 
2986 		c.pushRuntimeValueLocationOnRegister(reg, vt)
2987 	}
2988 	return nil
2989 }
2990 
2991 // compileConstF32 implements compiler.compileConstF32 for the arm64 architecture.
2992 func (c *arm64Compiler) compileConstF32(o *wazeroir.UnionOperation) error {
2993 	return c.compileFloatConstant(true, o.U1 /*uint64(math.Float32bits(o.Value))*/)
2994 }
2995 
2996 // compileConstF64 implements compiler.compileConstF64 for the arm64 architecture.
2997 func (c *arm64Compiler) compileConstF64(o *wazeroir.UnionOperation) error {
2998 	return c.compileFloatConstant(false, o.U1 /*math.Float64bits(o.Value)*/)
2999 }
3000 
3001 // compileFloatConstant adds instructions to load a float constant.
3002 // is32bit is true if the target value is originally a 32-bit const, false otherwise.
3003 // value holds the bit representation of the float constant to load (zero-extended in the 32-bit case).
3004 func (c *arm64Compiler) compileFloatConstant(is32bit bool, value uint64) error {
3005 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3006 		return err
3007 	}
3008 
3009 	// Take a register to load the value.
3010 	reg, err := c.allocateRegister(registerTypeVector)
3011 	if err != nil {
3012 		return err
3013 	}
3014 
3015 	tmpReg := arm64.RegRZR
3016 	if value != 0 {
3017 		tmpReg = arm64ReservedRegisterForTemporary
3018 		var inst asm.Instruction
3019 		if is32bit {
3020 			inst = arm64.MOVW
3021 		} else {
3022 			inst = arm64.MOVD
3023 		}
3024 		c.assembler.CompileConstToRegister(inst, int64(value), tmpReg)
3025 	}
3026 
3027 	// Use the FMOV instruction to move the value from the integer register into the float register.
3028 	var inst asm.Instruction
3029 	var vt runtimeValueType
3030 	if is32bit {
3031 		vt = runtimeValueTypeF32
3032 		inst = arm64.FMOVS
3033 	} else {
3034 		vt = runtimeValueTypeF64
3035 		inst = arm64.FMOVD
3036 	}
3037 	c.assembler.CompileRegisterToRegister(inst, tmpReg, reg)
3038 
3039 	c.pushRuntimeValueLocationOnRegister(reg, vt)
3040 	return nil
3041 }
3042 
3043 // compileMemoryInit implements compiler.compileMemoryInit for the arm64 architecture.
3044 func (c *arm64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error {
3045 	dataIndex := uint32(o.U1)
3046 	return c.compileInitImpl(false, dataIndex, 0)
3047 }
3048 
3049 // compileInitImpl implements compileTableInit and compileMemoryInit.
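// memory.init copies bytes from a passive data segment into linear memory, and
// table.init copies references from a passive element segment into a table; both
// come from the WebAssembly bulk-memory-operations proposal and must trap when the
// source or destination range is out of bounds.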
// compileInitImpl implements compileTableInit and compileMemoryInit.
//
// TODO: the compiled code in this function should be reused and compiled at once as
// the code is independent of any module.
func (c *arm64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
	outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
	if isTable {
		outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
	}

	copySize, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(copySize.register)

	sourceOffset, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	if isZeroRegister(sourceOffset.register) {
		sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
	}
	c.markRegisterUsed(sourceOffset.register)

	destinationOffset, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	if isZeroRegister(destinationOffset.register) {
		destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
	}
	c.markRegisterUsed(destinationOffset.register)

	tableInstanceAddressReg := asm.NilRegister
	if isTable {
		tableInstanceAddressReg, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tableInstanceAddressReg)
	}

	if !isZeroRegister(copySize.register) {
		// sourceOffset += size.
		c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
		// destinationOffset += size.
		c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
	}

	instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	if isTable {
		c.compileLoadElemInstanceAddress(index, instanceAddr)
	} else {
		c.compileLoadDataInstanceAddress(index, instanceAddr)
	}

	// Check the source instance bounds.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		instanceAddr, 8, // Both DataInstance and ElementInstance store their length at offset 8.
		arm64ReservedRegisterForTemporary)

	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
	// If out of bounds, raise the out of bounds access error.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
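
	// Note on the bounds checks in this function: the source and destination
	// offsets were incremented by the copy size above, so a single unsigned
	// comparison against the instance length suffices to detect
	// "offset + size out of range" without a separate overflow check.
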
	// Otherwise, proceed to the destination bounds check.
	if isTable {
		// arm64ReservedRegisterForTemporary = &tables[0]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
			arm64ReservedRegisterForTemporary)
		// tableInstanceAddressReg = [arm64ReservedRegisterForTemporary + tableIndex*8]
		//                         = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
		//                         = tables[tableIndex]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
			tableInstanceAddressReg)
		// arm64ReservedRegisterForTemporary = [tableInstanceAddressReg+tableInstanceTableLenOffset] = len(tables[tableIndex])
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			tableInstanceAddressReg, tableInstanceTableLenOffset,
			arm64ReservedRegisterForTemporary)
	} else {
		// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
			arm64ReservedRegisterForTemporary)
	}

	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
	// If out of bounds, raise the out of bounds access error.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

	// Otherwise, we are ready to copy the value from source to destination.
	if !isZeroRegister(copySize.register) {
		// If the size equals zero, we can skip all the instructions below.
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
		skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)

		var ldr, str asm.Instruction
		var movSize int64
		if isTable {
			ldr, str = arm64.LDRD, arm64.STRD
			movSize = 8

			// arm64ReservedRegisterForTemporary = &Table[0]
			c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg,
				tableInstanceTableOffset, arm64ReservedRegisterForTemporary)
			// destinationOffset = (destinationOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
			c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
				destinationOffset.register, pointerSizeLog2,
				arm64ReservedRegisterForTemporary, destinationOffset.register)

			// arm64ReservedRegisterForTemporary = &ElementInstance.References[0]
			c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
			// sourceOffset = (sourceOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
			c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
				sourceOffset.register, pointerSizeLog2,
				arm64ReservedRegisterForTemporary, sourceOffset.register)

			// copySize = copySize << pointerSizeLog2
			c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
		} else {
			ldr, str = arm64.LDRB, arm64.STRB
			movSize = 1

			// destinationOffset += memory buffer's absolute address.
			c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)

			// sourceOffset += data buffer's absolute address.
			c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
			c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, sourceOffset.register)
		}
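
		// The loop emitted below iterates with a negated counter: starting at
		// -size, a single ADDS both advances the index and sets the condition
		// flags that drive the loop. Roughly, in Go (an illustrative sketch
		// only, not code that is executed here):
		//
		//	for counter := -size; counter < 0; counter += movSize {
		//		dst[size+counter] = src[size+counter]
		//	}
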
		// Negate the counter.
		c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)

		beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

		// arm64ReservedRegisterForTemporary = [sourceOffset + counter]
		c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
			sourceOffset.register, copySize.register,
			arm64ReservedRegisterForTemporary)
		// [destinationOffset + counter] = arm64ReservedRegisterForTemporary.
		c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
			arm64ReservedRegisterForTemporary,
			destinationOffset.register, copySize.register,
		)

		// Increment the counter, and continue the loop while it is still negative.
		c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
		c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)

		c.assembler.SetJumpTargetOnNext(skipCopyJump)
	}

	c.markRegisterUnused(copySize.register, sourceOffset.register,
		destinationOffset.register, instanceAddr, tableInstanceAddressReg)
	return nil
}

// compileDataDrop implements compiler.compileDataDrop for the arm64 architecture.
func (c *arm64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	dataIndex := uint32(o.U1)
	c.compileLoadDataInstanceAddress(dataIndex, tmp)

	// Clear the content of DataInstances[dataIndex] (== []byte type) by zeroing its slice header.
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
	return nil
}

func (c *arm64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) {
	// dst = dataIndex * dataInstanceStructSize
	c.assembler.CompileConstToRegister(arm64.MOVD, int64(dataIndex)*dataInstanceStructSize, dst)

	// arm64ReservedRegisterForTemporary = &moduleInstance.DataInstances[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
		arm64ReservedRegisterForTemporary,
	)

	// dst = arm64ReservedRegisterForTemporary + dst
	//     = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize
	//     = &moduleInstance.DataInstances[dataIndex]
	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
}

// compileMemoryCopy implements compiler.compileMemoryCopy for the arm64 architecture.
func (c *arm64Compiler) compileMemoryCopy() error {
	return c.compileCopyImpl(false, 0, 0)
}
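
// Note: memory.copy and table.copy must behave like memmove, i.e. the source and
// destination ranges may overlap. The helper below therefore compares the two
// offsets and emits either a backward or a forward copy loop, so that overlapping
// ranges are handled correctly.
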
// compileCopyImpl implements compileTableCopy and compileMemoryCopy.
//
// TODO: the compiled code in this function should be reused and compiled at once as
// the code is independent of any module.
func (c *arm64Compiler) compileCopyImpl(isTable bool, srcTableIndex, dstTableIndex uint32) error {
	outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
	if isTable {
		outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
	}

	copySize, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(copySize.register)

	sourceOffset, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	if isZeroRegister(sourceOffset.register) {
		sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
	}
	c.markRegisterUsed(sourceOffset.register)

	destinationOffset, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	if isZeroRegister(destinationOffset.register) {
		destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
	}
	c.markRegisterUsed(destinationOffset.register)

	if !isZeroRegister(copySize.register) {
		// sourceOffset += size.
		c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
		// destinationOffset += size.
		c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
	}

	if isTable {
		// arm64ReservedRegisterForTemporary = &tables[0]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
			arm64ReservedRegisterForTemporary)
		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + srcTableIndex*8]
		//                                   = [&tables[0] + srcTableIndex*sizeOf(*tableInstance)]
		//                                   = tables[srcTableIndex]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
			arm64ReservedRegisterForTemporary)
		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[srcTableIndex])
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
			arm64ReservedRegisterForTemporary)
	} else {
		// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
			arm64ReservedRegisterForTemporary)
	}

	// Check that the source length (memory or table) >= sourceOffset (which now holds the original offset + copy size).
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
	// If not, raise the out of bounds access error.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

	// Otherwise, check that the destination length >= destinationOffset.
	if isTable {
		// arm64ReservedRegisterForTemporary = &tables[0]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
			arm64ReservedRegisterForTemporary)
		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + dstTableIndex*8]
		//                                   = [&tables[0] + dstTableIndex*sizeOf(*tableInstance)]
		//                                   = tables[dstTableIndex]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
			arm64ReservedRegisterForTemporary)
		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[dstTableIndex])
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
			arm64ReservedRegisterForTemporary)
	}

	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
	// If not, raise the out of bounds access error.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

	// Otherwise, we are ready to copy the value from source to destination.
	var ldr, str asm.Instruction
	var movSize int64
	if isTable {
		ldr, str = arm64.LDRD, arm64.STRD
		movSize = 8
	} else {
		ldr, str = arm64.LDRB, arm64.STRB
		movSize = 1
	}

	// If the size equals zero, we can skip all the instructions below.
	if !isZeroRegister(copySize.register) {
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
		skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)

		// If source offset < destination offset: for (i = size-1; i >= 0; i--) dst[i] = src[i];
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, sourceOffset.register, destinationOffset.register)
		destLowerThanSourceJump := c.assembler.CompileJump(arm64.BCONDLS)
		var endJump asm.Node
		{
			// sourceOffset -= size.
			c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, sourceOffset.register)
			// destinationOffset -= size.
			c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, destinationOffset.register)

			if isTable {
				// arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
					arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
					arm64ReservedRegisterForTemporary)
				// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
					destinationOffset.register, pointerSizeLog2,
					arm64ReservedRegisterForTemporary, destinationOffset.register)

				// arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0]
				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
					arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
					arm64ReservedRegisterForTemporary)
				// sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
					sourceOffset.register, pointerSizeLog2,
					arm64ReservedRegisterForTemporary, sourceOffset.register)

				// copySize = copySize << pointerSizeLog2, as each element is 8 bytes and we copy them one by one.
				c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
			} else {
				// sourceOffset += memory buffer's absolute address.
				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
				// destinationOffset += memory buffer's absolute address.
				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
			}

			beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

			// size -= movSize.
			c.assembler.CompileConstToRegister(arm64.SUBS, movSize, copySize.register)

			// arm64ReservedRegisterForTemporary = [sourceOffset + size]
			c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
				sourceOffset.register, copySize.register,
				arm64ReservedRegisterForTemporary)
			// [destinationOffset + size] = arm64ReservedRegisterForTemporary.
			c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
				arm64ReservedRegisterForTemporary,
				destinationOffset.register, copySize.register,
			)

			// If the value in copySize.register is not yet zero, continue the loop.
			c.assembler.CompileJump(arm64.BCONDNE).AssignJumpTarget(beginCopyLoop)

			// Otherwise, exit the loop.
			endJump = c.assembler.CompileJump(arm64.B)
		}

		// Else (destination offset < source offset): for (i = 0; i < size; i++) dst[i] = src[i];
		c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump)
		{
			if isTable {
				// arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
					arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
					arm64ReservedRegisterForTemporary)
				// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
					destinationOffset.register, pointerSizeLog2,
					arm64ReservedRegisterForTemporary, destinationOffset.register)

				// arm64ReservedRegisterForTemporary = &Tables[srcTableIndex].Table[0]
				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
					arm64ReservedRegisterForTemporary)
				c.assembler.CompileMemoryToRegister(arm64.LDRD,
					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
					arm64ReservedRegisterForTemporary)
				// sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
					sourceOffset.register, pointerSizeLog2,
					arm64ReservedRegisterForTemporary, sourceOffset.register)

				// copySize = copySize << pointerSizeLog2, as each element is 8 bytes and we copy them one by one.
				c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
			} else {
				// sourceOffset += memory buffer's absolute address.
				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
				// destinationOffset += memory buffer's absolute address.
				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
			}

			// Negate the counter.
			c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)

			beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

			// arm64ReservedRegisterForTemporary = [sourceOffset + counter]
			c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
				sourceOffset.register, copySize.register,
				arm64ReservedRegisterForTemporary)
			// [destinationOffset + counter] = arm64ReservedRegisterForTemporary.
			c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
				arm64ReservedRegisterForTemporary,
				destinationOffset.register, copySize.register,
			)

			// counter += movSize; continue the loop while the counter is still negative.
			c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
			c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
		}
		c.assembler.SetJumpTargetOnNext(skipCopyJump)
		c.assembler.SetJumpTargetOnNext(endJump)
	}
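
	// Taken together, the two blocks above implement an overlap-safe copy.
	// As a rough Go sketch (illustrative only, not code that is executed here):
	//
	//	if sourceOffset < destinationOffset {
	//		for i := size - 1; i >= 0; i-- { dst[i] = src[i] } // backward copy
	//	} else {
	//		for i := 0; i < size; i++ { dst[i] = src[i] } // forward copy
	//	}
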
	// Mark all of the operand registers as unused.
	c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register)

	return nil
}

// compileMemoryFill implements compiler.compileMemoryFill for the arm64 architecture.
func (c *arm64Compiler) compileMemoryFill() error {
	return c.compileFillImpl(false, 0)
}

// compileFillImpl implements compileTableFill and compileMemoryFill.
//
// TODO: the compiled code in this function should be reused and compiled at once as
// the code is independent of any module.
func (c *arm64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error {
	outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
	if isTable {
		outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
	}

	fillSize, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(fillSize.register)

	value, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(value.register)

	destinationOffset, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	if isZeroRegister(destinationOffset.register) {
		destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
	}
	c.markRegisterUsed(destinationOffset.register)

	// destinationOffset += size.
	c.assembler.CompileRegisterToRegister(arm64.ADD, fillSize.register, destinationOffset.register)

	if isTable {
		// arm64ReservedRegisterForTemporary = &tables[0]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
			arm64ReservedRegisterForTemporary)
		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + tableIndex*8]
		//                                   = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
		//                                   = tables[tableIndex]
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
			arm64ReservedRegisterForTemporary)
		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[tableIndex])
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
			arm64ReservedRegisterForTemporary)
	} else {
		// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
			arm64ReservedRegisterForTemporary)
	}

	// Check that the length >= destinationOffset (which now holds the original offset + fill size).
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)

	// If not, raise the out of bounds access error.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)

	// Otherwise, we are ready to fill the destination with the value.
	// If the size equals zero, we can skip all the instructions below.
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, fillSize.register)
	skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
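
	// The loop that follows decrements the counter first and then stores the
	// fill value at [destinationOffset + counter], so it writes the indices
	// size-movSize down to 0. Roughly, in Go (an illustrative sketch only):
	//
	//	for counter := size - movSize; counter >= 0; counter -= movSize {
	//		dst[counter] = value
	//	}
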
	// destinationOffset -= size.
	c.assembler.CompileRegisterToRegister(arm64.SUB, fillSize.register, destinationOffset.register)

	var str asm.Instruction
	var movSize int64
	if isTable {
		str = arm64.STRD
		movSize = 8

		// arm64ReservedRegisterForTemporary = &Tables[tableIndex].Table[0]
		c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
			callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
			arm64ReservedRegisterForTemporary)
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
			arm64ReservedRegisterForTemporary)
		// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[tableIndex].Table[0]
		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
			destinationOffset.register, pointerSizeLog2,
			arm64ReservedRegisterForTemporary, destinationOffset.register)

		// fillSize = fillSize << pointerSizeLog2, as each element is 8 bytes and we fill them one by one.
		c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, fillSize.register)
	} else {
		str = arm64.STRB
		movSize = 1

		// destinationOffset += memory buffer's absolute address.
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
	}

	// Naively implement the fill with a loop, storing one element per iteration.
	beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)

	// size -= movSize.
	c.assembler.CompileConstToRegister(arm64.SUBS, movSize, fillSize.register)

	// [destinationOffset + size] = value.register.
	c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
		value.register,
		destinationOffset.register, fillSize.register,
	)

	// If the value in fillSize.register is not yet zero, continue the loop.
	continueJump := c.assembler.CompileJump(arm64.BCONDNE)
	continueJump.AssignJumpTarget(beginCopyLoop)

	// Mark all of the operand registers as unused.
	c.markRegisterUnused(fillSize.register, value.register, destinationOffset.register)

	c.assembler.SetJumpTargetOnNext(skipCopyJump)
	return nil
}

// compileTableInit implements compiler.compileTableInit for the arm64 architecture.
func (c *arm64Compiler) compileTableInit(o *wazeroir.UnionOperation) error {
	elemIndex := uint32(o.U1)
	tableIndex := uint32(o.U2)
	return c.compileInitImpl(true, elemIndex, tableIndex)
}

// compileTableCopy implements compiler.compileTableCopy for the arm64 architecture.
func (c *arm64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error {
	return c.compileCopyImpl(true, uint32(o.U1), uint32(o.U2))
}

// compileElemDrop implements compiler.compileElemDrop for the arm64 architecture.
func (c *arm64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	elemIndex := uint32(o.U1)
	c.compileLoadElemInstanceAddress(elemIndex, tmp)
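
	// Note: dropping a segment zeroes the three words of the Go slice header
	// (data pointer, length, and capacity, at offsets 0, 8, and 16 respectively),
	// so any subsequent non-zero-sized init against the segment fails the length
	// check and traps as out of bounds.
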
	// Clear the content of ElementInstances[elemIndex] (== []interface{} type) by zeroing its slice header.
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
	return nil
}

func (c *arm64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) {
	// dst = elemIndex * elementInstanceStructSize
	c.assembler.CompileConstToRegister(arm64.MOVD, int64(elemIndex)*elementInstanceStructSize, dst)

	// arm64ReservedRegisterForTemporary = &moduleInstance.ElementInstances[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
		arm64ReservedRegisterForTemporary,
	)

	// dst = arm64ReservedRegisterForTemporary + dst
	//     = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize
	//     = &moduleInstance.ElementInstances[elemIndex]
	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
}

// compileRefFunc implements compiler.compileRefFunc for the arm64 architecture.
func (c *arm64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	ref, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	// ref = [arm64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset]
	//     = &moduleEngine.functions[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
		ref)

	// ref = ref + functionIndex*sizeOf(function)
	//     = &moduleEngine.functions[functionIndex]
	functionIndex := int64(o.U1)
	c.assembler.CompileConstToRegister(arm64.ADD,
		functionIndex*functionSize,
		ref,
	)

	c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64)
	return nil
}

// compileTableGet implements compiler.compileTableGet for the arm64 architecture.
func (c *arm64Compiler) compileTableGet(o *wazeroir.UnionOperation) error {
	ref, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(ref)

	offset, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	// arm64ReservedRegisterForTemporary = &tables[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		arm64ReservedRegisterForTemporary)
	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
	//                                   = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
	//                                   = [&tables[TableIndex]] = tables[TableIndex].
	tableIndex := int64(o.U1)
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableIndex*8,
		arm64ReservedRegisterForTemporary)

	// Out of bounds check.
	// ref = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
		ref,
	)
	// "cmp ref, offset"
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, ref, offset.register)

	// If the offset exceeds len(table), we exit the execution.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)

	// ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
		ref,
	)

	// ref = (offset << pointerSizeLog2) + ref
	//     = &tables[TableIndex].References[0] + sizeOf(uintptr)*offset
	//     = &tables[TableIndex].References[offset]
	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
		offset.register, pointerSizeLog2, ref, ref)

	// ref = [ref] = tables[TableIndex].References[offset], i.e. the reference's raw pointer loaded as uint64.
	c.assembler.CompileMemoryToRegister(arm64.LDRD, ref, 0, ref)

	c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // Table elements are opaque 64-bit values at runtime.
	return nil
}

// compileTableSet implements compiler.compileTableSet for the arm64 architecture.
func (c *arm64Compiler) compileTableSet(o *wazeroir.UnionOperation) error {
	ref := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(ref); err != nil {
		return err
	}

	offset := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(offset); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// arm64ReservedRegisterForTemporary = &tables[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		arm64ReservedRegisterForTemporary)
	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
	//                                   = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
	//                                   = tables[TableIndex]
	tableIndex := int64(o.U1)
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableIndex*8,
		arm64ReservedRegisterForTemporary)

	// Out of bounds check.
	// tmp = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
		tmp,
	)
	// "cmp tmp, offset"
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp, offset.register)

	// If the offset exceeds len(table), we exit the execution.
	c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)

	// tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
		tmp,
	)

	// tmp = (offset << pointerSizeLog2) + tmp
	//     = &tables[TableIndex].References[0] + sizeOf(uintptr)*offset
	//     = &tables[TableIndex].References[offset]
	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, tmp, tmp)

	// Set the reference's raw pointer.
	c.assembler.CompileRegisterToMemory(arm64.STRD, ref.register, tmp, 0)

	c.markRegisterUnused(offset.register, ref.register, tmp)
	return nil
}

// compileTableGrow implements compiler.compileTableGrow for the arm64 architecture.
func (c *arm64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	// Push the table index.
	tableIndex := o.U1
	if err := c.compileIntConstant(true, tableIndex); err != nil {
		return err
	}

	// Just like memory grow, table grow cannot be done in assembly as it involves allocation in Go.
	// Therefore, we call out to the builtin function for this purpose.
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexTableGrow); err != nil {
		return err
	}

	// TableGrow consumes three values (table index, number of items, initial value).
	for i := 0; i < 3; i++ {
		c.locationStack.pop()
	}

	// The previous length is then pushed as the result.
	v := c.locationStack.pushRuntimeValueLocationOnStack()
	v.valueType = runtimeValueTypeI32

	// After return, we re-initialize the reserved registers just like in the preamble of functions.
	c.compileReservedStackBasePointerRegisterInitialization()
	c.compileReservedMemoryRegisterInitialization()
	return nil
}

// compileTableSize implements compiler.compileTableSize for the arm64 architecture.
func (c *arm64Compiler) compileTableSize(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(result)

	// arm64ReservedRegisterForTemporary = &tables[0]
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		arm64ReservedRegisterForTemporary)
	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
	//                                   = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
	//                                   = [&tables[TableIndex]] = tables[TableIndex].
	tableIndex := int64(o.U1)
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableIndex*8,
		arm64ReservedRegisterForTemporary)

	// result = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
		result,
	)

	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return nil
}

// compileTableFill implements compiler.compileTableFill for the arm64 architecture.
func (c *arm64Compiler) compileTableFill(o *wazeroir.UnionOperation) error {
	tableIndex := uint32(o.U1)
	return c.compileFillImpl(true, tableIndex)
}
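
// The helpers below manage the wazeroir virtual stack (locationStack): a value
// lives either in a register, on the memory stack, or in the condition flags,
// and the compile* methods above rely on these helpers to materialize their
// operands into registers before emitting arithmetic or memory instructions.
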
// popTwoValuesOnRegisters pops two values from the location stack, ensures
// these two values are located on registers, and marks them unused.
//
// TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
// but the name seems awkward.
func (c *arm64Compiler) popTwoValuesOnRegisters() (x1, x2 *runtimeValueLocation, err error) {
	x2 = c.locationStack.pop()
	if err = c.compileEnsureOnRegister(x2); err != nil {
		return
	}

	x1 = c.locationStack.pop()
	if err = c.compileEnsureOnRegister(x1); err != nil {
		return
	}

	c.markRegisterUnused(x2.register)
	c.markRegisterUnused(x1.register)
	return
}

// popValueOnRegister pops one value from the location stack, ensures
// that it is located on a register, and marks it unused.
//
// TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
// but the name seems awkward.
func (c *arm64Compiler) popValueOnRegister() (v *runtimeValueLocation, err error) {
	v = c.locationStack.pop()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	c.markRegisterUnused(v.register)
	return
}

// compileEnsureOnRegister emits instructions to ensure that a value is located on a register.
func (c *arm64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
	if loc.onStack() {
		reg, err := c.allocateRegister(loc.getRegisterType())
		if err != nil {
			return err
		}

		// Record that the value is now held by the register, and mark the register used.
		loc.setRegister(reg)
		c.markRegisterUsed(reg)

		c.compileLoadValueOnStackToRegister(loc)
	} else if loc.onConditionalRegister() {
		err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
	}
	return
}

// maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
// to a general purpose register if the value is currently located on a conditional register.
//
// This is usually called at the beginning of methods on the compiler interface where we possibly
// emit instructions without saving the conditional register value.
// compile* functions that do not call this must instead save the conditional
// value onto the stack or a register by invoking compileEnsureOnRegister for the top value.
func (c *arm64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
	if c.locationStack.sp > 0 {
		if loc := c.locationStack.peek(); loc.onConditionalRegister() {
			err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
		}
	}
	return
}

// compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value
// to a general purpose register.
func (c *arm64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
	reg, err := c.allocateRegister(loc.getRegisterType())
	if err != nil {
		return err
	}

	c.markRegisterUsed(reg)
	c.assembler.CompileConditionalRegisterSet(loc.conditionalRegister, reg)

	// Record that the value is now located on a general purpose register.
	loc.setRegister(reg)
	return nil
}
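
// Note: on arm64, materializing a conditional value into a register as done
// above typically lowers to a single CSET instruction (set the register to 1 if
// the condition holds, 0 otherwise); the exact encoding is left to the assembler.
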
// compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for arm64.
func (c *arm64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) {
	switch loc.valueType {
	case runtimeValueTypeI32:
		c.assembler.CompileMemoryToRegister(arm64.LDRW, arm64ReservedRegisterForStackBasePointerAddress,
			int64(loc.stackPointer)*8, loc.register)
	case runtimeValueTypeI64:
		c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForStackBasePointerAddress,
			int64(loc.stackPointer)*8, loc.register)
	case runtimeValueTypeF32:
		c.assembler.CompileMemoryToRegister(arm64.FLDRS, arm64ReservedRegisterForStackBasePointerAddress,
			int64(loc.stackPointer)*8, loc.register)
	case runtimeValueTypeF64:
		c.assembler.CompileMemoryToRegister(arm64.FLDRD, arm64ReservedRegisterForStackBasePointerAddress,
			int64(loc.stackPointer)*8, loc.register)
	case runtimeValueTypeV128Lo:
		c.assembler.CompileMemoryToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register,
			arm64.VectorArrangementQ)
		// The higher 64 bits are loaded by the Q-arrangement load above as well, so record
		// that the neighboring stack entry now shares the same vector register.
		hi := &c.locationStack.stack[loc.stackPointer+1]
		hi.setRegister(loc.register)
	case runtimeValueTypeV128Hi:
		panic("BUG: V128Hi must be loaded to a register along with V128Lo")
	}
}

// allocateRegister implements compiler.allocateRegister for arm64.
func (c *arm64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
	var ok bool
	// Try to get an unused register.
	reg, ok = c.locationStack.takeFreeRegister(t)
	if ok {
		return
	}

	// If not found, we have to steal a register.
	stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
	if !ok {
		err = fmt.Errorf("cannot steal register")
		return
	}

	// Release the steal target register's value onto its stack location.
	reg = stealTarget.register
	c.compileReleaseRegisterToStack(stealTarget)
	return
}

// compileReleaseAllRegistersToStack adds instructions to store all the values located on
// either general purpose or conditional registers onto the memory stack.
// See compileReleaseRegisterToStack.
func (c *arm64Compiler) compileReleaseAllRegistersToStack() (err error) {
	for i := uint64(0); i < c.locationStack.sp; i++ {
		if loc := &c.locationStack.stack[i]; loc.onRegister() {
			c.compileReleaseRegisterToStack(loc)
		} else if loc.onConditionalRegister() {
			if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
				return
			}
			c.compileReleaseRegisterToStack(loc)
		}
	}
	return
}
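
// Note: the allocator above spills eagerly rather than tracking live ranges:
// when no register of the requested type is free, one currently-used register
// is released to its canonical stack slot and handed to the caller. This keeps
// the allocator simple, at the cost of extra stores under register pressure.
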
// compileReleaseRegisterToStack adds an instruction to write the value in a register back to the memory stack region.
func (c *arm64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) {
	switch loc.valueType {
	case runtimeValueTypeI32:
		c.assembler.CompileRegisterToMemory(arm64.STRW, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
	case runtimeValueTypeI64:
		c.assembler.CompileRegisterToMemory(arm64.STRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
	case runtimeValueTypeF32:
		c.assembler.CompileRegisterToMemory(arm64.FSTRS, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
	case runtimeValueTypeF64:
		c.assembler.CompileRegisterToMemory(arm64.FSTRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
	case runtimeValueTypeV128Lo:
		c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
			loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
			arm64.VectorArrangementQ)
		// The higher 64 bits are stored by the Q-arrangement store above as well, so release
		// the neighboring stack entry that shares the same vector register.
		hi := &c.locationStack.stack[loc.stackPointer+1]
		c.locationStack.releaseRegister(hi)
	case runtimeValueTypeV128Hi:
		panic("BUG: V128Hi must be released to the stack along with V128Lo")
	default:
		panic("BUG")
	}

	// Mark the register as free.
	c.locationStack.releaseRegister(loc)
}

// compileReservedStackBasePointerRegisterInitialization adds instructions to initialize arm64ReservedRegisterForStackBasePointerAddress
// so that it points to the absolute address of the stack base for this function.
func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() {
	// First, load the address of the first element in the value stack into arm64ReservedRegisterForStackBasePointerAddress temporarily.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset,
		arm64ReservedRegisterForStackBasePointerAddress)

	// Next, we load the stack base pointer in bytes (ce.stackBasePointerInBytes) into arm64ReservedRegisterForTemporary.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		arm64ReservedRegisterForTemporary)

	// Finally, we calculate "arm64ReservedRegisterForStackBasePointerAddress += arm64ReservedRegisterForTemporary",
	// which gives us the absolute address of the stack base for this function.
	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForStackBasePointerAddress)
}

func (c *arm64Compiler) compileReservedMemoryRegisterInitialization() {
	if c.ir.HasMemory || c.ir.UsesMemory {
		// "arm64ReservedRegisterForMemory = ce.memoryElement0Address"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
			arm64ReservedRegisterForMemory,
		)
	}
}
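
// Note: the two initialization helpers above are emitted not only in the
// function preamble but also after any call that may move or reallocate the
// underlying buffers (e.g. memory.grow, or a call out to a Go builtin), as seen
// in compileMemoryGrow and compileTableGrow above.
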
// compileModuleContextInitialization adds instructions to initialize ce.moduleContext's fields based on
// ce.moduleContext.ModuleInstanceAddress.
// This is called in two cases: in the function preamble, and on return from (non-Go) function calls.
func (c *arm64Compiler) compileModuleContextInitialization() error {
	tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	c.markRegisterUsed(tmpX)
	tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	c.markRegisterUsed(tmpY)

	// "tmpX = ce.moduleContext.moduleInstance"
	c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, tmpX)

	// If the module instance address stays the same, we can skip the entire code below.
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64CallingConventionModuleInstanceAddressRegister, tmpX)
	brIfModuleUnchanged := c.assembler.CompileJump(arm64.BCONDEQ)

	// Otherwise, update ce.moduleContext.moduleInstance.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		arm64CallingConventionModuleInstanceAddressRegister,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset,
	)

	// Also, we have to update the following fields:
	// * callEngine.moduleContext.globalElement0Address
	// * callEngine.moduleContext.memoryElement0Address
	// * callEngine.moduleContext.memorySliceLen
	// * callEngine.moduleContext.memoryInstance
	// * callEngine.moduleContext.tablesElement0Address
	// * callEngine.moduleContext.tableSliceLen
	// * callEngine.moduleContext.functionsElement0Address
	// * callEngine.moduleContext.typeIDsElement0Address
	// * callEngine.moduleContext.dataInstancesElement0Address
	// * callEngine.moduleContext.elementInstancesElement0Address

	// Update globalElement0Address.
	//
	// Note: if there's a global.get or global.set instruction in the function, the existence of the globals
	// is ensured by function validation at the module instantiation phase, and that's why it is ok to
	// skip the initialization if the module's globals slice is empty.
	if len(c.ir.Globals) > 0 {
		// "tmpX = &moduleInstance.Globals[0]"
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset,
			tmpX,
		)

		// "ce.globalElement0Address = tmpX (== &moduleInstance.Globals[0])"
		c.assembler.CompileRegisterToMemory(
			arm64.STRD, tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
		)
	}

	// Update memoryElement0Address and memorySliceLen.
	//
	// Note: if there's a memory instruction in the function, the memory instance must be non-nil.
	// That is ensured by function validation at the module instantiation phase, and that's
	// why it is ok to skip the initialization if the module's memory instance is nil.
	if c.ir.HasMemory {
		// "tmpX = moduleInstance.Memory"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
			tmpX,
		)

		// First, set ce.memoryInstance.
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
		)

		// Next, we write the memory length into ce.memorySliceLen.
		//
		// "tmpY = [tmpX + memoryInstanceBufferLenOffset] (== len(memory.Buffer))"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			tmpX, memoryInstanceBufferLenOffset,
			tmpY,
		)
		// "ce.memorySliceLen = tmpY".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpY,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
		)

		// Finally, we write ce.memoryElement0Address.
		//
		// "tmpY = [tmpX + memoryInstanceBufferOffset] (== &memory.Buffer[0])"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			tmpX, memoryInstanceBufferOffset,
			tmpY,
		)
		// "ce.memoryElement0Address = tmpY".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpY,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
		)
	}

	// Update tablesElement0Address, tableSliceLen and typeIDsElement0Address.
	//
	// Note: if there's a table instruction in the function, the existence of the table
	// is ensured by function validation at the module instantiation phase, and that's
	// why it is ok to skip the initialization if the module's table doesn't exist.
	if c.ir.HasTable {
		// "tmpX = &tables[0] (type of **wasm.Table)"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTablesOffset,
			tmpX,
		)

		// Update ce.tablesElement0Address.
		// "ce.tablesElement0Address = tmpX".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		)

		// Finally, we put &moduleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
		c.assembler.CompileMemoryToRegister(arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpX)
		c.assembler.CompileRegisterToMemory(arm64.STRD,
			tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
	}

	// Update callEngine.moduleContext.functionsElement0Address.
	{
		// "tmpX = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
		//
		// Go's interface is laid out in memory as two quad words, as in struct{ tab, data uintptr },
		// where tab points to the interface table, and data points to the actual
		// implementation of the interface. In this case, we extract the "data" pointer as *moduleEngine.
		// See the following references for detail:
		// * https://research.swtch.com/interfaces
		// * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset,
			tmpX,
		)

		// "tmpY = [tmpX + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			tmpX, moduleEngineFunctionsOffset,
			tmpY,
		)

		// "callEngine.moduleContext.functionsElement0Address = tmpY".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpY,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
		)
	}
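
	// As a rough illustration of the layout assumed above (not code that runs
	// here), the interface header being dereferenced looks like:
	//
	//	type ifaceHeader struct {
	//		tab  uintptr // pointer to the type/itab information
	//		data uintptr // pointer to the concrete value, e.g. *moduleEngine
	//	}
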
	// Update dataInstancesElement0Address.
	if c.ir.HasDataInstances {
		// "tmpX = &moduleInstance.DataInstances[0]"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
			tmpX,
		)
		// "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
		)
	}

	// Update callEngine.moduleContext.elementInstancesElement0Address.
	if c.ir.HasElementInstances {
		// "tmpX = &moduleInstance.ElementInstances[0]"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
			tmpX,
		)
		// "callEngine.moduleContext.elementInstancesElement0Address = tmpX".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
		)
	}

	c.assembler.SetJumpTargetOnNext(brIfModuleUnchanged)
	c.markRegisterUnused(tmpX, tmpY)
	return nil
}
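
// Conceptually (an illustrative Go sketch only, not code that is executed), the
// module-context check emitted by compileModuleContextInitialization behaves like:
//
//	if ce.moduleContext.moduleInstance != currentModuleInstance {
//		ce.moduleContext.moduleInstance = currentModuleInstance
//		// ... refresh the cached element-0 addresses and lengths above ...
//	}
//
// so the refresh cost is paid only when execution crosses a module boundary.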