github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/compiler/impl_arm64.go

// This file implements the compiler for the arm64 target.
// Please refer to https://developer.arm.com/documentation/102374/latest/
// if unfamiliar with arm64 instructions and semantics.
package compiler

import (
    "bytes"
    "errors"
    "fmt"
    "math"

    "github.com/wasilibs/wazerox/internal/asm"
    "github.com/wasilibs/wazerox/internal/asm/arm64"
    "github.com/wasilibs/wazerox/internal/wasm"
    "github.com/wasilibs/wazerox/internal/wazeroir"
)

type arm64Compiler struct {
    assembler arm64.Assembler
    ir        *wazeroir.CompilationResult
    // locationStack holds the state of the wazeroir virtual stack,
    // where each item is placed either in a register or on the actual memory stack.
    locationStack *runtimeValueLocationStack
    // labels maps a label (e.g. ".L1_then") to *arm64LabelInfo.
    labels [wazeroir.LabelKindNum][]arm64LabelInfo
    // stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
    stackPointerCeil uint64
    // assignStackPointerCeilNeeded holds an asm.Node whose AssignSourceConstant must be called with the determined stack pointer ceiling.
    assignStackPointerCeilNeeded asm.Node
    compiledTrapTargets          [nativeCallStatusModuleClosed]asm.Node
    withListener                 bool
    typ                          *wasm.FunctionType
    br                           *bytes.Reader
    // locationStackForEntrypoint is the initial location stack for all functions. To reuse the allocated stack,
    // we cache it here, and reset and assign it to .locationStack in the Init method.
    locationStackForEntrypoint runtimeValueLocationStack
    // frameIDMax tracks the maximum value of frame id per function.
    frameIDMax int
    brTableTmp []runtimeValueLocation
}

func newArm64Compiler() compiler {
    return &arm64Compiler{
        assembler:                  arm64.NewAssembler(arm64ReservedRegisterForTemporary),
        locationStackForEntrypoint: newRuntimeValueLocationStack(),
        br:                         bytes.NewReader(nil),
    }
}

// Init implements compiler.Init.
func (c *arm64Compiler) Init(typ *wasm.FunctionType, ir *wazeroir.CompilationResult, withListener bool) {
    c.assembler.Reset()
    c.locationStackForEntrypoint.reset()
    c.resetLabels()

    *c = arm64Compiler{
        ir:                         ir,
        withListener:               withListener,
        typ:                        typ,
        assembler:                  c.assembler,
        labels:                     c.labels,
        br:                         c.br,
        brTableTmp:                 c.brTableTmp,
        locationStackForEntrypoint: c.locationStackForEntrypoint,
    }

    // Reuse the initial location stack for the compilation of subsequent functions.
    c.locationStack = &c.locationStackForEntrypoint
}

// resetLabels resets the existing content in arm64Compiler.labels so that
// we can reuse the allocated slices and stacks in subsequent compilations.
func (c *arm64Compiler) resetLabels() {
    for i := range c.labels {
        for j := range c.labels[i] {
            if j > c.frameIDMax {
                // Only need to reset up to the maximum frame id. This makes compilation faster for large binaries.
                break
            }
            l := &c.labels[i][j]
            l.initialInstruction = nil
            l.stackInitialized = false
            l.initialStack.reset()
        }
    }
}

var (
    arm64UnreservedVectorRegisters = []asm.Register{
        arm64.RegV0, arm64.RegV1, arm64.RegV2, arm64.RegV3,
        arm64.RegV4, arm64.RegV5, arm64.RegV6, arm64.RegV7, arm64.RegV8,
        arm64.RegV9, arm64.RegV10, arm64.RegV11, arm64.RegV12, arm64.RegV13,
        arm64.RegV14, arm64.RegV15, arm64.RegV16, arm64.RegV17, arm64.RegV18,
        arm64.RegV19, arm64.RegV20, arm64.RegV21, arm64.RegV22, arm64.RegV23,
        arm64.RegV24, arm64.RegV25, arm64.RegV26, arm64.RegV27, arm64.RegV28,
        arm64.RegV29, arm64.RegV30, arm64.RegV31,
    }

    // Note (see the arm64 section in https://go.dev/doc/asm):
    //   * RegR18 is reserved as a platform register, and we don't use it in the compiler.
    //   * RegR28 is reserved for the goroutine by the Go runtime, and we don't use it in the compiler.
    arm64UnreservedGeneralPurposeRegisters = []asm.Register{ //nolint
        arm64.RegR3, arm64.RegR4, arm64.RegR5, arm64.RegR6, arm64.RegR7, arm64.RegR8,
        arm64.RegR9, arm64.RegR10, arm64.RegR11, arm64.RegR12, arm64.RegR13,
        arm64.RegR14, arm64.RegR15, arm64.RegR16, arm64.RegR17, arm64.RegR19,
        arm64.RegR20, arm64.RegR21, arm64.RegR22, arm64.RegR23, arm64.RegR24,
        arm64.RegR25, arm64.RegR26, arm64.RegR29, arm64.RegR30,
    }
)

const (
    // arm64ReservedRegisterForCallEngine holds the pointer to the callEngine instance (i.e. *callEngine as uintptr).
    arm64ReservedRegisterForCallEngine = arm64.RegR0
    // arm64ReservedRegisterForStackBasePointerAddress holds the stack base pointer's address (callEngine.stackBasePointer) in the current function call.
    arm64ReservedRegisterForStackBasePointerAddress = arm64.RegR1
    // arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
    arm64ReservedRegisterForMemory = arm64.RegR2
    // arm64ReservedRegisterForTemporary is the temporary register which is available at any point of execution,
    // but its content must not be assumed to live beyond a single operation.
    // Note: we choose R27 as that is the temporary register used in Go's assembler.
    arm64ReservedRegisterForTemporary = arm64.RegR27
)

var arm64CallingConventionModuleInstanceAddressRegister = arm64.RegR29

const (
    // arm64CallEngineArchContextCompilerCallReturnAddressOffset is the offset of archContext.nativeCallReturnAddress in callEngine.
    arm64CallEngineArchContextCompilerCallReturnAddressOffset = 144
    // arm64CallEngineArchContextMinimum32BitSignedIntOffset is the offset of archContext.minimum32BitSignedIntAddress in callEngine.
    arm64CallEngineArchContextMinimum32BitSignedIntOffset = 152
    // arm64CallEngineArchContextMinimum64BitSignedIntOffset is the offset of archContext.minimum64BitSignedIntAddress in callEngine.
    arm64CallEngineArchContextMinimum64BitSignedIntOffset = 160
)

func isZeroRegister(r asm.Register) bool {
    return r == arm64.RegRZR
}

// compileNOP implements compiler.compileNOP for the arm64 architecture.
func (c *arm64Compiler) compileNOP() asm.Node {
    return c.assembler.CompileStandAlone(arm64.NOP)
}

// compile implements compiler.compile for the arm64 architecture.
func (c *arm64Compiler) compile(buf asm.Buffer) (stackPointerCeil uint64, err error) {
    // c.stackPointerCeil tracks the stack pointer ceiling (the maximum seen) across all runtimeValueLocationStack(s)
    // used for all labels (via setLocationStack), excluding the current one.
    // Hence, we check here whether the final block's maximum exceeds the current c.stackPointerCeil.
    stackPointerCeil = c.stackPointerCeil
    if stackPointerCeil < c.locationStack.stackPointerCeil {
        stackPointerCeil = c.locationStack.stackPointerCeil
    }

    // Now that the ceiling of the stack pointer is determined, we invoke the callback.
    // Note: this must be called before Assemble() below.
    c.assignStackPointerCeil(stackPointerCeil)

    err = c.assembler.Assemble(buf)
    return
}

// arm64LabelInfo holds wazeroir-label-specific information in this function.
type arm64LabelInfo struct {
    // initialInstruction is the initial instruction for this label so other blocks can branch into it.
    initialInstruction asm.Node
    // initialStack is the initial value location stack from which we start compiling this label.
    initialStack     runtimeValueLocationStack
    stackInitialized bool
}

// assignStackPointerCeil implements compilerImpl.assignStackPointerCeil for the arm64 architecture.
func (c *arm64Compiler) assignStackPointerCeil(ceil uint64) {
    if c.assignStackPointerCeilNeeded != nil {
        c.assignStackPointerCeilNeeded.AssignSourceConstant(int64(ceil) << 3)
    }
}

func (c *arm64Compiler) label(label wazeroir.Label) *arm64LabelInfo {
    kind := label.Kind()
    frames := c.labels[kind]
    frameID := label.FrameID()
    if c.frameIDMax < frameID {
        c.frameIDMax = frameID
    }
    // If the frameID is not allocated yet, grow the slice so that frames[frameID] exists,
    // which also reduces allocations in subsequent compilations.
    if diff := frameID - len(frames) + 1; diff > 0 {
        for i := 0; i < diff; i++ {
            frames = append(frames, arm64LabelInfo{initialStack: newRuntimeValueLocationStack()})
        }
        c.labels[kind] = frames
    }
    return &frames[frameID]
}

// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
    return c.locationStack
}

// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
    ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
    c.markRegisterUsed(reg)
    return
}

// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for arm64.
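// Note: a v128 value occupies two consecutive 64-bit slots on the value stack
// (runtimeValueTypeV128Lo followed by runtimeValueTypeV128Hi), both mapped to the
// same vector register, so such values must always be pushed and popped in pairs.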
func (c *arm64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
    lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
    c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
    c.markRegisterUsed(reg)
    return
}

func (c *arm64Compiler) markRegisterUsed(regs ...asm.Register) {
    for _, reg := range regs {
        if !isZeroRegister(reg) && reg != asm.NilRegister {
            c.locationStack.markRegisterUsed(reg)
        }
    }
}

func (c *arm64Compiler) markRegisterUnused(regs ...asm.Register) {
    for _, reg := range regs {
        if !isZeroRegister(reg) && reg != asm.NilRegister {
            c.locationStack.markRegisterUnused(reg)
        }
    }
}

func (c *arm64Compiler) String() (ret string) { return c.locationStack.String() }

// compilePreamble implements compiler.compilePreamble for the arm64 architecture.
func (c *arm64Compiler) compilePreamble() error {
    c.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
    defer c.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

    c.locationStack.init(c.typ)

    // Check whether it's necessary to grow the value stack before entering the function body.
    if err := c.compileMaybeGrowStack(); err != nil {
        return err
    }

    if err := c.compileModuleContextInitialization(); err != nil {
        return err
    }

    if c.withListener {
        if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil {
            return err
        }
    }

    // We must initialize the stack base pointer register so that we can manipulate the stack properly.
    c.compileReservedStackBasePointerRegisterInitialization()

    c.compileReservedMemoryRegisterInitialization()

    return nil
}

// compileMaybeGrowStack adds instructions to check whether the value stack needs to grow,
// and if so, makes a builtin function call to do so. These instructions are emitted
// in the function's preamble.
func (c *arm64Compiler) compileMaybeGrowStack() error {
    tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
    if !found {
        panic("BUG: all the registers should be free at this point")
    }
    c.markRegisterUsed(tmpX)
    tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
    if !found {
        panic("BUG: all the registers should be free at this point")
    }
    c.markRegisterUsed(tmpY)

    // "tmpX = len(ce.stack)"
    c.assembler.CompileMemoryToRegister(
        arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset,
        tmpX,
    )

    // "tmpY = ce.stackBasePointer"
    c.assembler.CompileMemoryToRegister(
        arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
        tmpY,
    )

    // "tmpX = tmpX - tmpY", in other words "tmpX = len(ce.stack) - ce.stackBasePointer"
    c.assembler.CompileRegisterToRegister(
        arm64.SUB,
        tmpY,
        tmpX,
    )

    // "tmpY = stackPointerCeil"
    loadStackPointerCeil := c.assembler.CompileConstToRegister(
        arm64.MOVD,
        math.MaxInt32,
        tmpY,
    )
    // At this point of compilation, we don't know the value of the stack pointer ceiling,
    // so we lazily resolve the value later.
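    // Conceptually, the check emitted here amounts to the following Go sketch
    // (a rough illustration in byte units, not literal engine code):
    //
    //     if stackLenInBytes-stackBasePointerInBytes < stackPointerCeil*8 {
    //         callBuiltinFunction(growStack)
    //     }
    //
    // where stackPointerCeil*8 is the placeholder constant patched in by
    // assignStackPointerCeil once compilation of the whole function finishes.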
    c.assignStackPointerCeilNeeded = loadStackPointerCeil

    // Compare tmpX (len(ce.stack) - ce.stackBasePointer) and tmpY (stackPointerCeil).
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmpX, tmpY)

    // If ceil > stackLen - stack base pointer, we need to grow the stack by calling the builtin Go function.
    brIfStackOK := c.assembler.CompileJump(arm64.BCONDLS)
    if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexGrowStack); err != nil {
        return err
    }

    // Otherwise, skip calling it.
    c.assembler.SetJumpTargetOnNext(brIfStackOK)

    c.markRegisterUnused(tmpX, tmpY)
    return nil
}

// compileReturnFunction emits instructions to return from the current function frame.
// If the current frame is the bottom, the code goes back to the Go code with the nativeCallStatusCodeReturned status.
// Otherwise, we branch into the caller's return address.
func (c *arm64Compiler) compileReturnFunction() error {
    // Release all the registers as our calling convention requires the caller to save them.
    if err := c.compileReleaseAllRegistersToStack(); err != nil {
        return err
    }

    if c.withListener {
        if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerAfter); err != nil {
            return err
        }
        // After return, we re-initialize the stack base pointer as that is used to return to the caller below.
        c.compileReservedStackBasePointerRegisterInitialization()
    }

    // arm64CallingConventionModuleInstanceAddressRegister holds the module instance's address,
    // so we mark it used so that it won't be used as a free register.
    c.locationStack.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
    defer c.locationStack.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

    returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.typ)

    // If the return address is zero, we return from the execution (the current frame is the bottom).
    returnAddress.setRegister(arm64ReservedRegisterForTemporary)
    c.compileLoadValueOnStackToRegister(returnAddress)
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, arm64.RegRZR)

    // Branch if the address is non-zero; in that case we branch into the caller's return address below.
    // Otherwise, exit the execution.
    c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeReturned)

    // Alias for readability.
    tmp := arm64CallingConventionModuleInstanceAddressRegister

    // First, restore stackContext.stackBasePointerInBytes from callerStackBasePointerInBytes.
    callerStackBasePointerInBytes.setRegister(tmp)
    c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
    c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

    // Next, restore moduleContext.fn from callerFunction.
    callerFunction.setRegister(tmp)
    c.compileLoadValueOnStackToRegister(callerFunction)
    c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

    // Also, we have to put the target function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister.
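    // (At this point tmp still holds the caller's *function, so the load below
    // reads its .moduleInstance field.)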
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        tmp, functionModuleInstanceOffset,
        arm64CallingConventionModuleInstanceAddressRegister)

    c.assembler.CompileJumpToRegister(arm64.B, returnAddress.register)
    return nil
}

func (c *arm64Compiler) compileMaybeExitFromNativeCode(skipCondition asm.Instruction, status nativeCallStatusCode) {
    skip := c.assembler.CompileJump(skipCondition)
    c.compileExitFromNativeCode(status)
    c.assembler.SetJumpTargetOnNext(skip)
}

// compileExitFromNativeCode adds instructions to give the control back to ce.exec with the given status code.
func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) {
    if target := c.compiledTrapTargets[status]; target != nil {
        c.assembler.CompileJump(arm64.B).AssignJumpTarget(target)
        return
    }

    switch status {
    case nativeCallStatusCodeReturned:
        // Save the target for reuse.
        c.compiledTrapTargets[status] = c.compileNOP()
    case nativeCallStatusCodeCallGoHostFunction, nativeCallStatusCodeCallBuiltInFunction:
        // Read the return address, and write it to callEngine.exitContext.returnAddress.
        c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET)
        c.assembler.CompileRegisterToMemory(
            arm64.STRD, arm64ReservedRegisterForTemporary,
            arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
        )
    default:
        if c.ir.IROperationSourceOffsetsInWasmBinary != nil {
            // In this case, the execution traps, and we want the top frame's source position in the stack trace.
            // We store the instruction address into callEngine.exitContext.returnAddress.
            c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.STRD)
            c.assembler.CompileRegisterToMemory(
                arm64.STRD, arm64ReservedRegisterForTemporary,
                arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
            )
        } else {
            // We won't use the source position, so just save the target for reuse.
            c.compiledTrapTargets[status] = c.compileNOP()
        }
    }

    // Write the current stack pointer to ce.stackPointer.
    c.assembler.CompileConstToRegister(arm64.MOVD, int64(c.locationStack.sp), arm64ReservedRegisterForTemporary)
    c.assembler.CompileRegisterToMemory(arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine,
        callEngineStackContextStackPointerOffset)

    // Write the status to callEngine.exitContext.statusCode.
    if status != 0 {
        c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary)
        c.assembler.CompileRegisterToMemory(arm64.STRW, arm64ReservedRegisterForTemporary,
            arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
    } else {
        // If status == 0, we use the zero register to store zero.
        c.assembler.CompileRegisterToMemory(arm64.STRW, arm64.RegRZR,
            arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
    }

    // The return address to the Go code is stored in archContext.compilerReturnAddress, which
    // is embedded in ce. We load the value to the temporary register, and then
    // invoke RET with that register.
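    // In other words, the whole exit sequence emitted above plus the RET below
    // is, conceptually, this Go sketch (field names taken from the offset constants):
    //
    //     ce.stackContext.stackPointer = sp
    //     ce.exitContext.statusCode = status
    //     goto ce.archContext.nativeCallReturnAddress // back into the Go runtime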
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, arm64CallEngineArchContextCompilerCallReturnAddressOffset,
        arm64ReservedRegisterForTemporary)

    c.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary)
}

// compileGoDefinedHostFunction implements compiler.compileGoDefinedHostFunction for the arm64 architecture.
func (c *arm64Compiler) compileGoDefinedHostFunction() error {
    // First we must update the location stack to reflect the number of host function inputs.
    c.locationStack.init(c.typ)

    if c.withListener {
        if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction,
            builtinFunctionIndexFunctionListenerBefore); err != nil {
            return err
        }
    }

    // The host function needs access to the caller's module instance, and the caller's information is stored
    // in the stack (as described in the doc of callEngine.stack). Here, we get the caller's *function from
    // the stack, load its *wasm.ModuleInstance, and save it in callEngine.exitContext.callerModuleInstance
    // so we can pass it to the host function without sacrificing performance.
    c.compileReservedStackBasePointerRegisterInitialization()
    // Alias for readability.
    tmp := arm64CallingConventionModuleInstanceAddressRegister
    // Get the location of the callerFunction (*function) in the stack, which depends on the signature.
    _, _, callerFunction := c.locationStack.getCallFrameLocations(c.typ)
    // Load the value into the tmp register: tmp = &function{..}
    callerFunction.setRegister(tmp)
    c.compileLoadValueOnStackToRegister(callerFunction)
    // tmp = *(tmp+functionModuleInstanceOffset) = &wasm.ModuleInstance{...}
    c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, functionModuleInstanceOffset, tmp)
    // Store it into callEngine.exitContext.callerModuleInstance.
    c.assembler.CompileRegisterToMemory(arm64.STRD,
        tmp,
        arm64ReservedRegisterForCallEngine, callEngineExitContextCallerModuleInstanceOffset)
    // Reset the state of the callerFunction value location so that we won't mess up the subsequent code generation below.
    c.locationStack.releaseRegister(callerFunction)

    if err := c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction, 0); err != nil {
        return err
    }

    // Initialize the reserved stack base pointer which is used to retrieve the call frame stack.
    c.compileReservedStackBasePointerRegisterInitialization()

    // A Go function can change the module state in arbitrary ways, so we have to force
    // the callEngine.moduleContext initialization on the function return. To do so,
    // we zero out callEngine.moduleInstance.
    c.assembler.CompileRegisterToMemory(arm64.STRD,
        arm64.RegRZR,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset)

    return c.compileReturnFunction()
}

// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
    if c.stackPointerCeil < c.locationStack.stackPointerCeil {
        c.stackPointerCeil = c.locationStack.stackPointerCeil
    }
    c.locationStack = newStack
}

// compileBuiltinFunctionCheckExitCode implements compiler.compileBuiltinFunctionCheckExitCode for the arm64 architecture.
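// Presumably (inferring from the builtin's name; nothing in this file defines it),
// this builtin gives long-running wasm code a point at which it can observe that
// the module or engine was closed, and exit accordingly.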
func (c *arm64Compiler) compileBuiltinFunctionCheckExitCode() error {
    if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexCheckExitCode); err != nil {
        return err
    }

    // After return, we re-initialize the reserved registers just like in the preamble of functions.
    c.compileReservedStackBasePointerRegisterInitialization()
    c.compileReservedMemoryRegisterInitialization()
    return nil
}

// compileLabel implements compiler.compileLabel for the arm64 architecture.
func (c *arm64Compiler) compileLabel(o *wazeroir.UnionOperation) (skipThisLabel bool) {
    labelKey := wazeroir.Label(o.U1)
    labelInfo := c.label(labelKey)

    // If initialStack is not set, that means this label has never been reached.
    if !labelInfo.stackInitialized {
        skipThisLabel = true
        return
    }

    if labelBegin := labelInfo.initialInstruction; labelBegin == nil {
        // We use NOP as the beginning of instructions in a label.
        // This should eventually be optimized out by the assembler.
        labelInfo.initialInstruction = c.assembler.CompileStandAlone(arm64.NOP)
    } else {
        c.assembler.Add(labelBegin)
    }

    // Set the initial stack.
    c.setLocationStack(&labelInfo.initialStack)
    return false
}

// compileUnreachable implements compiler.compileUnreachable for the arm64 architecture.
func (c *arm64Compiler) compileUnreachable() error {
    c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
    return nil
}

// compileSet implements compiler.compileSet for the arm64 architecture.
func (c *arm64Compiler) compileSet(o *wazeroir.UnionOperation) error {
    depth := int(o.U1)
    isTargetVector := o.B3

    setTargetIndex := int(c.locationStack.sp) - 1 - depth

    if isTargetVector {
        _ = c.locationStack.pop()
    }
    v := c.locationStack.pop()
    if err := c.compileEnsureOnRegister(v); err != nil {
        return err
    }

    targetLocation := &c.locationStack.stack[setTargetIndex]
    if targetLocation.onRegister() {
        // We no longer need the register previously used by the target location.
        c.markRegisterUnused(targetLocation.register)
    }

    reg := v.register
    targetLocation.setRegister(reg)
    targetLocation.valueType = v.valueType
    if isTargetVector {
        hi := &c.locationStack.stack[setTargetIndex+1]
        hi.setRegister(reg)
    }
    return nil
}

// compileGlobalGet implements compiler.compileGlobalGet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalGet(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    index := uint32(o.U1)

    wasmValueType := c.ir.Globals[index].ValType
    isV128 := wasmValueType == wasm.ValueTypeV128
    // Get the address of globals[index] into globalAddressReg.
    globalAddressReg, err := c.compileReadGlobalAddress(index)
    if err != nil {
        return err
    }

    if isV128 {
        resultReg, err := c.allocateRegister(registerTypeVector)
        if err != nil {
            return err
        }
        c.assembler.CompileConstToRegister(arm64.ADD, globalInstanceValueOffset, globalAddressReg)
        c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, globalAddressReg, 0,
            resultReg, arm64.VectorArrangementQ)

        c.pushVectorRuntimeValueLocationOnRegister(resultReg)
    } else {
        ldr := arm64.NOP
        var result asm.Register
        var vt runtimeValueType
        switch wasmValueType {
        case wasm.ValueTypeI32:
            ldr = arm64.LDRW
            vt = runtimeValueTypeI32
            result = globalAddressReg
        case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
            ldr = arm64.LDRD
            vt = runtimeValueTypeI64
            result = globalAddressReg
        case wasm.ValueTypeF32:
            result, err = c.allocateRegister(registerTypeVector)
            if err != nil {
                return err
            }
            ldr = arm64.FLDRS
            vt = runtimeValueTypeF32
        case wasm.ValueTypeF64:
            result, err = c.allocateRegister(registerTypeVector)
            if err != nil {
                return err
            }
            ldr = arm64.FLDRD
            vt = runtimeValueTypeF64
        }

        // "result = [globalAddressReg + globalInstanceValueOffset] (== globals[index].Val)"
        c.assembler.CompileMemoryToRegister(
            ldr,
            globalAddressReg, globalInstanceValueOffset,
            result,
        )

        c.pushRuntimeValueLocationOnRegister(result, vt)
    }
    return nil
}

// compileGlobalSet implements compiler.compileGlobalSet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalSet(o *wazeroir.UnionOperation) error {
    index := uint32(o.U1)

    wasmValueType := c.ir.Globals[index].ValType
    isV128 := wasmValueType == wasm.ValueTypeV128

    var val *runtimeValueLocation
    if isV128 {
        val = c.locationStack.popV128()
    } else {
        val = c.locationStack.pop()
    }
    if err := c.compileEnsureOnRegister(val); err != nil {
        return err
    }

    globalInstanceAddressRegister, err := c.compileReadGlobalAddress(index)
    if err != nil {
        return err
    }

    if isV128 {
        c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
            val.register, globalInstanceAddressRegister, globalInstanceValueOffset,
            arm64.VectorArrangementQ)
    } else {
        var str asm.Instruction
        switch c.ir.Globals[index].ValType {
        case wasm.ValueTypeI32:
            str = arm64.STRW
        case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
            str = arm64.STRD
        case wasm.ValueTypeF32:
            str = arm64.FSTRS
        case wasm.ValueTypeF64:
            str = arm64.FSTRD
        }

        // At this point "globalInstanceAddressRegister = globals[index]".
        // Therefore, this means "globals[index].Val = val.register".
        c.assembler.CompileRegisterToMemory(
            str,
            val.register,
            globalInstanceAddressRegister, globalInstanceValueOffset,
        )
    }

    c.markRegisterUnused(val.register)
    return nil
}

// compileReadGlobalAddress adds instructions to store the absolute address of the global instance at globalIndex into a register.
func (c *arm64Compiler) compileReadGlobalAddress(globalIndex uint32) (destinationRegister asm.Register, err error) {
    // TODO: rethink the type used to store globals, `globals []*GlobalInstance`.
    // If we used `[]GlobalInstance` instead, we could eliminate one MOV instruction here.
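
    // What follows computes, conceptually (a rough Go sketch, not engine code):
    //
    //     destinationRegister = uintptr(unsafe.Pointer(globals[globalIndex]))
    //
    // i.e. it loads the *GlobalInstance pointer stored at globals[globalIndex],
    // via the element-zero address cached in callEngine.moduleContext.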
    destinationRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return
    }

    // "destinationRegister = globalIndex * 8"
    c.assembler.CompileConstToRegister(
        // globalIndex is an index to []*GlobalInstance, therefore
        // we have to multiply it by the size of *GlobalInstance == the pointer size == 8.
        arm64.MOVD, int64(globalIndex)*8, destinationRegister,
    )

    // "arm64ReservedRegisterForTemporary = &globals[0]"
    c.assembler.CompileMemoryToRegister(
        arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
        arm64ReservedRegisterForTemporary,
    )

    // "destinationRegister = [arm64ReservedRegisterForTemporary + destinationRegister] (== globals[globalIndex])".
    c.assembler.CompileMemoryWithRegisterOffsetToRegister(
        arm64.LDRD,
        arm64ReservedRegisterForTemporary, destinationRegister,
        destinationRegister,
    )
    return
}

// compileBr implements compiler.compileBr for the arm64 architecture.
func (c *arm64Compiler) compileBr(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }
    return c.compileBranchInto(wazeroir.Label(o.U1))
}

// compileBrIf implements compiler.compileBrIf for the arm64 architecture.
func (c *arm64Compiler) compileBrIf(o *wazeroir.UnionOperation) error {
    cond := c.locationStack.pop()

    var conditionalBR asm.Node
    if cond.onConditionalRegister() {
        // If cond is on a conditional register, it corresponds to one of the "condition codes":
        // https://developer.arm.com/documentation/dui0801/a/Condition-Codes/Condition-code-suffixes
        // Here we represent the condition codes by the arm64.Cond** values, which means the
        // conditional jump can be performed with the corresponding arm64.BCOND** instruction.
        // For example, if cond holds arm64.CondEQ, compileEq was performed right before
        // this compileBrIf, and the BrIf can be achieved by arm64.BCONDEQ.
        var brInst asm.Instruction
        switch cond.conditionalRegister {
        case arm64.CondEQ:
            brInst = arm64.BCONDEQ
        case arm64.CondNE:
            brInst = arm64.BCONDNE
        case arm64.CondHS:
            brInst = arm64.BCONDHS
        case arm64.CondLO:
            brInst = arm64.BCONDLO
        case arm64.CondMI:
            brInst = arm64.BCONDMI
        case arm64.CondHI:
            brInst = arm64.BCONDHI
        case arm64.CondLS:
            brInst = arm64.BCONDLS
        case arm64.CondGE:
            brInst = arm64.BCONDGE
        case arm64.CondLT:
            brInst = arm64.BCONDLT
        case arm64.CondGT:
            brInst = arm64.BCONDGT
        case arm64.CondLE:
            brInst = arm64.BCONDLE
        default:
            // BUG: cond.conditionalRegister was set somewhere in this file to a value
            // not covered by the switch above. That shouldn't happen.
            return fmt.Errorf("unsupported condition for br_if: %v", cond.conditionalRegister)
        }
        conditionalBR = c.assembler.CompileJump(brInst)
    } else {
        // If the value is not on a conditional register, we compare the value with the zero register,
        // and then do the conditional BR if the value doesn't equal zero.
        if err := c.compileEnsureOnRegister(cond); err != nil {
            return err
        }
        // Compare the value with the zero register. Note that the value is ensured to be i32 by the
        // function validation phase, so we use CMPW (32-bit compare) here.
        c.assembler.CompileTwoRegistersToNone(arm64.CMPW, cond.register, arm64.RegRZR)

        conditionalBR = c.assembler.CompileJump(arm64.BCONDNE)

        c.markRegisterUnused(cond.register)
    }

    // Emit the code for branching into the else branch.
    elseTarget := wazeroir.Label(o.U2)
    if err := c.compileBranchInto(elseTarget); err != nil {
        return err
    }
    // We branch into here from the original conditional BR (conditionalBR).
    c.assembler.SetJumpTargetOnNext(conditionalBR)
    thenTarget := wazeroir.Label(o.U1)
    if err := compileDropRange(c, o.U3); err != nil {
        return err
    }
    return c.compileBranchInto(thenTarget)
}

func (c *arm64Compiler) compileBranchInto(target wazeroir.Label) error {
    if target.IsReturnTarget() {
        return c.compileReturnFunction()
    } else {
        if c.ir.LabelCallers[target] > 1 {
            // We can only reuse the register state when there's a single call-site.
            // If there are multiple call-sites, release all the values on registers to the stack
            // so that we have a consistent value location state at the beginning of the label.
            if err := c.compileReleaseAllRegistersToStack(); err != nil {
                return err
            }
        }
        // Set the initial stack of the target label, so we can start compiling the label
        // with the appropriate value locations. Note we clone the stack here as we may
        // manipulate the stack before the compiler reaches the label.
        targetLabel := c.label(target)
        if !targetLabel.stackInitialized {
            targetLabel.initialStack.cloneFrom(*c.locationStack)
            targetLabel.stackInitialized = true
        }

        br := c.assembler.CompileJump(arm64.B)
        c.assignBranchTarget(target, br)
        return nil
    }
}

// assignBranchTarget assigns the given label's initial instruction to the destination of br.
func (c *arm64Compiler) assignBranchTarget(label wazeroir.Label, br asm.Node) {
    target := c.label(label)

    targetInst := target.initialInstruction
    if targetInst == nil {
        // If the label isn't compiled yet, allocate the NOP node, and set it as the initial instruction.
        targetInst = c.assembler.AllocateNOP()
        target.initialInstruction = targetInst
    }

    br.AssignJumpTarget(targetInst)
}

// compileBrTable implements compiler.compileBrTable for the arm64 architecture.
func (c *arm64Compiler) compileBrTable(o *wazeroir.UnionOperation) error {
    // If the operation only consists of the default target, we branch into it and return early.
    if len(o.Us) == 2 {
        loc := c.locationStack.pop()
        if loc.onRegister() {
            c.markRegisterUnused(loc.register)
        }
        if err := compileDropRange(c, o.Us[1]); err != nil {
            return err
        }
        return c.compileBranchInto(wazeroir.Label(o.Us[0]))
    }

    index := c.locationStack.pop()
    if err := c.compileEnsureOnRegister(index); err != nil {
        return err
    }

    if isZeroRegister(index.register) {
        reg, err := c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        index.setRegister(reg)
        c.markRegisterUsed(reg)

        // Zero the value on the picked register.
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
    }

    tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    // Load the number of non-default targets of the branch table.
    // "tmpReg = len(targets)"
    c.assembler.CompileConstToRegister(arm64.MOVW, int64(len(o.Us)/2-1), tmpReg)
    // Compare the number of targets with the index.
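    // Conceptually, the next three instructions clamp the index to the default
    // target's slot, roughly: "if index >= len(targets) { index = len(targets) }".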
    c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register)
    // If the index exceeds the number of targets, we branch into the default target
    // (whose slot in the offset table below is the last one, at index len(targets)).
    brDefaultIndex := c.assembler.CompileJump(arm64.BCONDLO)
    c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register)
    c.assembler.SetJumpTargetOnNext(brDefaultIndex)

    // We prepare the asm.StaticConst which holds the offset of
    // each target's first instruction (incl. the default)
    // relative to the beginning of the label tables.
    //
    // For example, if we have targets=[L0, L1] and default=L_DEFAULT,
    // we emit the code like this at [Emit the code for each target and default branch] below.
    //
    // L0:
    //  0x123001: XXXX, ...
    //  .....
    // L1:
    //  0x123005: YYY, ...
    //  .....
    // L_DEFAULT:
    //  0x123009: ZZZ, ...
    //
    // then offsetData becomes like [0x0, 0x4, 0x8].
    // By using this offset list, we can jump into the label for the index by
    // "jmp offsetData[index]+0x123001", and "0x123001" can be acquired by the ADR instruction.
    //
    // Note: we store each offset as a 32-bit unsigned integer in 4 consecutive bytes. So more precisely,
    // the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
    //
    // Note: this is similar to how GCC implements switch statements in C.
    offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Us)/2)))

    // "tmpReg = &offsetData[0]"
    c.assembler.CompileStaticConstToRegister(arm64.ADR, offsetData, tmpReg)

    // "index.register = tmpReg + (index.register << 2) (== &offsetData[offset])"
    c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register)

    // "index.register = *index.register (== offsetData[offset])"
    c.assembler.CompileMemoryToRegister(arm64.LDRW, index.register, 0, index.register)

    // Now we read the address of the beginning of the jump table.
    // In the above example, this corresponds to reading the address of 0x123001.
    c.assembler.CompileReadInstructionAddress(tmpReg, arm64.B)

    // Now we have the address of L0 in the tmp register, and the offset to the target label in index.register.
    // So we can achieve the br_table jump by adding them and jumping into the resulting address.
    c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register)

    c.assembler.CompileJumpToRegister(arm64.B, index.register)

    // We no longer need the index's register, so mark it unused.
    c.markRegisterUnused(index.register)

    // [Emit the code for each target and default branch]
    labelInitialInstructions := make([]asm.Node, len(o.Us)/2)

    // Since we might end up having different stack states in each branch,
    // we need to save the initial stack state here, and use the same initial state
    // for each iteration.
    initialLocationStack := c.getSavedTemporaryLocationStack()

    for i := range labelInitialInstructions {
        // Emit the initial instruction of each target, where
        // we use NOP as we don't yet know the next instruction in each label.
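        // (BuildJumpTable below later computes each NOP's byte offset from the
        // first one and writes it into offsetData.)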
        init := c.assembler.CompileStandAlone(arm64.NOP)
        labelInitialInstructions[i] = init

        targetLabel := wazeroir.Label(o.Us[i*2])
        targetToDrop := o.Us[i*2+1]
        if err = compileDropRange(c, targetToDrop); err != nil {
            return err
        }
        if err = c.compileBranchInto(targetLabel); err != nil {
            return err
        }
        // After the iteration, reset the stack's state with initialLocationStack.
        c.locationStack.cloneFrom(initialLocationStack)
    }

    c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
    return nil
}

func (c *arm64Compiler) getSavedTemporaryLocationStack() runtimeValueLocationStack {
    initialLocationStack := *c.locationStack // Take a copy!
    // Use c.brTableTmp for the underlying stack so that we can reduce the allocations.
    if diff := int(initialLocationStack.sp) - len(c.brTableTmp); diff > 0 {
        c.brTableTmp = append(c.brTableTmp, make([]runtimeValueLocation, diff)...)
    }
    copy(c.brTableTmp, initialLocationStack.stack[:initialLocationStack.sp])
    initialLocationStack.stack = c.brTableTmp
    return initialLocationStack
}

// compileCall implements compiler.compileCall for the arm64 architecture.
func (c *arm64Compiler) compileCall(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    functionIndex := o.U1

    tp := &c.ir.Types[c.ir.Functions[functionIndex]]

    targetFunctionAddressReg, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }
    c.markRegisterUsed(targetFunctionAddressReg)
    defer c.markRegisterUnused(targetFunctionAddressReg)

    // Compute the address of the target function (ce.functions[functionIndex]).
    //
    // First, we read the address of the first item of the ce.functions slice (= &ce.functions[0])
    // into targetFunctionAddressReg.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
        targetFunctionAddressReg)

    c.assembler.CompileConstToRegister(
        arm64.ADD,
        int64(functionIndex)*functionSize, // functionIndex is an index into []function, so we scale by the size of the function struct.
        targetFunctionAddressReg)

    return c.compileCallImpl(targetFunctionAddressReg, tp)
}

// compileCallImpl implements compiler.compileCall and compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallImpl(targetFunctionAddressRegister asm.Register, functype *wasm.FunctionType) error {
    // Release all the registers as our calling convention requires the caller to save them.
    if err := c.compileReleaseAllRegistersToStack(); err != nil {
        return err
    }

    tmp, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
    if !ok {
        panic("BUG: cannot take a free register")
    }

    // The stack should look like:
    //
    //               reserved slots for results (if len(results) > len(args))
    //                      |     |
    //    ,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
    //      |                       |                                                        |
    //      |             callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
    //      |
    // nextStackBasePointerOffset
    //
    // where callFrame is used to return to this currently executed function.
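    //
    // Note: nextStackBasePointerOffset points at arg0 above; the callee's frame
    // begins where its parameters (plus any reserved result slots) begin, so
    // arguments are passed simply by leaving them at the top of the caller's stack.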

    nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)

    callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)

    // Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
        tmp)
    callFrameStackBasePointerInBytesLoc.setRegister(tmp)
    c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)

    // Set callEngine.stackContext.stackBasePointer for the next function.
    c.assembler.CompileConstToRegister(arm64.ADD, nextStackBasePointerOffset<<3, tmp)
    c.assembler.CompileRegisterToMemory(arm64.STRD,
        tmp,
        arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

    // Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset,
        tmp)
    callFrameFunctionLoc.setRegister(tmp)
    c.compileReleaseRegisterToStack(callFrameFunctionLoc)

    // Set callEngine.moduleContext.fn to the next *function.
    c.assembler.CompileRegisterToMemory(arm64.STRD,
        targetFunctionAddressRegister,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

    // Write the return address into callFrameReturnAddressLoc.
    c.assembler.CompileReadInstructionAddress(tmp, arm64.B)
    callFrameReturnAddressLoc.setRegister(tmp)
    c.compileReleaseRegisterToStack(callFrameReturnAddressLoc)

    if targetFunctionAddressRegister == arm64CallingConventionModuleInstanceAddressRegister {
        // In this case, we must move the value on targetFunctionAddressRegister to another register; otherwise
        // the address (the jump target below) would be modified and result in a segfault.
        // See #526.
        c.assembler.CompileRegisterToRegister(arm64.MOVD, targetFunctionAddressRegister, tmp)
        targetFunctionAddressRegister = tmp
    }

    // Also, we have to put the target function's moduleInstance address into arm64CallingConventionModuleInstanceAddressRegister.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        targetFunctionAddressRegister, functionModuleInstanceOffset,
        arm64CallingConventionModuleInstanceAddressRegister,
    )

    // Then, br into the target function's initial address.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        targetFunctionAddressRegister, functionCodeInitialAddressOffset,
        targetFunctionAddressRegister)

    c.assembler.CompileJumpToRegister(arm64.B, targetFunctionAddressRegister)

    // We consumed the function parameters, the call frame stack and the reserved slots during the call.
    c.locationStack.sp = uint64(nextStackBasePointerOffset)

    // Also, the function results were pushed by the call.
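    // Each result gets a location on the value stack; a v128 result takes two
    // slots (lo then hi), mirroring how the callee left its results on the stack: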
    for _, t := range functype.Results {
        loc := c.locationStack.pushRuntimeValueLocationOnStack()
        switch t {
        case wasm.ValueTypeI32:
            loc.valueType = runtimeValueTypeI32
        case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref:
            loc.valueType = runtimeValueTypeI64
        case wasm.ValueTypeF32:
            loc.valueType = runtimeValueTypeF32
        case wasm.ValueTypeF64:
            loc.valueType = runtimeValueTypeF64
        case wasm.ValueTypeV128:
            loc.valueType = runtimeValueTypeV128Lo
            hi := c.locationStack.pushRuntimeValueLocationOnStack()
            hi.valueType = runtimeValueTypeV128Hi
        }
    }

    if err := c.compileModuleContextInitialization(); err != nil {
        return err
    }

    // On the function return, we initialize the state for this function.
    c.compileReservedStackBasePointerRegisterInitialization()

    c.compileReservedMemoryRegisterInitialization()
    return nil
}

// compileCallIndirect implements compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallIndirect(o *wazeroir.UnionOperation) (err error) {
    offset := c.locationStack.pop()
    if err = c.compileEnsureOnRegister(offset); err != nil {
        return err
    }
    typeIndex := o.U1
    tableIndex := o.U2

    offsetReg := offset.register
    if isZeroRegister(offsetReg) {
        offsetReg, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.markRegisterUsed(offsetReg)

        // Zero the value on the picked register.
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetReg)
    }

    tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }
    c.markRegisterUsed(tmp)

    tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }
    c.markRegisterUsed(tmp2)

    // First, we need to check that the offset doesn't exceed the length of the table.
    // "tmp = &Tables[0]"
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
        tmp,
    )
    // tmp = [tmp + TableIndex*8] = [&Tables[0] + TableIndex*sizeOf(*tableInstance)] = Tables[tableIndex]
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        tmp, int64(tableIndex)*8,
        tmp,
    )
    // tmp2 = [tmp + tableInstanceTableLenOffset] = len(Tables[tableIndex])
    c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, tableInstanceTableLenOffset, tmp2)

    // "cmp tmp2, offset"
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp2, offsetReg)

    // If the offset exceeds len(table), we trap.
    c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
    // Otherwise, we proceed to the function type check.

    // We need to obtain the absolute address of the table element.
    // "tmp = &Tables[tableIndex].table[0]"
    c.assembler.CompileMemoryToRegister(
        arm64.LDRD,
        tmp, tableInstanceTableOffset,
        tmp,
    )
    // "offset = tmp + (offset << pointerSizeLog2) (== &table[offset])"
    // Here we left-shift by 3 (pointerSizeLog2) to get the offset in bytes,
    // since the table element type is uintptr, which is 8 bytes.
    c.assembler.CompileLeftShiftedRegisterToRegister(
        arm64.ADD,
        offsetReg, pointerSizeLog2,
        tmp,
        offsetReg,
    )

    // "offset = (*offset) (== table[offset])"
    c.assembler.CompileMemoryToRegister(arm64.LDRD, offsetReg, 0, offsetReg)

    // Check whether the value of table[offset] equals zero, meaning that the target element is uninitialized.
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, offsetReg)

    // Skipped if the target is initialized.
    c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusCodeInvalidTableAccess)

    // Next, we check that the type matches, i.e. table[offset].source.TypeID == targetFunctionType.
    // "tmp = table[offset].typeID"
    c.assembler.CompileMemoryToRegister(
        arm64.LDRD,
        offsetReg, functionTypeIDOffset,
        tmp,
    )
    // "tmp2 = ModuleInstance.TypeIDs[index]"
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
        tmp2)
    c.assembler.CompileMemoryToRegister(arm64.LDRW, tmp2, int64(typeIndex)*4, tmp2)

    // Compare these two values; if they are equal, we are ready to make the function call.
    c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmp, tmp2)
    // Skipped if the type matches.
    c.compileMaybeExitFromNativeCode(arm64.BCONDEQ, nativeCallStatusCodeTypeMismatchOnIndirectCall)

    targetFunctionType := &c.ir.Types[typeIndex]
    if err := c.compileCallImpl(offsetReg, targetFunctionType); err != nil {
        return err
    }

    // The offset register should be marked as unused, as it was consumed by the function call.
    c.markRegisterUnused(offsetReg, tmp, tmp2)
    return nil
}

// compileDrop implements compiler.compileDrop for the arm64 architecture.
func (c *arm64Compiler) compileDrop(o *wazeroir.UnionOperation) error {
    return compileDropRange(c, o.U1)
}

func (c *arm64Compiler) compileSelectV128Impl(selectorRegister asm.Register) error {
    x2 := c.locationStack.popV128()
    if err := c.compileEnsureOnRegister(x2); err != nil {
        return err
    }

    x1 := c.locationStack.popV128()
    if err := c.compileEnsureOnRegister(x1); err != nil {
        return err
    }

    c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, selectorRegister)
    brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

    // In this branch, we select the value of x2, so we move the value into x1.register so that
    // we can have the result in x1.register regardless of the selection.
    c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
        x2.register, x2.register, x1.register, arm64.VectorArrangement16B)

    c.assembler.SetJumpTargetOnNext(brIfNotZero)

    // As noted, the result exists in x1.register regardless of the selector.
    c.pushVectorRuntimeValueLocationOnRegister(x1.register)
    // Plus, x2.register is no longer used.
    c.markRegisterUnused(x2.register)
    return nil
}

// compileSelect implements compiler.compileSelect for the arm64 architecture.
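// It implements the wasm select instruction: pop the i32 selector cv and the two
// operands x2 then x1, and push x1 if cv is non-zero, otherwise x2.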
func (c *arm64Compiler) compileSelect(o *wazeroir.UnionOperation) error {
    cv, err := c.popValueOnRegister()
    if err != nil {
        return err
    }

    isTargetVector := o.B3
    if isTargetVector {
        return c.compileSelectV128Impl(cv.register)
    }

    c.markRegisterUsed(cv.register)

    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
        // If both values are zero, the result is always zero.
        c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
        c.markRegisterUnused(cv.register)
        return nil
    }

    // In the following, we emit the code so that x1's register contains the chosen value
    // no matter which of the original x1 or x2 is selected.
    //
    // If x1 is currently on the zero register, we cannot place the result there because
    // "MOV arm64.RegRZR x2.register" results in arm64.RegRZR regardless of the value.
    // So we explicitly assign a general purpose register to x1 here.
    if isZeroRegister(x1.register) {
        // Mark x2's and cv's registers as used so they won't be chosen.
        c.markRegisterUsed(x2.register)
        // Pick a non-zero register for x1.
        x1Reg, err := c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        x1.setRegister(x1Reg)
        // And zero out the picked register.
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, x1Reg)
    }

    // At this point, x1 is on a non-zero register, and x2 is either on a general purpose or the zero register.

    c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, cv.register)
    brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

    // If cv == 0, we move the value of x2 to x1.register.

    switch x1.valueType {
    case runtimeValueTypeI32:
        // TODO: use 32-bit mov
        c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
    case runtimeValueTypeI64:
        c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
    case runtimeValueTypeF32:
        // TODO: use 32-bit mov
        c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
    case runtimeValueTypeF64:
        c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
    default:
        return errors.New("TODO: implement vector type select")
    }

    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)

    // Otherwise, nothing to do for select.
    c.assembler.SetJumpTargetOnNext(brIfNotZero)

    // Only x1.register is reused.
    c.markRegisterUnused(cv.register, x2.register)
    return nil
}

// compilePick implements compiler.compilePick for the arm64 architecture.
func (c *arm64Compiler) compilePick(o *wazeroir.UnionOperation) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }
    depth := o.U1
    isTargetVector := o.B3

    pickTarget := &c.locationStack.stack[c.locationStack.sp-1-uint64(depth)]
    pickedRegister, err := c.allocateRegister(pickTarget.getRegisterType())
    if err != nil {
        return err
    }

    if pickTarget.onRegister() { // Copy the value to the pickedRegister.
        switch pickTarget.valueType {
        case runtimeValueTypeI32:
            c.assembler.CompileRegisterToRegister(arm64.MOVW, pickTarget.register, pickedRegister)
        case runtimeValueTypeI64:
            c.assembler.CompileRegisterToRegister(arm64.MOVD, pickTarget.register, pickedRegister)
        case runtimeValueTypeF32:
            c.assembler.CompileRegisterToRegister(arm64.FMOVS, pickTarget.register, pickedRegister)
        case runtimeValueTypeF64:
            c.assembler.CompileRegisterToRegister(arm64.FMOVD, pickTarget.register, pickedRegister)
        case runtimeValueTypeV128Lo:
            c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
                pickTarget.register, pickTarget.register, pickedRegister, arm64.VectorArrangement16B)
        case runtimeValueTypeV128Hi:
            panic("BUG") // since the pick target must point to the lower 64-bits of vectors.
        }
    } else if pickTarget.onStack() {
        // Temporarily assign a register to the pick target, and then load the value.
        pickTarget.setRegister(pickedRegister)
        c.compileLoadValueOnStackToRegister(pickTarget)

        // After the load, we revert the register assignment to the pick target.
        pickTarget.setRegister(asm.NilRegister)
        if isTargetVector {
            hi := &c.locationStack.stack[pickTarget.stackPointer+1]
            hi.setRegister(asm.NilRegister)
        }
    }

    // Now we have the value of the target on the pickedRegister,
    // so push the location.
    c.pushRuntimeValueLocationOnRegister(pickedRegister, pickTarget.valueType)
    if isTargetVector {
        c.pushRuntimeValueLocationOnRegister(pickedRegister, runtimeValueTypeV128Hi)
    }
    return nil
}

// compileAdd implements compiler.compileAdd for the arm64 architecture.
func (c *arm64Compiler) compileAdd(o *wazeroir.UnionOperation) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    // Addition can be a nop if one of the operands is zero.
    if isZeroRegister(x1.register) {
        c.pushRuntimeValueLocationOnRegister(x2.register, x1.valueType)
        return nil
    } else if isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
        return nil
    }

    var inst asm.Instruction
    unsignedType := wazeroir.UnsignedType(o.B1)
    switch unsignedType {
    case wazeroir.UnsignedTypeI32:
        inst = arm64.ADDW
    case wazeroir.UnsignedTypeI64:
        inst = arm64.ADD
    case wazeroir.UnsignedTypeF32:
        inst = arm64.FADDS
    case wazeroir.UnsignedTypeF64:
        inst = arm64.FADDD
    }

    c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
    // The result is placed on the register for x1, so record it.
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}

// compileSub implements compiler.compileSub for the arm64 architecture.
func (c *arm64Compiler) compileSub(o *wazeroir.UnionOperation) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    // If both registers are zero, this is a nop, and we just push the zero register.
    if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
        return nil
    }

    // At this point, at least one of the x1 or x2 registers is non-zero.
    // Choose the non-zero register as the destination.
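    // (Writing into the zero register would discard the result, hence this choice.)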
1452 destinationReg := x1.register 1453 if isZeroRegister(x1.register) { 1454 destinationReg = x2.register 1455 } 1456 1457 var inst asm.Instruction 1458 var vt runtimeValueType 1459 unsignedType := wazeroir.UnsignedType(o.B1) 1460 switch unsignedType { 1461 case wazeroir.UnsignedTypeI32: 1462 inst = arm64.SUBW 1463 vt = runtimeValueTypeI32 1464 case wazeroir.UnsignedTypeI64: 1465 inst = arm64.SUB 1466 vt = runtimeValueTypeI64 1467 case wazeroir.UnsignedTypeF32: 1468 inst = arm64.FSUBS 1469 vt = runtimeValueTypeF32 1470 case wazeroir.UnsignedTypeF64: 1471 inst = arm64.FSUBD 1472 vt = runtimeValueTypeF64 1473 } 1474 1475 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg) 1476 c.pushRuntimeValueLocationOnRegister(destinationReg, vt) 1477 return nil 1478 } 1479 1480 // compileMul implements compiler.compileMul for the arm64 architecture. 1481 func (c *arm64Compiler) compileMul(o *wazeroir.UnionOperation) error { 1482 x1, x2, err := c.popTwoValuesOnRegisters() 1483 if err != nil { 1484 return err 1485 } 1486 1487 // Multiplication can be done by putting a zero register if one of operands is zero. 1488 if isZeroRegister(x1.register) || isZeroRegister(x2.register) { 1489 c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType) 1490 return nil 1491 } 1492 1493 var inst asm.Instruction 1494 var vt runtimeValueType 1495 unsignedType := wazeroir.UnsignedType(o.B1) 1496 switch unsignedType { 1497 case wazeroir.UnsignedTypeI32: 1498 inst = arm64.MULW 1499 vt = runtimeValueTypeI32 1500 case wazeroir.UnsignedTypeI64: 1501 inst = arm64.MUL 1502 vt = runtimeValueTypeI64 1503 case wazeroir.UnsignedTypeF32: 1504 inst = arm64.FMULS 1505 vt = runtimeValueTypeF32 1506 case wazeroir.UnsignedTypeF64: 1507 inst = arm64.FMULD 1508 vt = runtimeValueTypeF64 1509 } 1510 1511 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register) 1512 // The result is placed on a register for x1, so record it. 1513 c.pushRuntimeValueLocationOnRegister(x1.register, vt) 1514 return nil 1515 } 1516 1517 // compileClz implements compiler.compileClz for the arm64 architecture. 1518 func (c *arm64Compiler) compileClz(o *wazeroir.UnionOperation) error { 1519 v, err := c.popValueOnRegister() 1520 if err != nil { 1521 return err 1522 } 1523 1524 unsignedInt := wazeroir.UnsignedInt(o.B1) 1525 if isZeroRegister(v.register) { 1526 // If the target is zero register, the result is always 32 (or 64 for 64-bits), 1527 // so we allocate a register and put the const on it. 1528 reg, err := c.allocateRegister(registerTypeGeneralPurpose) 1529 if err != nil { 1530 return err 1531 } 1532 var vt runtimeValueType 1533 if unsignedInt == wazeroir.UnsignedInt32 { 1534 vt = runtimeValueTypeI32 1535 c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg) 1536 } else { 1537 vt = runtimeValueTypeI64 1538 c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg) 1539 } 1540 c.pushRuntimeValueLocationOnRegister(reg, vt) 1541 return nil 1542 } 1543 1544 reg := v.register 1545 var vt runtimeValueType 1546 if unsignedInt == wazeroir.UnsignedInt32 { 1547 vt = runtimeValueTypeI32 1548 c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg) 1549 } else { 1550 vt = runtimeValueTypeI64 1551 c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg) 1552 } 1553 c.pushRuntimeValueLocationOnRegister(reg, vt) 1554 return nil 1555 } 1556 1557 // compileCtz implements compiler.compileCtz for the arm64 architecture. 
1558 func (c *arm64Compiler) compileCtz(o *wazeroir.UnionOperation) error {
1559 	v, err := c.popValueOnRegister()
1560 	if err != nil {
1561 		return err
1562 	}
1563 
1564 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1565 	reg := v.register
1566 	if isZeroRegister(reg) {
1567 		// If the target is the zero register, the result is always 32 (or 64 for the 64-bit case),
1568 		// so we allocate a register and put the const on it.
1569 		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
1570 		if err != nil {
1571 			return err
1572 		}
1573 		var vt runtimeValueType
1574 		if unsignedInt == wazeroir.UnsignedInt32 {
1575 			vt = runtimeValueTypeI32
1576 			c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
1577 		} else {
1578 			vt = runtimeValueTypeI64
1579 			c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
1580 		}
1581 		c.pushRuntimeValueLocationOnRegister(reg, vt)
1582 		return nil
1583 	}
1584 
1585 	// Since arm64 doesn't have an instruction directly counting trailing zeros,
1586 	// we reverse the bits first and then do CLZ, which is exactly how
1587 	// gcc implements __builtin_ctz for arm64.
1588 	var vt runtimeValueType
1589 	if unsignedInt == wazeroir.UnsignedInt32 {
1590 		vt = runtimeValueTypeI32
1591 		c.assembler.CompileRegisterToRegister(arm64.RBITW, reg, reg)
1592 		c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
1593 	} else {
1594 		vt = runtimeValueTypeI64
1595 		c.assembler.CompileRegisterToRegister(arm64.RBIT, reg, reg)
1596 		c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
1597 	}
1598 	c.pushRuntimeValueLocationOnRegister(reg, vt)
1599 	return nil
1600 }
1601 
1602 // compilePopcnt implements compiler.compilePopcnt for the arm64 architecture.
1603 func (c *arm64Compiler) compilePopcnt(o *wazeroir.UnionOperation) error {
1604 	v, err := c.popValueOnRegister()
1605 	if err != nil {
1606 		return err
1607 	}
1608 
1609 	reg := v.register
1610 	if isZeroRegister(reg) {
1611 		c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
1612 		return nil
1613 	}
1614 
1615 	freg, err := c.allocateRegister(registerTypeVector)
1616 	if err != nil {
1617 		return err
1618 	}
1619 
1620 	// arm64 doesn't have an instruction for population count on a scalar register,
1621 	// so we use the vector one (VCNT).
1622 	// This is exactly how the official Go compiler implements bits.OnesCount.
1623 	// For example, "func() int { return bits.OnesCount(10) }" is compiled as
1624 	//
1625 	//	MOVD	$10, R0 ;; Load 10.
1626 	//	FMOVD	R0, F0
1627 	//	VCNT	V0.B8, V0.B8
1628 	//	UADDLV	V0.B8, V0
1629 	//
1630 	var movInst asm.Instruction
1631 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1632 	if unsignedInt == wazeroir.UnsignedInt32 {
1633 		movInst = arm64.FMOVS
1634 	} else {
1635 		movInst = arm64.FMOVD
1636 	}
1637 	c.assembler.CompileRegisterToRegister(movInst, reg, freg)
1638 	c.assembler.CompileVectorRegisterToVectorRegister(arm64.VCNT, freg, freg,
1639 		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
1640 	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UADDLV, freg, freg, arm64.VectorArrangement8B,
1641 		arm64.VectorIndexNone, arm64.VectorIndexNone)
1642 
1643 	c.assembler.CompileRegisterToRegister(movInst, freg, reg)
1644 
1645 	c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
1646 	return nil
1647 }
1648 
1649 // compileDiv implements compiler.compileDiv for the arm64 architecture.
1650 func (c *arm64Compiler) compileDiv(o *wazeroir.UnionOperation) error {
1651 	dividend, divisor, err := c.popTwoValuesOnRegisters()
1652 	if err != nil {
1653 		return err
1654 	}
1655 
1656 	signedType := wazeroir.SignedType(o.B1)
1657 
1658 	// If the divisor is on the zero register, exit from the function deterministically.
1659 	if isZeroRegister(divisor.register) {
1660 		// Push any value so that the subsequent instructions can have a consistent location stack state.
1661 		v := c.locationStack.pushRuntimeValueLocationOnStack()
1662 		switch signedType {
1663 		case wazeroir.SignedTypeInt32, wazeroir.SignedTypeUint32:
1664 			v.valueType = runtimeValueTypeI32
1665 		case wazeroir.SignedTypeUint64, wazeroir.SignedTypeInt64:
1666 			v.valueType = runtimeValueTypeI64
1667 		}
1668 		c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
1669 		return nil
1670 	}
1671 
1672 	var inst asm.Instruction
1673 	var vt runtimeValueType
1674 	switch signedType {
1675 	case wazeroir.SignedTypeUint32:
1676 		inst = arm64.UDIVW
1677 		if err := c.compileIntegerDivPrecheck(true, false, dividend.register, divisor.register); err != nil {
1678 			return err
1679 		}
1680 		vt = runtimeValueTypeI32
1681 	case wazeroir.SignedTypeUint64:
1682 		if err := c.compileIntegerDivPrecheck(false, false, dividend.register, divisor.register); err != nil {
1683 			return err
1684 		}
1685 		inst = arm64.UDIV
1686 		vt = runtimeValueTypeI64
1687 	case wazeroir.SignedTypeInt32:
1688 		if err := c.compileIntegerDivPrecheck(true, true, dividend.register, divisor.register); err != nil {
1689 			return err
1690 		}
1691 		inst = arm64.SDIVW
1692 		vt = runtimeValueTypeI32
1693 	case wazeroir.SignedTypeInt64:
1694 		if err := c.compileIntegerDivPrecheck(false, true, dividend.register, divisor.register); err != nil {
1695 			return err
1696 		}
1697 		inst = arm64.SDIV
1698 		vt = runtimeValueTypeI64
1699 	case wazeroir.SignedTypeFloat32:
1700 		inst = arm64.FDIVS
1701 		vt = runtimeValueTypeF32
1702 	case wazeroir.SignedTypeFloat64:
1703 		inst = arm64.FDIVD
1704 		vt = runtimeValueTypeF64
1705 	}
1706 
1707 	c.assembler.CompileRegisterToRegister(inst, divisor.register, dividend.register)
1708 
1709 	c.pushRuntimeValueLocationOnRegister(dividend.register, vt)
1710 	return nil
1711 }
1712 
1713 // compileIntegerDivPrecheck adds instructions to check that the divisor and dividend are sound for the division operation.
1714 // First, it adds instructions to check whether the divisor equals zero, and if so, exits the function.
1715 // In addition, for signed divisions, it checks whether the division would overflow.
1716 func (c *arm64Compiler) compileIntegerDivPrecheck(is32Bit, isSigned bool, dividend, divisor asm.Register) error {
1717 	// We check whether the divisor equals zero.
1718 	var cmpInst, movInst, loadInst asm.Instruction
1719 	var minValueOffsetInVM int64
1720 	if is32Bit {
1721 		cmpInst = arm64.CMPW
1722 		movInst = arm64.MOVW
1723 		loadInst = arm64.LDRW
1724 		minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
1725 	} else {
1726 		cmpInst = arm64.CMP
1727 		movInst = arm64.MOVD
1728 		loadInst = arm64.LDRD
1729 		minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
1730 	}
1731 	c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisor)
1732 
1733 	// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
1734 	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
1735 	// Otherwise, we proceed.
1736 
1737 	// If the operation is a signed integer division, we need an additional overflow check.
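	// For example, in 32-bit signed division, math.MinInt32 / -1 == -(-2147483648),
	// i.e. +2147483648, which is not representable in int32. The Wasm spec requires
	// this case to trap with "integer overflow" rather than silently wrapping.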
1738 	if isSigned {
1739 		// For signed division, we need a branch for the "math.MinInt{32,64} / -1"
1740 		// case, which results in the overflow.
1741 
1742 		// First, we compare the divisor with -1.
1743 		c.assembler.CompileConstToRegister(movInst, -1, arm64ReservedRegisterForTemporary)
1744 		c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, divisor)
1745 
1746 		// If they are not equal, we skip the following check.
1747 		brIfDivisorNonMinusOne := c.assembler.CompileJump(arm64.BCONDNE)
1748 
1749 		// Otherwise, we further check if the dividend equals math.MinInt32 or MinInt64.
1750 		c.assembler.CompileMemoryToRegister(
1751 			loadInst,
1752 			arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
1753 			arm64ReservedRegisterForTemporary,
1754 		)
1755 		c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, dividend)
1756 
1757 		// If they are not equal, it is safe to execute the division.
1758 		// Otherwise, we raise an overflow error.
1759 		c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerOverflow)
1760 
1761 		c.assembler.SetJumpTargetOnNext(brIfDivisorNonMinusOne)
1762 	}
1763 	return nil
1764 }
1765 
1766 // compileRem implements compiler.compileRem for the arm64 architecture.
1767 func (c *arm64Compiler) compileRem(o *wazeroir.UnionOperation) error {
1768 	dividend, divisor, err := c.popTwoValuesOnRegisters()
1769 	if err != nil {
1770 		return err
1771 	}
1772 
1773 	dividendReg := dividend.register
1774 	divisorReg := divisor.register
1775 
1776 	// If the divisor is on the zero register, exit from the function deterministically.
1777 	if isZeroRegister(divisor.register) {
1778 		// Push any value so that the subsequent instructions can have a consistent location stack state.
1779 		v := c.locationStack.pushRuntimeValueLocationOnStack()
1780 		v.valueType = runtimeValueTypeI32
1781 		c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
1782 		return nil
1783 	}
1784 
1785 	var divInst, msubInst, cmpInst asm.Instruction
1786 	signedInt := wazeroir.SignedInt(o.B1)
1787 	switch signedInt {
1788 	case wazeroir.SignedUint32:
1789 		divInst = arm64.UDIVW
1790 		msubInst = arm64.MSUBW
1791 		cmpInst = arm64.CMPW
1792 	case wazeroir.SignedUint64:
1793 		divInst = arm64.UDIV
1794 		msubInst = arm64.MSUB
1795 		cmpInst = arm64.CMP
1796 	case wazeroir.SignedInt32:
1797 		divInst = arm64.SDIVW
1798 		msubInst = arm64.MSUBW
1799 		cmpInst = arm64.CMPW
1800 	case wazeroir.SignedInt64:
1801 		divInst = arm64.SDIV
1802 		msubInst = arm64.MSUB
1803 		cmpInst = arm64.CMP
1804 	}
1805 
1806 	// We check whether the divisor equals zero.
1807 	c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisorReg)
1808 
1809 	// If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
1810 	c.compileMaybeExitFromNativeCode(arm64.BCONDNE, nativeCallStatusIntegerDivisionByZero)
1811 	// Otherwise, we proceed.
1812 
1813 	// Temporarily mark them used to allocate a result register while keeping these values.
1814 	c.markRegisterUsed(dividend.register, divisor.register)
1815 
1816 	resultReg, err := c.allocateRegister(registerTypeGeneralPurpose)
1817 	if err != nil {
1818 		return err
1819 	}
1820 
1821 	// arm64 doesn't have a remainder instruction, so we calculate it with two instructions: UDIV (SDIV for signed) and MSUB.
1822 	// This is exactly the code that Clang emits.
1823 	// [input: x0=dividend, x1=divisor]
1824 	// >> UDIV x2, x0, x1
1825 	// >> MSUB x3, x2, x1, x0
1826 	// [result: x2=quotient, x3=remainder]
1827 	//
1828 	c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg)
1829 	// resultReg = dividendReg - (divisorReg * resultReg)
1830 	c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg)
1831 
1832 	c.markRegisterUnused(dividend.register, divisor.register)
1833 	c.pushRuntimeValueLocationOnRegister(resultReg, dividend.valueType)
1834 	return nil
1835 }
1836 
1837 // compileAnd implements compiler.compileAnd for the arm64 architecture.
1838 func (c *arm64Compiler) compileAnd(o *wazeroir.UnionOperation) error {
1839 	x1, x2, err := c.popTwoValuesOnRegisters()
1840 	if err != nil {
1841 		return err
1842 	}
1843 
1844 	// If either of the registers x1 or x2 is zero,
1845 	// the result will always be zero.
1846 	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
1847 		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
1848 		return nil
1849 	}
1850 
1851 	// At this point, neither x1 nor x2 is on the zero register.
1852 	// Choose a non-zero register as the destination.
1853 	destinationReg := x1.register
1854 	if isZeroRegister(x1.register) {
1855 		destinationReg = x2.register
1856 	}
1857 
1858 	var inst asm.Instruction
1859 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1860 	switch unsignedInt {
1861 	case wazeroir.UnsignedInt32:
1862 		inst = arm64.ANDW
1863 	case wazeroir.UnsignedInt64:
1864 		inst = arm64.AND
1865 	}
1866 
1867 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
1868 	c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType)
1869 	return nil
1870 }
1871 
1872 // compileOr implements compiler.compileOr for the arm64 architecture.
1873 func (c *arm64Compiler) compileOr(o *wazeroir.UnionOperation) error {
1874 	x1, x2, err := c.popTwoValuesOnRegisters()
1875 	if err != nil {
1876 		return err
1877 	}
1878 
1879 	if isZeroRegister(x1.register) {
1880 		c.pushRuntimeValueLocationOnRegister(x2.register, x2.valueType)
1881 		return nil
1882 	}
1883 	if isZeroRegister(x2.register) {
1884 		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1885 		return nil
1886 	}
1887 
1888 	var inst asm.Instruction
1889 	unsignedInt := wazeroir.UnsignedInt(o.B1)
1890 	switch unsignedInt {
1891 	case wazeroir.UnsignedInt32:
1892 		inst = arm64.ORRW
1893 	case wazeroir.UnsignedInt64:
1894 		inst = arm64.ORR
1895 	}
1896 
1897 	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
1898 	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
1899 	return nil
1900 }
1901 
1902 // compileXor implements compiler.compileXor for the arm64 architecture.
1903 func (c *arm64Compiler) compileXor(o *wazeroir.UnionOperation) error {
1904 	x1, x2, err := c.popTwoValuesOnRegisters()
1905 	if err != nil {
1906 		return err
1907 	}
1908 
1909 	// Unlike compileAnd/compileOr above, there is no early return here, so x1 and x2
1910 	// may both be on the zero register. Choose a non-zero register as the destination when possible.
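	// Note that no early return is needed for zero operands: if both are on the
	// zero register, the EOR write to RZR is discarded and the pushed zero
	// register already represents the correct result (0 ^ 0 == 0).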
1911 destinationReg := x1.register 1912 if isZeroRegister(x1.register) { 1913 destinationReg = x2.register 1914 } 1915 1916 var inst asm.Instruction 1917 unsignedInt := wazeroir.UnsignedInt(o.B1) 1918 switch unsignedInt { 1919 case wazeroir.UnsignedInt32: 1920 inst = arm64.EORW 1921 case wazeroir.UnsignedInt64: 1922 inst = arm64.EOR 1923 } 1924 1925 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg) 1926 c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType) 1927 return nil 1928 } 1929 1930 // compileShl implements compiler.compileShl for the arm64 architecture. 1931 func (c *arm64Compiler) compileShl(o *wazeroir.UnionOperation) error { 1932 x1, x2, err := c.popTwoValuesOnRegisters() 1933 if err != nil { 1934 return err 1935 } 1936 1937 if isZeroRegister(x1.register) || isZeroRegister(x2.register) { 1938 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1939 return nil 1940 } 1941 1942 var inst asm.Instruction 1943 unsignedInt := wazeroir.UnsignedInt(o.B1) 1944 switch unsignedInt { 1945 case wazeroir.UnsignedInt32: 1946 inst = arm64.LSLW 1947 case wazeroir.UnsignedInt64: 1948 inst = arm64.LSL 1949 } 1950 1951 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) 1952 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1953 return nil 1954 } 1955 1956 // compileShr implements compiler.compileShr for the arm64 architecture. 1957 func (c *arm64Compiler) compileShr(o *wazeroir.UnionOperation) error { 1958 x1, x2, err := c.popTwoValuesOnRegisters() 1959 if err != nil { 1960 return err 1961 } 1962 1963 if isZeroRegister(x1.register) || isZeroRegister(x2.register) { 1964 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1965 return nil 1966 } 1967 1968 var inst asm.Instruction 1969 signedInt := wazeroir.SignedInt(o.B1) 1970 switch signedInt { 1971 case wazeroir.SignedInt32: 1972 inst = arm64.ASRW 1973 case wazeroir.SignedInt64: 1974 inst = arm64.ASR 1975 case wazeroir.SignedUint32: 1976 inst = arm64.LSRW 1977 case wazeroir.SignedUint64: 1978 inst = arm64.LSR 1979 } 1980 1981 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) 1982 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1983 return nil 1984 } 1985 1986 // compileRotl implements compiler.compileRotl for the arm64 architecture. 1987 func (c *arm64Compiler) compileRotl(o *wazeroir.UnionOperation) error { 1988 x1, x2, err := c.popTwoValuesOnRegisters() 1989 if err != nil { 1990 return err 1991 } 1992 1993 if isZeroRegister(x1.register) || isZeroRegister(x2.register) { 1994 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 1995 return nil 1996 } 1997 1998 var inst, neginst asm.Instruction 1999 unsignedInt := wazeroir.UnsignedInt(o.B1) 2000 switch unsignedInt { 2001 case wazeroir.UnsignedInt32: 2002 inst = arm64.RORW 2003 neginst = arm64.NEGW 2004 case wazeroir.UnsignedInt64: 2005 inst = arm64.ROR 2006 neginst = arm64.NEG 2007 } 2008 2009 // Arm64 doesn't have rotate left instruction. 2010 // The shift amount needs to be converted to a negative number, similar to assembly output of bits.RotateLeft. 2011 c.assembler.CompileRegisterToRegister(neginst, x2.register, x2.register) 2012 2013 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) 2014 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2015 return nil 2016 } 2017 2018 // compileRotr implements compiler.compileRotr for the arm64 architecture. 
2019 func (c *arm64Compiler) compileRotr(o *wazeroir.UnionOperation) error { 2020 x1, x2, err := c.popTwoValuesOnRegisters() 2021 if err != nil { 2022 return err 2023 } 2024 2025 if isZeroRegister(x1.register) || isZeroRegister(x2.register) { 2026 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2027 return nil 2028 } 2029 2030 var inst asm.Instruction 2031 unsignedInt := wazeroir.UnsignedInt(o.B1) 2032 switch unsignedInt { 2033 case wazeroir.UnsignedInt32: 2034 inst = arm64.RORW 2035 case wazeroir.UnsignedInt64: 2036 inst = arm64.ROR 2037 } 2038 2039 c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register) 2040 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2041 return nil 2042 } 2043 2044 // compileAbs implements compiler.compileAbs for the arm64 architecture. 2045 func (c *arm64Compiler) compileAbs(o *wazeroir.UnionOperation) error { 2046 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2047 return c.compileSimpleUnop(arm64.FABSS, runtimeValueTypeF32) 2048 } else { 2049 return c.compileSimpleUnop(arm64.FABSD, runtimeValueTypeF64) 2050 } 2051 } 2052 2053 // compileNeg implements compiler.compileNeg for the arm64 architecture. 2054 func (c *arm64Compiler) compileNeg(o *wazeroir.UnionOperation) error { 2055 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2056 return c.compileSimpleUnop(arm64.FNEGS, runtimeValueTypeF32) 2057 } else { 2058 return c.compileSimpleUnop(arm64.FNEGD, runtimeValueTypeF64) 2059 } 2060 } 2061 2062 // compileCeil implements compiler.compileCeil for the arm64 architecture. 2063 func (c *arm64Compiler) compileCeil(o *wazeroir.UnionOperation) error { 2064 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2065 return c.compileSimpleUnop(arm64.FRINTPS, runtimeValueTypeF32) 2066 } else { 2067 return c.compileSimpleUnop(arm64.FRINTPD, runtimeValueTypeF64) 2068 } 2069 } 2070 2071 // compileFloor implements compiler.compileFloor for the arm64 architecture. 2072 func (c *arm64Compiler) compileFloor(o *wazeroir.UnionOperation) error { 2073 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2074 return c.compileSimpleUnop(arm64.FRINTMS, runtimeValueTypeF32) 2075 } else { 2076 return c.compileSimpleUnop(arm64.FRINTMD, runtimeValueTypeF64) 2077 } 2078 } 2079 2080 // compileTrunc implements compiler.compileTrunc for the arm64 architecture. 2081 func (c *arm64Compiler) compileTrunc(o *wazeroir.UnionOperation) error { 2082 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2083 return c.compileSimpleUnop(arm64.FRINTZS, runtimeValueTypeF32) 2084 } else { 2085 return c.compileSimpleUnop(arm64.FRINTZD, runtimeValueTypeF64) 2086 } 2087 } 2088 2089 // compileNearest implements compiler.compileNearest for the arm64 architecture. 2090 func (c *arm64Compiler) compileNearest(o *wazeroir.UnionOperation) error { 2091 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2092 return c.compileSimpleUnop(arm64.FRINTNS, runtimeValueTypeF32) 2093 } else { 2094 return c.compileSimpleUnop(arm64.FRINTND, runtimeValueTypeF64) 2095 } 2096 } 2097 2098 // compileSqrt implements compiler.compileSqrt for the arm64 architecture. 2099 func (c *arm64Compiler) compileSqrt(o *wazeroir.UnionOperation) error { 2100 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2101 return c.compileSimpleUnop(arm64.FSQRTS, runtimeValueTypeF32) 2102 } else { 2103 return c.compileSimpleUnop(arm64.FSQRTD, runtimeValueTypeF64) 2104 } 2105 } 2106 2107 // compileMin implements compiler.compileMin for the arm64 architecture. 
2108 func (c *arm64Compiler) compileMin(o *wazeroir.UnionOperation) error { 2109 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2110 return c.compileSimpleFloatBinop(arm64.FMINS) 2111 } else { 2112 return c.compileSimpleFloatBinop(arm64.FMIND) 2113 } 2114 } 2115 2116 // compileMax implements compiler.compileMax for the arm64 architecture. 2117 func (c *arm64Compiler) compileMax(o *wazeroir.UnionOperation) error { 2118 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2119 return c.compileSimpleFloatBinop(arm64.FMAXS) 2120 } else { 2121 return c.compileSimpleFloatBinop(arm64.FMAXD) 2122 } 2123 } 2124 2125 func (c *arm64Compiler) compileSimpleFloatBinop(inst asm.Instruction) error { 2126 x1, x2, err := c.popTwoValuesOnRegisters() 2127 if err != nil { 2128 return err 2129 } 2130 c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register) 2131 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2132 return nil 2133 } 2134 2135 // compileCopysign implements compiler.compileCopysign for the arm64 architecture. 2136 func (c *arm64Compiler) compileCopysign(o *wazeroir.UnionOperation) error { 2137 x1, x2, err := c.popTwoValuesOnRegisters() 2138 if err != nil { 2139 return err 2140 } 2141 2142 var ldr asm.Instruction 2143 var minValueOffsetInVM int64 2144 if wazeroir.Float(o.B1) == wazeroir.Float32 { 2145 ldr = arm64.FLDRS 2146 minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset 2147 } else { 2148 ldr = arm64.FLDRD 2149 minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset 2150 } 2151 2152 c.markRegisterUsed(x1.register, x2.register) 2153 freg, err := c.allocateRegister(registerTypeVector) 2154 if err != nil { 2155 return err 2156 } 2157 2158 // This is exactly the same code emitted by GCC for "__builtin_copysign": 2159 // 2160 // mov x0, -9223372036854775808 2161 // fmov d2, x0 2162 // vbit v0.8b, v1.8b, v2.8b 2163 // 2164 // "mov freg, -9223372036854775808 (stored at ce.minimum64BitSignedInt)" 2165 c.assembler.CompileMemoryToRegister( 2166 ldr, 2167 arm64ReservedRegisterForCallEngine, minValueOffsetInVM, 2168 freg, 2169 ) 2170 2171 // VBIT inserts each bit from the first operand into the destination if the corresponding bit of the second operand is 1, 2172 // otherwise it leaves the destination bit unchanged. 2173 // See https://developer.arm.com/documentation/dui0801/g/Advanced-SIMD-Instructions--32-bit-/VBIT 2174 // 2175 // "vbit vreg.8b, x2vreg.8b, x1vreg.8b" == "inserting 64th bit of x2 into x1". 2176 c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VBIT, 2177 freg, x2.register, x1.register, arm64.VectorArrangement16B) 2178 2179 c.markRegisterUnused(x2.register) 2180 c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType) 2181 return nil 2182 } 2183 2184 // compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture. 2185 func (c *arm64Compiler) compileI32WrapFromI64() error { 2186 return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI32) 2187 } 2188 2189 // compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture. 2190 func (c *arm64Compiler) compileITruncFromF(o *wazeroir.UnionOperation) error { 2191 // Clear the floating point status register (FPSR). 
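	// The FCVTZ* conversion below sets the IOC (invalid operation, bit 0) cumulative
	// flag in FPSR when the input is NaN or out of range for the destination type,
	// so FPSR must be zeroed first for the post-conversion comparison against 1
	// to be meaningful.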
2192 c.assembler.CompileRegisterToRegister(arm64.MSR, arm64.RegRZR, arm64.RegFPSR) 2193 2194 var vt runtimeValueType 2195 var convinst asm.Instruction 2196 inputType := wazeroir.Float(o.B1) 2197 outputType := wazeroir.SignedInt(o.B2) 2198 nonTrapping := o.B3 2199 2200 is32bitFloat := inputType == wazeroir.Float32 2201 if is32bitFloat && outputType == wazeroir.SignedInt32 { 2202 convinst = arm64.FCVTZSSW 2203 vt = runtimeValueTypeI32 2204 } else if is32bitFloat && outputType == wazeroir.SignedInt64 { 2205 convinst = arm64.FCVTZSS 2206 vt = runtimeValueTypeI64 2207 } else if !is32bitFloat && outputType == wazeroir.SignedInt32 { 2208 convinst = arm64.FCVTZSDW 2209 vt = runtimeValueTypeI32 2210 } else if !is32bitFloat && outputType == wazeroir.SignedInt64 { 2211 convinst = arm64.FCVTZSD 2212 vt = runtimeValueTypeI64 2213 } else if is32bitFloat && outputType == wazeroir.SignedUint32 { 2214 convinst = arm64.FCVTZUSW 2215 vt = runtimeValueTypeI32 2216 } else if is32bitFloat && outputType == wazeroir.SignedUint64 { 2217 convinst = arm64.FCVTZUS 2218 vt = runtimeValueTypeI64 2219 } else if !is32bitFloat && outputType == wazeroir.SignedUint32 { 2220 convinst = arm64.FCVTZUDW 2221 vt = runtimeValueTypeI32 2222 } else if !is32bitFloat && outputType == wazeroir.SignedUint64 { 2223 convinst = arm64.FCVTZUD 2224 vt = runtimeValueTypeI64 2225 } 2226 2227 source, err := c.popValueOnRegister() 2228 if err != nil { 2229 return err 2230 } 2231 sourceReg := source.register 2232 2233 destinationReg, err := c.allocateRegister(registerTypeGeneralPurpose) 2234 if err != nil { 2235 return err 2236 } 2237 2238 c.assembler.CompileRegisterToRegister(convinst, sourceReg, destinationReg) 2239 c.pushRuntimeValueLocationOnRegister(destinationReg, vt) 2240 2241 if !nonTrapping { 2242 // Obtain the floating point status register value into the general purpose register, 2243 // so that we can check if the conversion resulted in undefined behavior. 2244 c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.RegFPSR, arm64ReservedRegisterForTemporary) 2245 // Check if the conversion was undefined by comparing the status with 1. 2246 // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register 2247 c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1) 2248 2249 brOK := c.assembler.CompileJump(arm64.BCONDNE) 2250 2251 // If so, exit the execution with errors depending on whether or not the source value is NaN. 2252 var floatcmp asm.Instruction 2253 if is32bitFloat { 2254 floatcmp = arm64.FCMPS 2255 } else { 2256 floatcmp = arm64.FCMPD 2257 } 2258 c.assembler.CompileTwoRegistersToNone(floatcmp, sourceReg, sourceReg) 2259 // VS flag is set if at least one of values for FCMP is NaN. 2260 // https://developer.arm.com/documentation/dui0801/g/Condition-Codes/Comparison-of-condition-code-meanings-in-integer-and-floating-point-code 2261 // If the source value is not NaN, the operation was overflow. 2262 c.compileMaybeExitFromNativeCode(arm64.BCONDVS, nativeCallStatusIntegerOverflow) 2263 2264 // Otherwise, the operation was invalid as this is trying to convert NaN to integer. 2265 c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion) 2266 2267 // Otherwise, we branch into the next instruction. 2268 c.assembler.SetJumpTargetOnNext(brOK) 2269 } 2270 return nil 2271 } 2272 2273 // compileFConvertFromI implements compiler.compileFConvertFromI for the arm64 architecture. 
2274 func (c *arm64Compiler) compileFConvertFromI(o *wazeroir.UnionOperation) error { 2275 var convinst asm.Instruction 2276 inputType := wazeroir.SignedInt(o.B1) 2277 outputType := wazeroir.Float(o.B2) 2278 2279 if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt32 { 2280 convinst = arm64.SCVTFWS 2281 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedInt64 { 2282 convinst = arm64.SCVTFS 2283 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt32 { 2284 convinst = arm64.SCVTFWD 2285 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedInt64 { 2286 convinst = arm64.SCVTFD 2287 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint32 { 2288 convinst = arm64.UCVTFWS 2289 } else if outputType == wazeroir.Float32 && inputType == wazeroir.SignedUint64 { 2290 convinst = arm64.UCVTFS 2291 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint32 { 2292 convinst = arm64.UCVTFWD 2293 } else if outputType == wazeroir.Float64 && inputType == wazeroir.SignedUint64 { 2294 convinst = arm64.UCVTFD 2295 } 2296 2297 var vt runtimeValueType 2298 if outputType == wazeroir.Float32 { 2299 vt = runtimeValueTypeF32 2300 } else { 2301 vt = runtimeValueTypeF64 2302 } 2303 return c.compileSimpleConversion(convinst, registerTypeVector, vt) 2304 } 2305 2306 // compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the arm64 architecture. 2307 func (c *arm64Compiler) compileF32DemoteFromF64() error { 2308 return c.compileSimpleUnop(arm64.FCVTDS, runtimeValueTypeF32) 2309 } 2310 2311 // compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the arm64 architecture. 2312 func (c *arm64Compiler) compileF64PromoteFromF32() error { 2313 return c.compileSimpleUnop(arm64.FCVTSD, runtimeValueTypeF64) 2314 } 2315 2316 // compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the arm64 architecture. 2317 func (c *arm64Compiler) compileI32ReinterpretFromF32() error { 2318 if peek := c.locationStack.peek(); peek.onStack() { 2319 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2320 peek.valueType = runtimeValueTypeI32 2321 return nil 2322 } 2323 return c.compileSimpleConversion(arm64.FMOVS, registerTypeGeneralPurpose, runtimeValueTypeI32) 2324 } 2325 2326 // compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the arm64 architecture. 2327 func (c *arm64Compiler) compileI64ReinterpretFromF64() error { 2328 if peek := c.locationStack.peek(); peek.onStack() { 2329 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2330 peek.valueType = runtimeValueTypeI64 2331 return nil 2332 } 2333 return c.compileSimpleConversion(arm64.FMOVD, registerTypeGeneralPurpose, runtimeValueTypeI64) 2334 } 2335 2336 // compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the arm64 architecture. 2337 func (c *arm64Compiler) compileF32ReinterpretFromI32() error { 2338 if peek := c.locationStack.peek(); peek.onStack() { 2339 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2340 peek.valueType = runtimeValueTypeF32 2341 return nil 2342 } 2343 return c.compileSimpleConversion(arm64.FMOVS, registerTypeVector, runtimeValueTypeF32) 2344 } 2345 2346 // compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the arm64 architecture. 
2347 func (c *arm64Compiler) compileF64ReinterpretFromI64() error { 2348 if peek := c.locationStack.peek(); peek.onStack() { 2349 // If the value is on the stack, this is no-op as there is nothing to do for converting type. 2350 peek.valueType = runtimeValueTypeF64 2351 return nil 2352 } 2353 return c.compileSimpleConversion(arm64.FMOVD, registerTypeVector, runtimeValueTypeF64) 2354 } 2355 2356 func (c *arm64Compiler) compileSimpleConversion(inst asm.Instruction, destinationRegType registerType, resultRuntimeValueType runtimeValueType) error { 2357 source, err := c.popValueOnRegister() 2358 if err != nil { 2359 return err 2360 } 2361 2362 destinationReg, err := c.allocateRegister(destinationRegType) 2363 if err != nil { 2364 return err 2365 } 2366 2367 c.assembler.CompileRegisterToRegister(inst, source.register, destinationReg) 2368 c.pushRuntimeValueLocationOnRegister(destinationReg, resultRuntimeValueType) 2369 return nil 2370 } 2371 2372 // compileExtend implements compiler.compileExtend for the arm64 architecture. 2373 func (c *arm64Compiler) compileExtend(o *wazeroir.UnionOperation) error { 2374 signed := o.B1 != 0 2375 if signed { 2376 return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64) 2377 } else { 2378 return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI64) 2379 } 2380 } 2381 2382 // compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the arm64 architecture. 2383 func (c *arm64Compiler) compileSignExtend32From8() error { 2384 return c.compileSimpleUnop(arm64.SXTBW, runtimeValueTypeI32) 2385 } 2386 2387 // compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the arm64 architecture. 2388 func (c *arm64Compiler) compileSignExtend32From16() error { 2389 return c.compileSimpleUnop(arm64.SXTHW, runtimeValueTypeI32) 2390 } 2391 2392 // compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the arm64 architecture. 2393 func (c *arm64Compiler) compileSignExtend64From8() error { 2394 return c.compileSimpleUnop(arm64.SXTB, runtimeValueTypeI64) 2395 } 2396 2397 // compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the arm64 architecture. 2398 func (c *arm64Compiler) compileSignExtend64From16() error { 2399 return c.compileSimpleUnop(arm64.SXTH, runtimeValueTypeI64) 2400 } 2401 2402 // compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the arm64 architecture. 2403 func (c *arm64Compiler) compileSignExtend64From32() error { 2404 return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64) 2405 } 2406 2407 func (c *arm64Compiler) compileSimpleUnop(inst asm.Instruction, resultRuntimeValueType runtimeValueType) error { 2408 v, err := c.popValueOnRegister() 2409 if err != nil { 2410 return err 2411 } 2412 reg := v.register 2413 c.assembler.CompileRegisterToRegister(inst, reg, reg) 2414 c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType) 2415 return nil 2416 } 2417 2418 // compileEq implements compiler.compileEq for the arm64 architecture. 2419 func (c *arm64Compiler) compileEq(o *wazeroir.UnionOperation) error { 2420 return c.emitEqOrNe(true, wazeroir.UnsignedType(o.B1)) 2421 } 2422 2423 // compileNe implements compiler.compileNe for the arm64 architecture. 2424 func (c *arm64Compiler) compileNe(o *wazeroir.UnionOperation) error { 2425 return c.emitEqOrNe(false, wazeroir.UnsignedType(o.B1)) 2426 } 2427 2428 // emitEqOrNe implements compiler.compileEq and compiler.compileNe for the arm64 architecture. 
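// It compares x1 and x2 with CMP (or FCMP for floats) and, rather than
// materializing a 0/1 value immediately, pushes the result as a conditional
// register value (CondEQ for eq, CondNE for ne), which later consumers can
// either branch on directly or materialize into a register on demand.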
2429 func (c *arm64Compiler) emitEqOrNe(isEq bool, unsignedType wazeroir.UnsignedType) error { 2430 x1, x2, err := c.popTwoValuesOnRegisters() 2431 if err != nil { 2432 return err 2433 } 2434 2435 var inst asm.Instruction 2436 switch unsignedType { 2437 case wazeroir.UnsignedTypeI32: 2438 inst = arm64.CMPW 2439 case wazeroir.UnsignedTypeI64: 2440 inst = arm64.CMP 2441 case wazeroir.UnsignedTypeF32: 2442 inst = arm64.FCMPS 2443 case wazeroir.UnsignedTypeF64: 2444 inst = arm64.FCMPD 2445 } 2446 2447 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2448 2449 // Push the comparison result as a conditional register value. 2450 cond := arm64.CondNE 2451 if isEq { 2452 cond = arm64.CondEQ 2453 } 2454 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(cond) 2455 return nil 2456 } 2457 2458 // compileEqz implements compiler.compileEqz for the arm64 architecture. 2459 func (c *arm64Compiler) compileEqz(o *wazeroir.UnionOperation) error { 2460 x1, err := c.popValueOnRegister() 2461 if err != nil { 2462 return err 2463 } 2464 2465 var inst asm.Instruction 2466 unsignedInt := wazeroir.UnsignedInt(o.B1) 2467 switch unsignedInt { 2468 case wazeroir.UnsignedInt32: 2469 inst = arm64.CMPW 2470 case wazeroir.UnsignedInt64: 2471 inst = arm64.CMP 2472 } 2473 2474 c.assembler.CompileTwoRegistersToNone(inst, arm64.RegRZR, x1.register) 2475 2476 // Push the comparison result as a conditional register value. 2477 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ) 2478 return nil 2479 } 2480 2481 // compileLt implements compiler.compileLt for the arm64 architecture. 2482 func (c *arm64Compiler) compileLt(o *wazeroir.UnionOperation) error { 2483 x1, x2, err := c.popTwoValuesOnRegisters() 2484 if err != nil { 2485 return err 2486 } 2487 2488 var inst asm.Instruction 2489 var conditionalRegister asm.ConditionalRegisterState 2490 signedType := wazeroir.SignedType(o.B1) 2491 switch signedType { 2492 case wazeroir.SignedTypeUint32: 2493 inst = arm64.CMPW 2494 conditionalRegister = arm64.CondLO 2495 case wazeroir.SignedTypeUint64: 2496 inst = arm64.CMP 2497 conditionalRegister = arm64.CondLO 2498 case wazeroir.SignedTypeInt32: 2499 inst = arm64.CMPW 2500 conditionalRegister = arm64.CondLT 2501 case wazeroir.SignedTypeInt64: 2502 inst = arm64.CMP 2503 conditionalRegister = arm64.CondLT 2504 case wazeroir.SignedTypeFloat32: 2505 inst = arm64.FCMPS 2506 conditionalRegister = arm64.CondMI 2507 case wazeroir.SignedTypeFloat64: 2508 inst = arm64.FCMPD 2509 conditionalRegister = arm64.CondMI 2510 } 2511 2512 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2513 2514 // Push the comparison result as a conditional register value. 2515 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2516 return nil 2517 } 2518 2519 // compileGt implements compiler.compileGt for the arm64 architecture. 
2520 func (c *arm64Compiler) compileGt(o *wazeroir.UnionOperation) error { 2521 x1, x2, err := c.popTwoValuesOnRegisters() 2522 if err != nil { 2523 return err 2524 } 2525 2526 var inst asm.Instruction 2527 var conditionalRegister asm.ConditionalRegisterState 2528 signedType := wazeroir.SignedType(o.B1) 2529 switch signedType { 2530 case wazeroir.SignedTypeUint32: 2531 inst = arm64.CMPW 2532 conditionalRegister = arm64.CondHI 2533 case wazeroir.SignedTypeUint64: 2534 inst = arm64.CMP 2535 conditionalRegister = arm64.CondHI 2536 case wazeroir.SignedTypeInt32: 2537 inst = arm64.CMPW 2538 conditionalRegister = arm64.CondGT 2539 case wazeroir.SignedTypeInt64: 2540 inst = arm64.CMP 2541 conditionalRegister = arm64.CondGT 2542 case wazeroir.SignedTypeFloat32: 2543 inst = arm64.FCMPS 2544 conditionalRegister = arm64.CondGT 2545 case wazeroir.SignedTypeFloat64: 2546 inst = arm64.FCMPD 2547 conditionalRegister = arm64.CondGT 2548 } 2549 2550 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2551 2552 // Push the comparison result as a conditional register value. 2553 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2554 return nil 2555 } 2556 2557 // compileLe implements compiler.compileLe for the arm64 architecture. 2558 func (c *arm64Compiler) compileLe(o *wazeroir.UnionOperation) error { 2559 x1, x2, err := c.popTwoValuesOnRegisters() 2560 if err != nil { 2561 return err 2562 } 2563 2564 var inst asm.Instruction 2565 var conditionalRegister asm.ConditionalRegisterState 2566 signedType := wazeroir.SignedType(o.B1) 2567 switch signedType { 2568 case wazeroir.SignedTypeUint32: 2569 inst = arm64.CMPW 2570 conditionalRegister = arm64.CondLS 2571 case wazeroir.SignedTypeUint64: 2572 inst = arm64.CMP 2573 conditionalRegister = arm64.CondLS 2574 case wazeroir.SignedTypeInt32: 2575 inst = arm64.CMPW 2576 conditionalRegister = arm64.CondLE 2577 case wazeroir.SignedTypeInt64: 2578 inst = arm64.CMP 2579 conditionalRegister = arm64.CondLE 2580 case wazeroir.SignedTypeFloat32: 2581 inst = arm64.FCMPS 2582 conditionalRegister = arm64.CondLS 2583 case wazeroir.SignedTypeFloat64: 2584 inst = arm64.FCMPD 2585 conditionalRegister = arm64.CondLS 2586 } 2587 2588 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2589 2590 // Push the comparison result as a conditional register value. 2591 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2592 return nil 2593 } 2594 2595 // compileGe implements compiler.compileGe for the arm64 architecture. 
2596 func (c *arm64Compiler) compileGe(o *wazeroir.UnionOperation) error { 2597 x1, x2, err := c.popTwoValuesOnRegisters() 2598 if err != nil { 2599 return err 2600 } 2601 2602 var inst asm.Instruction 2603 var conditionalRegister asm.ConditionalRegisterState 2604 signedType := wazeroir.SignedType(o.B1) 2605 switch signedType { 2606 case wazeroir.SignedTypeUint32: 2607 inst = arm64.CMPW 2608 conditionalRegister = arm64.CondHS 2609 case wazeroir.SignedTypeUint64: 2610 inst = arm64.CMP 2611 conditionalRegister = arm64.CondHS 2612 case wazeroir.SignedTypeInt32: 2613 inst = arm64.CMPW 2614 conditionalRegister = arm64.CondGE 2615 case wazeroir.SignedTypeInt64: 2616 inst = arm64.CMP 2617 conditionalRegister = arm64.CondGE 2618 case wazeroir.SignedTypeFloat32: 2619 inst = arm64.FCMPS 2620 conditionalRegister = arm64.CondGE 2621 case wazeroir.SignedTypeFloat64: 2622 inst = arm64.FCMPD 2623 conditionalRegister = arm64.CondGE 2624 } 2625 2626 c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register) 2627 2628 // Push the comparison result as a conditional register value. 2629 c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister) 2630 return nil 2631 } 2632 2633 // compileLoad implements compiler.compileLoad for the arm64 architecture. 2634 func (c *arm64Compiler) compileLoad(o *wazeroir.UnionOperation) error { 2635 var ( 2636 isFloat bool 2637 loadInst asm.Instruction 2638 targetSizeInBytes int64 2639 vt runtimeValueType 2640 ) 2641 2642 unsignedType := wazeroir.UnsignedType(o.B1) 2643 offset := uint32(o.U2) 2644 2645 switch unsignedType { 2646 case wazeroir.UnsignedTypeI32: 2647 loadInst = arm64.LDRW 2648 targetSizeInBytes = 32 / 8 2649 vt = runtimeValueTypeI32 2650 case wazeroir.UnsignedTypeI64: 2651 loadInst = arm64.LDRD 2652 targetSizeInBytes = 64 / 8 2653 vt = runtimeValueTypeI64 2654 case wazeroir.UnsignedTypeF32: 2655 loadInst = arm64.FLDRS 2656 isFloat = true 2657 targetSizeInBytes = 32 / 8 2658 vt = runtimeValueTypeF32 2659 case wazeroir.UnsignedTypeF64: 2660 loadInst = arm64.FLDRD 2661 isFloat = true 2662 targetSizeInBytes = 64 / 8 2663 vt = runtimeValueTypeF64 2664 } 2665 return c.compileLoadImpl(offset, loadInst, targetSizeInBytes, isFloat, vt) 2666 } 2667 2668 // compileLoad8 implements compiler.compileLoad8 for the arm64 architecture. 2669 func (c *arm64Compiler) compileLoad8(o *wazeroir.UnionOperation) error { 2670 var loadInst asm.Instruction 2671 var vt runtimeValueType 2672 2673 signedInt := wazeroir.SignedInt(o.B1) 2674 offset := uint32(o.U2) 2675 2676 switch signedInt { 2677 case wazeroir.SignedInt32: 2678 loadInst = arm64.LDRSBW 2679 vt = runtimeValueTypeI32 2680 case wazeroir.SignedInt64: 2681 loadInst = arm64.LDRSBD 2682 vt = runtimeValueTypeI64 2683 case wazeroir.SignedUint32: 2684 loadInst = arm64.LDRB 2685 vt = runtimeValueTypeI32 2686 case wazeroir.SignedUint64: 2687 loadInst = arm64.LDRB 2688 vt = runtimeValueTypeI64 2689 } 2690 return c.compileLoadImpl(offset, loadInst, 1, false, vt) 2691 } 2692 2693 // compileLoad16 implements compiler.compileLoad16 for the arm64 architecture. 
2694 func (c *arm64Compiler) compileLoad16(o *wazeroir.UnionOperation) error {
2695 	var loadInst asm.Instruction
2696 	var vt runtimeValueType
2697 
2698 	signedInt := wazeroir.SignedInt(o.B1)
2699 	offset := uint32(o.U2)
2700 
2701 	switch signedInt {
2702 	case wazeroir.SignedInt32:
2703 		loadInst = arm64.LDRSHW
2704 		vt = runtimeValueTypeI32
2705 	case wazeroir.SignedInt64:
2706 		loadInst = arm64.LDRSHD
2707 		vt = runtimeValueTypeI64
2708 	case wazeroir.SignedUint32:
2709 		loadInst = arm64.LDRH
2710 		vt = runtimeValueTypeI32
2711 	case wazeroir.SignedUint64:
2712 		loadInst = arm64.LDRH
2713 		vt = runtimeValueTypeI64
2714 	}
2715 	return c.compileLoadImpl(offset, loadInst, 16/8, false, vt)
2716 }
2717 
2718 // compileLoad32 implements compiler.compileLoad32 for the arm64 architecture.
2719 func (c *arm64Compiler) compileLoad32(o *wazeroir.UnionOperation) error {
2720 	var loadInst asm.Instruction
2721 	signed := o.B1 == 1
2722 	offset := uint32(o.U2)
2723 
2724 	if signed {
2725 		loadInst = arm64.LDRSW
2726 	} else {
2727 		loadInst = arm64.LDRW
2728 	}
2729 	return c.compileLoadImpl(offset, loadInst, 32/8, false, runtimeValueTypeI64)
2730 }
2731 
2732 // compileLoadImpl implements the compileLoad* variants for the arm64 architecture.
2733 func (c *arm64Compiler) compileLoadImpl(offsetArg uint32, loadInst asm.Instruction,
2734 	targetSizeInBytes int64, isFloat bool, resultRuntimeValueType runtimeValueType,
2735 ) error {
2736 	offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2737 	if err != nil {
2738 		return err
2739 	}
2740 
2741 	resultRegister := offsetReg
2742 	if isFloat {
2743 		resultRegister, err = c.allocateRegister(registerTypeVector)
2744 		if err != nil {
2745 			return err
2746 		}
2747 	}
2748 
2749 	// "resultRegister = [arm64ReservedRegisterForMemory + offsetReg]"
2750 	// In other words, "resultRegister = memory.Buffer[offset: offset+targetSizeInBytes]"
2751 	c.assembler.CompileMemoryWithRegisterOffsetToRegister(
2752 		loadInst,
2753 		arm64ReservedRegisterForMemory, offsetReg,
2754 		resultRegister,
2755 	)
2756 
2757 	c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
2758 	return nil
2759 }
2760 
2761 // compileStore implements compiler.compileStore for the arm64 architecture.
2762 func (c *arm64Compiler) compileStore(o *wazeroir.UnionOperation) error {
2763 	var movInst asm.Instruction
2764 	var targetSizeInBytes int64
2765 	unsignedType := wazeroir.UnsignedType(o.B1)
2766 	offset := uint32(o.U2)
2767 	switch unsignedType {
2768 	case wazeroir.UnsignedTypeI32:
2769 		movInst = arm64.STRW
2770 		targetSizeInBytes = 32 / 8
2771 	case wazeroir.UnsignedTypeI64:
2772 		movInst = arm64.STRD
2773 		targetSizeInBytes = 64 / 8
2774 	case wazeroir.UnsignedTypeF32:
2775 		movInst = arm64.FSTRS
2776 		targetSizeInBytes = 32 / 8
2777 	case wazeroir.UnsignedTypeF64:
2778 		movInst = arm64.FSTRD
2779 		targetSizeInBytes = 64 / 8
2780 	}
2781 	return c.compileStoreImpl(offset, movInst, targetSizeInBytes)
2782 }
2783 
2784 // compileStore8 implements compiler.compileStore8 for the arm64 architecture.
2785 func (c *arm64Compiler) compileStore8(o *wazeroir.UnionOperation) error {
2786 	return c.compileStoreImpl(uint32(o.U2), arm64.STRB, 1)
2787 }
2788 
2789 // compileStore16 implements compiler.compileStore16 for the arm64 architecture.
2790 func (c *arm64Compiler) compileStore16(o *wazeroir.UnionOperation) error {
2791 	return c.compileStoreImpl(uint32(o.U2), arm64.STRH, 16/8)
2792 }
2793 
2794 // compileStore32 implements compiler.compileStore32 for the arm64 architecture.
2795 func (c *arm64Compiler) compileStore32(o *wazeroir.UnionOperation) error {
2796 	return c.compileStoreImpl(uint32(o.U2), arm64.STRW, 32/8)
2797 }
2798 
2799 // compileStoreImpl implements the compileStore* variants for the arm64 architecture.
2800 func (c *arm64Compiler) compileStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error {
2801 	val, err := c.popValueOnRegister()
2802 	if err != nil {
2803 		return err
2804 	}
2805 	// Mark the register temporarily used, as compileMemoryAccessOffsetSetup might try allocating a register.
2806 	c.markRegisterUsed(val.register)
2807 
2808 	offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2809 	if err != nil {
2810 		return err
2811 	}
2812 
2813 	// "[arm64ReservedRegisterForMemory + offsetReg] = val.register"
2814 	// In other words, "memory.Buffer[offset: offset+targetSizeInBytes] = val.register"
2815 	c.assembler.CompileRegisterToMemoryWithRegisterOffset(
2816 		storeInst, val.register,
2817 		arm64ReservedRegisterForMemory, offsetReg,
2818 	)
2819 
2820 	c.markRegisterUnused(val.register)
2821 	return nil
2822 }
2823 
2824 // compileMemoryAccessOffsetSetup pops the top value from the stack (called "base"), stores "base + offsetArg"
2825 // into a register, and returns the stored register. We call the result "offset" because we access the memory
2826 // as memory.Buffer[offset: offset+targetSizeInBytes].
2827 //
2828 // Note: this also emits the instructions to check for out-of-bounds memory access.
2829 // In other words, if offset+targetSizeInBytes exceeds the memory size, the code exits with the nativeCallStatusCodeMemoryOutOfBounds status.
2830 func (c *arm64Compiler) compileMemoryAccessOffsetSetup(offsetArg uint32, targetSizeInBytes int64) (offsetRegister asm.Register, err error) {
2831 	base, err := c.popValueOnRegister()
2832 	if err != nil {
2833 		return 0, err
2834 	}
2835 
2836 	offsetRegister = base.register
2837 	if isZeroRegister(base.register) {
2838 		offsetRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
2839 		if err != nil {
2840 			return
2841 		}
2842 		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetRegister)
2843 	}
2844 
2845 	if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxUint32 {
2846 		// "offsetRegister = base + offsetArg + targetSizeInBytes"
2847 		c.assembler.CompileConstToRegister(arm64.ADD, offsetConst, offsetRegister)
2848 	} else {
2849 		// If the offset const is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
2850 		c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
2851 		return
2852 	}
2853 
2854 	// "arm64ReservedRegisterForTemporary = len(memory.Buffer)"
2855 	c.compileLoadMemoryBufferLen(arm64ReservedRegisterForTemporary)
2856 
2857 	// Check if offsetRegister(= base+offsetArg+targetSizeInBytes) > len(memory.Buffer).
2858 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, offsetRegister)
2859 
2860 	// If offsetRegister(= base+offsetArg+targetSizeInBytes) exceeds the memory length,
2861 	// we exit the function with nativeCallStatusCodeMemoryOutOfBounds.
2862 	c.compileMaybeExitFromNativeCode(arm64.BCONDLS, nativeCallStatusCodeMemoryOutOfBounds)
2863 
2864 	// Otherwise, we subtract targetSizeInBytes from offsetRegister.
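	// In effect, the sequence above plus the SUB below computes:
	//
	//	offsetReg = base + offsetArg + targetSizeInBytes
	//	if offsetReg > len(memory.Buffer) { exit(memoryOutOfBounds) }
	//	offsetReg -= targetSizeInBytes // == base + offsetArg, now known to be in bounds
	//
	// Comparing against the end of the accessed range validates the whole region
	// with a single unsigned comparison.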
2865 	c.assembler.CompileConstToRegister(arm64.SUB, targetSizeInBytes, offsetRegister)
2866 	return offsetRegister, nil
2867 }
2868 
2869 // compileMemoryAccessBaseSetup pops the top value from the stack (called "base"), stores "memoryBufferStart + base + offsetArg"
2870 // into a register, and returns the stored register. We call the result "base" because it is used for "base addressing" in
2871 // the sense of the arm docs, i.e. reads from an address without an offset. The result is equivalent to &memory.Buffer[offset].
2872 //
2873 // Note: this also emits the instructions to check for out-of-bounds memory access.
2874 // In other words, if offset+targetSizeInBytes exceeds the memory size, the code exits with the nativeCallStatusCodeMemoryOutOfBounds status.
2875 func (c *arm64Compiler) compileMemoryAccessBaseSetup(offsetArg uint32, targetSizeInBytes int64) (baseRegister asm.Register, err error) {
2876 	offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
2877 	if err != nil {
2878 		return
2879 	}
2880 
2881 	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offsetReg)
2882 	baseRegister = offsetReg
2883 	return
2884 }
2885 
2886 func (c *arm64Compiler) compileMemoryAlignmentCheck(baseRegister asm.Register, targetSizeInBytes int64) {
2887 	if targetSizeInBytes == 1 {
2888 		return // No alignment restrictions when accessing a byte
2889 	}
2890 	var checkBits asm.ConstantValue
2891 	switch targetSizeInBytes {
2892 	case 2:
2893 		checkBits = 0b1
2894 	case 4:
2895 		checkBits = 0b11
2896 	case 8:
2897 		checkBits = 0b111
2898 	}
2899 	c.assembler.CompileRegisterAndConstToRegister(arm64.ANDS, baseRegister, checkBits, arm64.RegRZR)
2900 	c.compileMaybeExitFromNativeCode(arm64.BCONDEQ, nativeCallStatusUnalignedAtomic)
2901 }
2902 
2903 // compileMemoryGrow implements compiler.compileMemoryGrow for the arm64 architecture.
2904 func (c *arm64Compiler) compileMemoryGrow() error {
2905 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2906 		return err
2907 	}
2908 
2909 	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexMemoryGrow); err != nil {
2910 		return err
2911 	}
2912 
2913 	// After return, we re-initialize the reserved registers just like the preamble of functions.
2914 	c.compileReservedStackBasePointerRegisterInitialization()
2915 	c.compileReservedMemoryRegisterInitialization()
2916 	return nil
2917 }
2918 
2919 // compileMemorySize implements compiler.compileMemorySize for the arm64 architecture.
2920 func (c *arm64Compiler) compileMemorySize() error {
2921 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2922 		return err
2923 	}
2924 
2925 	reg, err := c.allocateRegister(registerTypeGeneralPurpose)
2926 	if err != nil {
2927 		return err
2928 	}
2929 
2930 	// "reg = len(memory.Buffer)"
2931 	c.compileLoadMemoryBufferLen(reg)
2932 
2933 	// memory.size returns the memory size in pages, so we divide the byte length by the page size.
2934 	// "reg = reg >> wasm.MemoryPageSizeInBits (== reg / wasm.MemoryPageSize)"
2935 	c.assembler.CompileConstToRegister(
2936 		arm64.LSR,
2937 		wasm.MemoryPageSizeInBits,
2938 		reg,
2939 	)
2940 
2941 	c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
2942 	return nil
2943 }
2944 
2945 // compileCallGoFunction adds instructions to call a Go function identified by the given builtin function index.
2946 // compilerStatus is set before making the call, and it should be either nativeCallStatusCodeCallBuiltInFunction or
2947 // nativeCallStatusCodeCallGoHostFunction.
2948 func (c *arm64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode, builtinFunction wasm.Index) error {
2949 	// Release all the registers, as our calling convention requires the caller to save them.
2950 	if err := c.compileReleaseAllRegistersToStack(); err != nil {
2951 		return err
2952 	}
2953 
2954 	if compilerStatus == nativeCallStatusCodeCallBuiltInFunction {
2955 		// Set the target builtin function index on the call engine.
2956 		// "tmp = $index"
2957 		c.assembler.CompileConstToRegister(
2958 			arm64.MOVD,
2959 			int64(builtinFunction),
2960 			arm64ReservedRegisterForTemporary,
2961 		)
2962 		// "[arm64ReservedRegisterForCallEngine + callEngineExitContextBuiltinFunctionCallIndexOffset] = tmp"
2963 		// In other words, "ce.builtinFunctionCallIndex = tmp (== $index)"
2964 		c.assembler.CompileRegisterToMemory(
2965 			arm64.STRW,
2966 			arm64ReservedRegisterForTemporary,
2967 			arm64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset,
2968 		)
2969 	}
2970 
2971 	c.compileExitFromNativeCode(compilerStatus)
2972 	return nil
2973 }
2974 
2975 // compileConstI32 implements compiler.compileConstI32 for the arm64 architecture.
2976 func (c *arm64Compiler) compileConstI32(o *wazeroir.UnionOperation) error {
2977 	return c.compileIntConstant(true, o.U1)
2978 }
2979 
2980 // compileConstI64 implements compiler.compileConstI64 for the arm64 architecture.
2981 func (c *arm64Compiler) compileConstI64(o *wazeroir.UnionOperation) error {
2982 	return c.compileIntConstant(false, o.U1)
2983 }
2984 
2985 // compileIntConstant adds instructions to load an integer constant.
2986 // is32bit is true if the target value is originally a 32-bit const, false otherwise.
2987 // value holds the (zero-extended for the 32-bit case) load target constant.
2988 func (c *arm64Compiler) compileIntConstant(is32bit bool, value uint64) error {
2989 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
2990 		return err
2991 	}
2992 
2993 	var inst asm.Instruction
2994 	var vt runtimeValueType
2995 	if is32bit {
2996 		inst = arm64.MOVW
2997 		vt = runtimeValueTypeI32
2998 	} else {
2999 		inst = arm64.MOVD
3000 		vt = runtimeValueTypeI64
3001 	}
3002 
3003 	if value == 0 {
3004 		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, vt)
3005 	} else {
3006 		// Take a register to load the value.
3007 		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
3008 		if err != nil {
3009 			return err
3010 		}
3011 
3012 		c.assembler.CompileConstToRegister(inst, int64(value), reg)
3013 
3014 		c.pushRuntimeValueLocationOnRegister(reg, vt)
3015 	}
3016 	return nil
3017 }
3018 
3019 // compileConstF32 implements compiler.compileConstF32 for the arm64 architecture.
3020 func (c *arm64Compiler) compileConstF32(o *wazeroir.UnionOperation) error {
3021 	return c.compileFloatConstant(true, o.U1 /*uint64(math.Float32bits(o.Value))*/)
3022 }
3023 
3024 // compileConstF64 implements compiler.compileConstF64 for the arm64 architecture.
3025 func (c *arm64Compiler) compileConstF64(o *wazeroir.UnionOperation) error {
3026 	return c.compileFloatConstant(false, o.U1 /*math.Float64bits(o.Value)*/)
3027 }
3028 
3029 // compileFloatConstant adds instructions to load a float constant.
3030 // is32bit is true if the target value is originally a 32-bit const, false otherwise.
3031 // value holds the (zero-extended for the 32-bit case) bit representation of the load target float constant.
3032 func (c *arm64Compiler) compileFloatConstant(is32bit bool, value uint64) error {
3033 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3034 return err
3035 }
3036
3037 // Take a register to load the value.
3038 reg, err := c.allocateRegister(registerTypeVector)
3039 if err != nil {
3040 return err
3041 }
3042
3043 tmpReg := arm64.RegRZR
3044 if value != 0 {
3045 tmpReg = arm64ReservedRegisterForTemporary
3046 var inst asm.Instruction
3047 if is32bit {
3048 inst = arm64.MOVW
3049 } else {
3050 inst = arm64.MOVD
3051 }
3052 c.assembler.CompileConstToRegister(inst, int64(value), tmpReg)
3053 }
3054
3055 // Use the FMOV instruction to move the value in the integer register into the float register.
3056 var inst asm.Instruction
3057 var vt runtimeValueType
3058 if is32bit {
3059 vt = runtimeValueTypeF32
3060 inst = arm64.FMOVS
3061 } else {
3062 vt = runtimeValueTypeF64
3063 inst = arm64.FMOVD
3064 }
3065 c.assembler.CompileRegisterToRegister(inst, tmpReg, reg)
3066
3067 c.pushRuntimeValueLocationOnRegister(reg, vt)
3068 return nil
3069 }
3070
3071 // compileMemoryInit implements compiler.compileMemoryInit for the arm64 architecture.
3072 func (c *arm64Compiler) compileMemoryInit(o *wazeroir.UnionOperation) error {
3073 dataIndex := uint32(o.U1)
3074 return c.compileInitImpl(false, dataIndex, 0)
3075 }
3076
3077 // compileInitImpl implements compileTableInit and compileMemoryInit.
3078 //
3079 // TODO: the compiled code in this function should be reused and compiled only once,
3080 // as the code is independent of any module.
3081 func (c *arm64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
3082 outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
3083 if isTable {
3084 outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
3085 }
3086
3087 copySize, err := c.popValueOnRegister()
3088 if err != nil {
3089 return err
3090 }
3091 c.markRegisterUsed(copySize.register)
3092
3093 sourceOffset, err := c.popValueOnRegister()
3094 if err != nil {
3095 return err
3096 }
3097 if isZeroRegister(sourceOffset.register) {
3098 sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3099 if err != nil {
3100 return err
3101 }
3102 c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
3103 }
3104 c.markRegisterUsed(sourceOffset.register)
3105
3106 destinationOffset, err := c.popValueOnRegister()
3107 if err != nil {
3108 return err
3109 }
3110 if isZeroRegister(destinationOffset.register) {
3111 destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3112 if err != nil {
3113 return err
3114 }
3115 c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
3116 }
3117 c.markRegisterUsed(destinationOffset.register)
3118
3119 tableInstanceAddressReg := asm.NilRegister
3120 if isTable {
3121 tableInstanceAddressReg, err = c.allocateRegister(registerTypeGeneralPurpose)
3122 if err != nil {
3123 return err
3124 }
3125 c.markRegisterUsed(tableInstanceAddressReg)
3126 }
3127
3128 if !isZeroRegister(copySize.register) {
3129 // sourceOffset += size.
3130 c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
3131 // destinationOffset += size.
3132 c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
3133 }
3134
3135 instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
3136 if err != nil {
3137 return err
3138 }
3139
3140 if isTable {
3141 c.compileLoadElemInstanceAddress(index, instanceAddr)
3142 } else {
3143 c.compileLoadDataInstanceAddress(index, instanceAddr)
3144 }
3145
3146 // Check the source (data or element) instance bounds.
3147 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3148 instanceAddr, 8, // For both DataInstance and ElementInstance, the length is stored at offset 8.
3149 arm64ReservedRegisterForTemporary)
3150
3151 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
3152 // If not in bounds, raise the out of bounds access error.
3153 c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
3154
3155 // Otherwise, the source is in bounds.
3156 // Check the destination bounds.
3157 if isTable {
3158 // arm64ReservedRegisterForTemporary = &tables[0]
3159 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3160 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3161 arm64ReservedRegisterForTemporary)
3162 // tableInstanceAddressReg = [arm64ReservedRegisterForTemporary + tableIndex*8]
3163 // = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
3164 // = tables[tableIndex]
3165 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3166 arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
3167 tableInstanceAddressReg)
3168 // arm64ReservedRegisterForTemporary = [tableInstanceAddressReg+tableInstanceTableLenOffset] = len(tables[tableIndex])
3169 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3170 tableInstanceAddressReg, tableInstanceTableLenOffset,
3171 arm64ReservedRegisterForTemporary)
3172 } else {
3173 c.compileLoadMemoryBufferLen(arm64ReservedRegisterForTemporary)
3174 }
3175
3176 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
3177 // If not in bounds, raise the out of bounds access error.
3178 c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
3179
3180 // Otherwise, we are ready to copy from source to destination.
3181 if !isZeroRegister(copySize.register) {
3182 // If the size equals zero, we can skip the instructions below entirely.
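// Editorial note: the loop emitted below uses a negated counter so that one
// register serves as both the loop index and the termination condition. Since
// both offsets were pre-advanced by copySize above, ignoring the table case it
// behaves roughly like this Go sketch (srcEnd/dstEnd are illustrative names for
// the advanced offsets):
//
//	for i := -copySize; i < 0; i++ {
//		mem[dstEnd+i] = data[srcEnd+i]
//	}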
3183 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
3184 skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
3185
3186 var ldr, str asm.Instruction
3187 var movSize int64
3188 if isTable {
3189 ldr, str = arm64.LDRD, arm64.STRD
3190 movSize = 8
3191
3192 // arm64ReservedRegisterForTemporary = &Table[0]
3193 c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg,
3194 tableInstanceTableOffset, arm64ReservedRegisterForTemporary)
3195 // destinationOffset = (destinationOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
3196 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3197 destinationOffset.register, pointerSizeLog2,
3198 arm64ReservedRegisterForTemporary, destinationOffset.register)
3199
3200 // arm64ReservedRegisterForTemporary = &ElementInstance.References[0]
3201 c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
3202 // sourceOffset = (sourceOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
3203 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3204 sourceOffset.register, pointerSizeLog2,
3205 arm64ReservedRegisterForTemporary, sourceOffset.register)
3206
3207 // copySize = copySize << pointerSizeLog2
3208 c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
3209 } else {
3210 ldr, str = arm64.LDRB, arm64.STRB
3211 movSize = 1
3212
3213 // destinationOffset += memory buffer's absolute address.
3214 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3215
3216 // sourceOffset += data buffer's absolute address.
3217 c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
3218 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, sourceOffset.register)
3219
3220 }
3221
3222 // Negate the counter so it can be used as an ascending (negative) index toward zero.
3223 c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)
3224
3225 beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3226
3227 // arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
3228 c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
3229 sourceOffset.register, copySize.register,
3230 arm64ReservedRegisterForTemporary)
3231 // [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
3232 c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3233 arm64ReservedRegisterForTemporary,
3234 destinationOffset.register, copySize.register,
3235 )
3236
3237 // Advance the negative counter by movSize; while it is still negative, continue the loop.
3238 c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
3239 c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
3240
3241 c.assembler.SetJumpTargetOnNext(skipCopyJump)
3242 }
3243
3244 c.markRegisterUnused(copySize.register, sourceOffset.register,
3245 destinationOffset.register, instanceAddr, tableInstanceAddressReg)
3246 return nil
3247 }
3248
3249 // compileDataDrop implements compiler.compileDataDrop for the arm64 architecture.
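//
// data.drop merely detaches the data segment; no memory is freed eagerly.
// Zeroing three 8-byte words clears the slice header, which is roughly
// equivalent to this Go sketch (the field name is illustrative):
//
//	moduleInstance.DataInstances[dataIndex] = nil // ptr, len, and cap all become zero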
3250 func (c *arm64Compiler) compileDataDrop(o *wazeroir.UnionOperation) error {
3251 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3252 return err
3253 }
3254
3255 tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3256 if err != nil {
3257 return err
3258 }
3259
3260 dataIndex := uint32(o.U1)
3261 c.compileLoadDataInstanceAddress(dataIndex, tmp)
3262
3263 // Clear the content of DataInstances[dataIndex] (a []byte) by zeroing its three-word slice header.
3264 c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
3265 c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
3266 c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
3267 return nil
3268 }
3269
3270 func (c *arm64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) {
3271 // dst = dataIndex * dataInstanceStructSize
3272 c.assembler.CompileConstToRegister(arm64.MOVD, int64(dataIndex)*dataInstanceStructSize, dst)
3273
3274 // arm64ReservedRegisterForTemporary = &moduleInstance.DataInstances[0]
3275 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3276 arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
3277 arm64ReservedRegisterForTemporary,
3278 )
3279
3280 // dst = arm64ReservedRegisterForTemporary + dst
3281 // = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize
3282 // = &moduleInstance.DataInstances[dataIndex]
3283 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
3284 }
3285
3286 // compileMemoryCopy implements compiler.compileMemoryCopy for the arm64 architecture.
3287 func (c *arm64Compiler) compileMemoryCopy() error {
3288 return c.compileCopyImpl(false, 0, 0)
3289 }
3290
3291 // compileCopyImpl implements compileTableCopy and compileMemoryCopy.
3292 //
3293 // TODO: the compiled code in this function should be reused and compiled only once,
3294 // as the code is independent of any module.
3295 func (c *arm64Compiler) compileCopyImpl(isTable bool, srcTableIndex, dstTableIndex uint32) error {
3296 outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
3297 if isTable {
3298 outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
3299 }
3300
3301 copySize, err := c.popValueOnRegister()
3302 if err != nil {
3303 return err
3304 }
3305 c.markRegisterUsed(copySize.register)
3306
3307 sourceOffset, err := c.popValueOnRegister()
3308 if err != nil {
3309 return err
3310 }
3311 if isZeroRegister(sourceOffset.register) {
3312 sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3313 if err != nil {
3314 return err
3315 }
3316 c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
3317 }
3318 c.markRegisterUsed(sourceOffset.register)
3319
3320 destinationOffset, err := c.popValueOnRegister()
3321 if err != nil {
3322 return err
3323 }
3324 if isZeroRegister(destinationOffset.register) {
3325 destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3326 if err != nil {
3327 return err
3328 }
3329 c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
3330 }
3331 c.markRegisterUsed(destinationOffset.register)
3332
3333 if !isZeroRegister(copySize.register) {
3334 // sourceOffset += size.
3335 c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
3336 // destinationOffset += size.
3337 c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
3338 }
3339
3340 if isTable {
3341 // arm64ReservedRegisterForTemporary = &tables[0]
3342 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3343 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3344 arm64ReservedRegisterForTemporary)
3345 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + srcTableIndex*8]
3346 // = [&tables[0] + srcTableIndex*sizeOf(*tableInstance)]
3347 // = tables[srcTableIndex]
3348 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3349 arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
3350 arm64ReservedRegisterForTemporary)
3351 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[srcTableIndex])
3352 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3353 arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3354 arm64ReservedRegisterForTemporary)
3355 } else {
3356 // arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
3357 c.compileLoadMemoryBufferLen(arm64ReservedRegisterForTemporary)
3358 }
3359
3360 // Check that the source (memory or table) length >= sourceOffset.
3361 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
3362 // If not in bounds, raise the out of bounds access error.
3363 c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
3364
3365 // Otherwise, check that the destination length >= destinationOffset.
3366 if isTable {
3367 // arm64ReservedRegisterForTemporary = &tables[0]
3368 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3369 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3370 arm64ReservedRegisterForTemporary)
3371 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + dstTableIndex*8]
3372 // = [&tables[0] + dstTableIndex*sizeOf(*tableInstance)]
3373 // = tables[dstTableIndex]
3374 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3375 arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
3376 arm64ReservedRegisterForTemporary)
3377 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[dstTableIndex])
3378 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3379 arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3380 arm64ReservedRegisterForTemporary)
3381 }
3382
3383 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
3384 // If not in bounds, raise the out of bounds access error.
3385 c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
3386
3387 // Otherwise, we are ready to copy from source to destination.
3388 var ldr, str asm.Instruction
3389 var movSize int64
3390 if isTable {
3391 ldr, str = arm64.LDRD, arm64.STRD
3392 movSize = 8
3393 } else {
3394 ldr, str = arm64.LDRB, arm64.STRB
3395 movSize = 1
3396 }
3397
3398 // If the size equals zero, we can skip the instructions below entirely.
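// Editorial note: like Go's copy and C's memmove, memory.copy must tolerate
// overlapping regions. The code below therefore compares the two offsets and
// picks the copy direction, conceptually:
//
//	if sourceOffset < destinationOffset {
//		// copy backward (highest address first), so unread source bytes survive
//	} else {
//		// copy forward (lowest address first)
//	}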
3399 if !isZeroRegister(copySize.register) {
3400 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
3401 skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
3402
3403 // If source offset < destination offset: copy backward, i.e. for (i = size-1; i >= 0; i--) dst[i] = src[i];
3404 c.assembler.CompileTwoRegistersToNone(arm64.CMP, sourceOffset.register, destinationOffset.register)
3405 destLowerThanSourceJump := c.assembler.CompileJump(arm64.BCONDLS)
3406 var endJump asm.Node
3407 {
3408 // sourceOffset -= size.
3409 c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, sourceOffset.register)
3410 // destinationOffset -= size.
3411 c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, destinationOffset.register)
3412
3413 if isTable {
3414 // arm64ReservedRegisterForTemporary = &tables[dstTableIndex].References[0]
3415 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3416 callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3417 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3418 arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
3419 arm64ReservedRegisterForTemporary)
3420 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3421 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3422 arm64ReservedRegisterForTemporary)
3423 // destinationOffset = (destinationOffset << pointerSizeLog2) + &tables[dstTableIndex].References[0]
3424 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3425 destinationOffset.register, pointerSizeLog2,
3426 arm64ReservedRegisterForTemporary, destinationOffset.register)
3427
3428 // arm64ReservedRegisterForTemporary = &tables[srcTableIndex].References[0]
3429 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3430 callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3431 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3432 arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
3433 arm64ReservedRegisterForTemporary)
3434 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3435 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3436 arm64ReservedRegisterForTemporary)
3437 // sourceOffset = (sourceOffset << pointerSizeLog2) + &tables[srcTableIndex].References[0]
3438 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3439 sourceOffset.register, pointerSizeLog2,
3440 arm64ReservedRegisterForTemporary, sourceOffset.register)
3441
3442 // copySize = copySize << pointerSizeLog2, as each element is 8 bytes and we copy them one by one.
3443 c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
3444 } else {
3445 // sourceOffset += memory buffer's absolute address.
3446 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
3447 // destinationOffset += memory buffer's absolute address.
3448 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3449 }
3450
3451 beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3452
3453 // size -= movSize, setting the condition flags.
3454 c.assembler.CompileConstToRegister(arm64.SUBS, movSize, copySize.register)
3455
3456 // arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
3457 c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
3458 sourceOffset.register, copySize.register,
3459 arm64ReservedRegisterForTemporary)
3460 // [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
3461 c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3462 arm64ReservedRegisterForTemporary,
3463 destinationOffset.register, copySize.register,
3464 )
3465
3466 // If the value in copySize.register is not zero yet, continue the loop.
3467 c.assembler.CompileJump(arm64.BCONDNE).AssignJumpTarget(beginCopyLoop)
3468
3469 // Otherwise, exit the loop.
3470 endJump = c.assembler.CompileJump(arm64.B)
3471 }
3472
3473 // Else (destination offset <= source offset): copy forward, i.e. for (i = 0; i < size; i++) dst[i] = src[i];
3474 c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump)
3475 {
3476
3477 if isTable {
3478 // arm64ReservedRegisterForTemporary = &tables[dstTableIndex].References[0]
3479 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3480 callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3481 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3482 arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
3483 arm64ReservedRegisterForTemporary)
3484 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3485 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3486 arm64ReservedRegisterForTemporary)
3487 // destinationOffset = (destinationOffset << pointerSizeLog2) + &tables[dstTableIndex].References[0]
3488 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3489 destinationOffset.register, pointerSizeLog2,
3490 arm64ReservedRegisterForTemporary, destinationOffset.register)
3491
3492 // arm64ReservedRegisterForTemporary = &tables[srcTableIndex].References[0]
3493 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3494 callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3495 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3496 arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
3497 arm64ReservedRegisterForTemporary)
3498 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3499 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3500 arm64ReservedRegisterForTemporary)
3501 // sourceOffset = (sourceOffset << pointerSizeLog2) + &tables[srcTableIndex].References[0]
3502 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3503 sourceOffset.register, pointerSizeLog2,
3504 arm64ReservedRegisterForTemporary, sourceOffset.register)
3505
3506 // copySize = copySize << pointerSizeLog2, as each element is 8 bytes and we copy them one by one.
3507 c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
3508 } else {
3509 // sourceOffset += memory buffer's absolute address.
3510 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
3511 // destinationOffset += memory buffer's absolute address.
3512 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3513 }
3514
3515 // Negate the counter so it can be used as an ascending (negative) index toward zero.
3516 c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)
3517
3518 beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3519
3520 // arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
3521 c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
3522 sourceOffset.register, copySize.register,
3523 arm64ReservedRegisterForTemporary)
3524 // [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
3525 c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3526 arm64ReservedRegisterForTemporary,
3527 destinationOffset.register, copySize.register,
3528 )
3529
3530 // size += movSize; while the counter is still negative, continue the loop.
3531 c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
3532 c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
3533 }
3534 c.assembler.SetJumpTargetOnNext(skipCopyJump)
3535 c.assembler.SetJumpTargetOnNext(endJump)
3536 }
3537
3538 // Mark all of the operand registers as unused.
3539 c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register)
3540
3541 return nil
3542 }
3543
3544 // compileMemoryFill implements compiler.compileMemoryFill for the arm64 architecture.
3545 func (c *arm64Compiler) compileMemoryFill() error {
3546 return c.compileFillImpl(false, 0)
3547 }
3548
3549 // compileFillImpl implements compileTableFill and compileMemoryFill.
3550 //
3551 // TODO: the compiled code in this function should be reused and compiled only once,
3552 // as the code is independent of any module.
3553 func (c *arm64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error {
3554 outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
3555 if isTable {
3556 outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
3557 }
3558
3559 fillSize, err := c.popValueOnRegister()
3560 if err != nil {
3561 return err
3562 }
3563 c.markRegisterUsed(fillSize.register)
3564
3565 value, err := c.popValueOnRegister()
3566 if err != nil {
3567 return err
3568 }
3569 c.markRegisterUsed(value.register)
3570
3571 destinationOffset, err := c.popValueOnRegister()
3572 if err != nil {
3573 return err
3574 }
3575 if isZeroRegister(destinationOffset.register) {
3576 destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3577 if err != nil {
3578 return err
3579 }
3580 c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
3581 }
3582 c.markRegisterUsed(destinationOffset.register)
3583
3584 // destinationOffset += size.
3585 c.assembler.CompileRegisterToRegister(arm64.ADD, fillSize.register, destinationOffset.register)
3586
3587 if isTable {
3588 // arm64ReservedRegisterForTemporary = &tables[0]
3589 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3590 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3591 arm64ReservedRegisterForTemporary)
3592 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + tableIndex*8]
3593 // = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
3594 // = tables[tableIndex]
3595 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3596 arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
3597 arm64ReservedRegisterForTemporary)
3598 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[tableIndex])
3599 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3600 arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3601 arm64ReservedRegisterForTemporary)
3602 } else {
3603 // arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
3604 c.compileLoadMemoryBufferLen(arm64ReservedRegisterForTemporary)
3605 }
3606
3607 // Check that the length >= destinationOffset.
3608 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
3609
3610 // If not, raise the runtime error.
3611 c.compileMaybeExitFromNativeCode(arm64.BCONDLS, outOfBoundsErrorStatus)
3612
3613 // Otherwise, we are ready to fill the destination region with the value.
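// Editorial sketch: ignoring the table case, the fill emitted below is roughly
// equivalent to this Go loop over the memory buffer:
//
//	for i := fillSize - 1; i >= 0; i-- {
//		mem[destinationOffset+i] = byte(value)
//	}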
3614 // If the size equals zero, we can skip the instructions below entirely.
3615 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, fillSize.register)
3616 skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
3617
3618 // destinationOffset -= size.
3619 c.assembler.CompileRegisterToRegister(arm64.SUB, fillSize.register, destinationOffset.register)
3620
3621 var str asm.Instruction
3622 var movSize int64
3623 if isTable {
3624 str = arm64.STRD
3625 movSize = 8
3626
3627 // arm64ReservedRegisterForTemporary = &tables[tableIndex].References[0]
3628 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3629 callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3630 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3631 arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
3632 arm64ReservedRegisterForTemporary)
3633 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3634 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3635 arm64ReservedRegisterForTemporary)
3636 // destinationOffset = (destinationOffset << pointerSizeLog2) + &tables[tableIndex].References[0]
3637 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3638 destinationOffset.register, pointerSizeLog2,
3639 arm64ReservedRegisterForTemporary, destinationOffset.register)
3640
3641 // fillSize = fillSize << pointerSizeLog2, as each element is 8 bytes and we fill them one by one.
3642 c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, fillSize.register)
3643 } else {
3644 str = arm64.STRB
3645 movSize = 1
3646
3647 // destinationOffset += memory buffer's absolute address.
3648 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3649 }
3650
3651 // Naively implement the fill with a loop, storing one element at a time.
3652 beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3653
3654 // fillSize -= movSize, setting the condition flags.
3655 c.assembler.CompileConstToRegister(arm64.SUBS, movSize, fillSize.register)
3656
3657 // [destinationOffset + fillSize.register] = value.register.
3658 c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3659 value.register,
3660 destinationOffset.register, fillSize.register,
3661 )
3662
3663 // If the value in fillSize.register is not zero yet, continue the loop.
3664 continueJump := c.assembler.CompileJump(arm64.BCONDNE)
3665 continueJump.AssignJumpTarget(beginCopyLoop)
3666
3667 // Mark all of the operand registers as unused.
3668 c.markRegisterUnused(fillSize.register, value.register, destinationOffset.register)
3669
3670 c.assembler.SetJumpTargetOnNext(skipCopyJump)
3671 return nil
3672 }
3673
3674 // compileTableInit implements compiler.compileTableInit for the arm64 architecture.
3675 func (c *arm64Compiler) compileTableInit(o *wazeroir.UnionOperation) error {
3676 elemIndex := uint32(o.U1)
3677 tableIndex := uint32(o.U2)
3678 return c.compileInitImpl(true, elemIndex, tableIndex)
3679 }
3680
3681 // compileTableCopy implements compiler.compileTableCopy for the arm64 architecture.
3682 func (c *arm64Compiler) compileTableCopy(o *wazeroir.UnionOperation) error {
3683 return c.compileCopyImpl(true, uint32(o.U1), uint32(o.U2))
3684 }
3685
3686 // compileElemDrop implements compiler.compileElemDrop for the arm64 architecture.
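//
// As with compileDataDrop above, dropping only zeroes the element instance's
// three-word slice header; the backing storage is left to the garbage collector.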
3687 func (c *arm64Compiler) compileElemDrop(o *wazeroir.UnionOperation) error {
3688 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3689 return err
3690 }
3691
3692 tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3693 if err != nil {
3694 return err
3695 }
3696
3697 elemIndex := uint32(o.U1)
3698 c.compileLoadElemInstanceAddress(elemIndex, tmp)
3699
3700 // Clear the content of ElementInstances[elemIndex] (a slice of references) by zeroing its three-word slice header.
3701 c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
3702 c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
3703 c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
3704 return nil
3705 }
3706
3707 func (c *arm64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) {
3708 // dst = elemIndex * elementInstanceStructSize
3709 c.assembler.CompileConstToRegister(arm64.MOVD, int64(elemIndex)*elementInstanceStructSize, dst)
3710
3711 // arm64ReservedRegisterForTemporary = &moduleInstance.ElementInstances[0]
3712 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3713 arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
3714 arm64ReservedRegisterForTemporary,
3715 )
3716
3717 // dst = arm64ReservedRegisterForTemporary + dst
3718 // = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize
3719 // = &moduleInstance.ElementInstances[elemIndex]
3720 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
3721 }
3722
3723 // compileRefFunc implements compiler.compileRefFunc for the arm64 architecture.
3724 func (c *arm64Compiler) compileRefFunc(o *wazeroir.UnionOperation) error {
3725 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3726 return err
3727 }
3728
3729 ref, err := c.allocateRegister(registerTypeGeneralPurpose)
3730 if err != nil {
3731 return err
3732 }
3733 // ref = [arm64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset]
3734 // = &moduleEngine.functions[0]
3735 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3736 arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
3737 ref)
3738
3739 // ref = ref + functionIndex*sizeOf(function)
3740 // = &moduleEngine.functions[index]
3741 functionIndex := int64(o.U1)
3742 c.assembler.CompileConstToRegister(arm64.ADD,
3743 functionIndex*functionSize,
3744 ref,
3745 )
3746
3747 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64)
3748 return nil
3749 }
3750
3751 // compileTableGet implements compiler.compileTableGet for the arm64 architecture.
3752 func (c *arm64Compiler) compileTableGet(o *wazeroir.UnionOperation) error {
3753 ref, err := c.allocateRegister(registerTypeGeneralPurpose)
3754 if err != nil {
3755 return err
3756 }
3757 c.markRegisterUsed(ref)
3758
3759 offset, err := c.popValueOnRegister()
3760 if err != nil {
3761 return err
3762 }
3763
3764 // arm64ReservedRegisterForTemporary = &tables[0]
3765 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3766 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3767 arm64ReservedRegisterForTemporary)
3768 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
3769 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3770 // = [&tables[TableIndex]] = tables[TableIndex].
3771 tableIndex := int64(o.U1)
3772 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3773 arm64ReservedRegisterForTemporary, tableIndex*8,
3774 arm64ReservedRegisterForTemporary)
3775
3776 // Out of bounds check.
3777 // ref = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
3778 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3779 arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3780 ref,
3781 )
3782 // "cmp ref, offset"
3783 c.assembler.CompileTwoRegistersToNone(arm64.CMP, ref, offset.register)
3784
3785 // If it exceeds len(table), we exit the execution.
3786 c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
3787
3788 // ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
3789 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3790 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3791 ref,
3792 )
3793
3794 // ref = (offset << pointerSizeLog2) + ref
3795 // = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
3796 // = &tables[TableIndex].References[offset]
3797 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3798 offset.register, pointerSizeLog2, ref, ref)
3799
3800 // ref = [ref] = tables[TableIndex].References[offset], i.e. the reference loaded as uint64.
3801 c.assembler.CompileMemoryToRegister(arm64.LDRD, ref, 0, ref)
3802
3803 c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit values at runtime.
3804 return nil
3805 }
3806
3807 // compileTableSet implements compiler.compileTableSet for the arm64 architecture.
3808 func (c *arm64Compiler) compileTableSet(o *wazeroir.UnionOperation) error {
3809 ref := c.locationStack.pop()
3810 if err := c.compileEnsureOnRegister(ref); err != nil {
3811 return err
3812 }
3813
3814 offset := c.locationStack.pop()
3815 if err := c.compileEnsureOnRegister(offset); err != nil {
3816 return err
3817 }
3818
3819 tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3820 if err != nil {
3821 return err
3822 }
3823
3824 // arm64ReservedRegisterForTemporary = &tables[0]
3825 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3826 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3827 arm64ReservedRegisterForTemporary)
3828 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
3829 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3830 // = [&tables[TableIndex]] = tables[TableIndex]
3831 tableIndex := int64(o.U1)
3832 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3833 arm64ReservedRegisterForTemporary, tableIndex*8,
3834 arm64ReservedRegisterForTemporary)
3835
3836 // Out of bounds check.
3837 // tmp = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
3838 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3839 arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3840 tmp,
3841 )
3842 // "cmp tmp, offset"
3843 c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp, offset.register)
3844
3845 // If it exceeds len(table), we exit the execution.
3846 c.compileMaybeExitFromNativeCode(arm64.BCONDLO, nativeCallStatusCodeInvalidTableAccess)
3847
3848 // tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
3849 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3850 arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3851 tmp,
3852 )
3853
3854 // tmp = (offset << pointerSizeLog2) + tmp
3855 // = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
3856 // = &tables[TableIndex].References[offset]
3857 c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, tmp, tmp)
3858
3859 // Set the reference's raw pointer.
3860 c.assembler.CompileRegisterToMemory(arm64.STRD, ref.register, tmp, 0)
3861
3862 c.markRegisterUnused(offset.register, ref.register, tmp)
3863 return nil
3864 }
3865
3866 // compileTableGrow implements compiler.compileTableGrow for the arm64 architecture.
3867 func (c *arm64Compiler) compileTableGrow(o *wazeroir.UnionOperation) error {
3868 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3869 return err
3870 }
3871
3872 // Pushes the table index.
3873 tableIndex := o.U1
3874 if err := c.compileIntConstant(true, tableIndex); err != nil {
3875 return err
3876 }
3877
3878 // Just like memory.grow, table.grow cannot be done in assembly, as it involves allocation in Go.
3879 // Therefore, we call out to the builtin function for this purpose.
3880 if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexTableGrow); err != nil {
3881 return err
3882 }
3883
3884 // TableGrow consumes three values (table index, number of items, initial value).
3885 for i := 0; i < 3; i++ {
3886 c.locationStack.pop()
3887 }
3888
3889 // Then, the previous length is pushed as the result.
3890 v := c.locationStack.pushRuntimeValueLocationOnStack()
3891 v.valueType = runtimeValueTypeI32
3892
3893 // After return, we re-initialize reserved registers just like the preamble of functions.
3894 c.compileReservedStackBasePointerRegisterInitialization()
3895 c.compileReservedMemoryRegisterInitialization()
3896 return nil
3897 }
3898
3899 // compileTableSize implements compiler.compileTableSize for the arm64 architecture.
3900 func (c *arm64Compiler) compileTableSize(o *wazeroir.UnionOperation) error {
3901 if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3902 return err
3903 }
3904 result, err := c.allocateRegister(registerTypeGeneralPurpose)
3905 if err != nil {
3906 return err
3907 }
3908 c.markRegisterUsed(result)
3909
3910 // arm64ReservedRegisterForTemporary = &tables[0]
3911 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3912 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3913 arm64ReservedRegisterForTemporary)
3914 // arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
3915 // = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3916 // = [&tables[TableIndex]] = tables[TableIndex].
3917 tableIndex := int64(o.U1)
3918 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3919 arm64ReservedRegisterForTemporary, tableIndex*8,
3920 arm64ReservedRegisterForTemporary)
3921
3922 // result = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
3923 c.assembler.CompileMemoryToRegister(arm64.LDRD,
3924 arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3925 result,
3926 )
3927
3928 c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
3929 return nil
3930 }
3931
3932 // compileTableFill implements compiler.compileTableFill for the arm64 architecture.
3933 func (c *arm64Compiler) compileTableFill(o *wazeroir.UnionOperation) error {
3934 tableIndex := uint32(o.U1)
3935 return c.compileFillImpl(true, tableIndex)
3936 }
3937
3938 // popTwoValuesOnRegisters pops two values from the location stack, ensures
3939 // these two values are located on registers, and marks them unused.
3940 //
3941 // TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
3942 // but the name seems awkward.
3943 func (c *arm64Compiler) popTwoValuesOnRegisters() (x1, x2 *runtimeValueLocation, err error) {
3944 x2 = c.locationStack.pop()
3945 if err = c.compileEnsureOnRegister(x2); err != nil {
3946 return
3947 }
3948
3949 x1 = c.locationStack.pop()
3950 if err = c.compileEnsureOnRegister(x1); err != nil {
3951 return
3952 }
3953
3954 c.markRegisterUnused(x2.register)
3955 c.markRegisterUnused(x1.register)
3956 return
3957 }
3958
3959 // popValueOnRegister pops one value from the location stack, ensures
3960 // that it is located on a register, and marks it unused.
3961 //
3962 // TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
3963 // but the name seems awkward.
3964 func (c *arm64Compiler) popValueOnRegister() (v *runtimeValueLocation, err error) {
3965 v = c.locationStack.pop()
3966 if err = c.compileEnsureOnRegister(v); err != nil {
3967 return
3968 }
3969
3970 c.markRegisterUnused(v.register)
3971 return
3972 }
3973
3974 // compileEnsureOnRegister emits instructions to ensure that a value is located on a register.
3975 func (c *arm64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
3976 if loc.onStack() {
3977 reg, err := c.allocateRegister(loc.getRegisterType())
3978 if err != nil {
3979 return err
3980 }
3981
3982 // Record that the value holds the register and the register is marked used.
3983 loc.setRegister(reg)
3984 c.markRegisterUsed(reg)
3985
3986 c.compileLoadValueOnStackToRegister(loc)
3987 } else if loc.onConditionalRegister() {
3988 err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
3989 }
3990 return
3991 }
3992
3993 // maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
3994 // to a general purpose register if the value is located on a conditional register.
3995 //
3996 // This is usually called at the beginning of methods on the compiler interface where we might
3997 // compile instructions without saving the conditional register value.
3998 // compile* functions that do not call this must instead save the conditional
3999 // value to the stack or a register by invoking compileEnsureOnRegister for the top value.
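//
// Editorial illustration: a conditional location models the NZCV flags, which
// almost any subsequently emitted instruction may clobber. For example, the
// result of i32.eq exists only as a condition until it is materialized,
// conceptually via:
//
//	CSET Rdst, EQ  // Rdst = 1 if the EQ condition holds, else 0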
4000 func (c *arm64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
4001 if c.locationStack.sp > 0 {
4002 if loc := c.locationStack.peek(); loc.onConditionalRegister() {
4003 err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
4004 }
4005 }
4006 return
4007 }
4008
4009 // compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value
4010 // to a general purpose register.
4011 func (c *arm64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
4012 reg, err := c.allocateRegister(loc.getRegisterType())
4013 if err != nil {
4014 return err
4015 }
4016
4017 c.markRegisterUsed(reg)
4018 c.assembler.CompileConditionalRegisterSet(loc.conditionalRegister, reg)
4019
4020 // Record that now the value is located on a general purpose register.
4021 loc.setRegister(reg)
4022 return nil
4023 }
4024
4025 // compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for arm64.
4026 func (c *arm64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) {
4027 switch loc.valueType {
4028 case runtimeValueTypeI32:
4029 c.assembler.CompileMemoryToRegister(arm64.LDRW, arm64ReservedRegisterForStackBasePointerAddress,
4030 int64(loc.stackPointer)*8, loc.register)
4031 case runtimeValueTypeI64:
4032 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForStackBasePointerAddress,
4033 int64(loc.stackPointer)*8, loc.register)
4034 case runtimeValueTypeF32:
4035 c.assembler.CompileMemoryToRegister(arm64.FLDRS, arm64ReservedRegisterForStackBasePointerAddress,
4036 int64(loc.stackPointer)*8, loc.register)
4037 case runtimeValueTypeF64:
4038 c.assembler.CompileMemoryToRegister(arm64.FLDRD, arm64ReservedRegisterForStackBasePointerAddress,
4039 int64(loc.stackPointer)*8, loc.register)
4040 case runtimeValueTypeV128Lo:
4041 c.assembler.CompileMemoryToVectorRegister(arm64.VMOV,
4042 arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register,
4043 arm64.VectorArrangementQ)
4044 // The higher 64 bits are loaded as well by the Q-arrangement move above.
4045 hi := &c.locationStack.stack[loc.stackPointer+1]
4046 hi.setRegister(loc.register)
4047 case runtimeValueTypeV128Hi:
4048 panic("BUG: V128Hi must be loaded to a register along with V128Lo")
4049 }
4050 }
4051
4052 // allocateRegister implements compiler.allocateRegister for arm64.
4053 func (c *arm64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
4054 var ok bool
4055 // Try to get an unused register.
4056 reg, ok = c.locationStack.takeFreeRegister(t)
4057 if ok {
4058 return
4059 }
4060
4061 // If not found, we have to steal a register.
4062 stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
4063 if !ok {
4064 err = fmt.Errorf("cannot steal register")
4065 return
4066 }
4067
4068 // Release the steal target register's value onto its stack location.
4069 reg = stealTarget.register
4070 c.compileReleaseRegisterToStack(stealTarget)
4071 return
4072 }
4073
4074 func (c *arm64Compiler) compileLoadMemoryBufferLen(destReg asm.Register) {
4075 // destReg = ce.moduleContext.memoryInstance (pointer)
4076 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset, destReg)
4077 // destReg = the address of memoryInstance's buffer length field; the LDARD below then loads len(mem.Buffer) with acquire semantics.
4078 c.assembler.CompileConstToRegister(arm64.ADD, memoryInstanceBufferLenOffset, destReg)
4079 c.assembler.CompileMemoryWithRegisterSourceToRegister(arm64.LDARD, destReg, destReg)
4080 }
4081
4082 // compileReleaseAllRegistersToStack adds instructions to store all the values located on
4083 // either general purpose or conditional registers onto the memory stack.
4084 // See compileReleaseRegisterToStack.
4085 func (c *arm64Compiler) compileReleaseAllRegistersToStack() (err error) {
4086 for i := uint64(0); i < c.locationStack.sp; i++ {
4087 if loc := &c.locationStack.stack[i]; loc.onRegister() {
4088 c.compileReleaseRegisterToStack(loc)
4089 } else if loc.onConditionalRegister() {
4090 if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
4091 return
4092 }
4093 c.compileReleaseRegisterToStack(loc)
4094 }
4095 }
4096 return
4097 }
4098
4099 // compileReleaseRegisterToStack adds an instruction to write the value in a register back to the memory stack region.
4100 func (c *arm64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) {
4101 switch loc.valueType {
4102 case runtimeValueTypeI32:
4103 c.assembler.CompileRegisterToMemory(arm64.STRW, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
4104 case runtimeValueTypeI64:
4105 c.assembler.CompileRegisterToMemory(arm64.STRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
4106 case runtimeValueTypeF32:
4107 c.assembler.CompileRegisterToMemory(arm64.FSTRS, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
4108 case runtimeValueTypeF64:
4109 c.assembler.CompileRegisterToMemory(arm64.FSTRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
4110 case runtimeValueTypeV128Lo:
4111 c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
4112 loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
4113 arm64.VectorArrangementQ)
4114 // The higher 64 bits are released as well by the Q-arrangement move above.
4115 hi := &c.locationStack.stack[loc.stackPointer+1]
4116 c.locationStack.releaseRegister(hi)
4117 case runtimeValueTypeV128Hi:
4118 panic("BUG: V128Hi must be released to the stack along with V128Lo")
4119 default:
4120 panic("BUG")
4121 }
4122
4123 // Mark the register as free.
4124 c.locationStack.releaseRegister(loc)
4125 }
4126
4127 // compileReservedStackBasePointerRegisterInitialization adds instructions to initialize arm64ReservedRegisterForStackBasePointerAddress
4128 // so that it points to the absolute address of the stack base for this function.
4129 func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() {
4130 // First, load the address of the first element in the value stack into arm64ReservedRegisterForStackBasePointerAddress temporarily.
4131 c.assembler.CompileMemoryToRegister(arm64.LDRD,
4132 arm64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset,
4133 arm64ReservedRegisterForStackBasePointerAddress)
4134
4135 // Next, we load the stack base pointer in bytes (ce.stackBasePointerInBytes) into arm64ReservedRegisterForTemporary.
4136 c.assembler.CompileMemoryToRegister(arm64.LDRD,
4137 arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
4138 arm64ReservedRegisterForTemporary)
4139
4140 // Finally, arm64ReservedRegisterForStackBasePointerAddress += arm64ReservedRegisterForTemporary, i.e. stackElement0Address + stackBasePointerInBytes.
4141 c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForStackBasePointerAddress)
4142 }
4143
4144 func (c *arm64Compiler) compileReservedMemoryRegisterInitialization() {
4145 if c.ir.HasMemory || c.ir.UsesMemory {
4146 // "arm64ReservedRegisterForMemory = ce.MemoryElement0Address"
4147 c.assembler.CompileMemoryToRegister(
4148 arm64.LDRD,
4149 arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
4150 arm64ReservedRegisterForMemory,
4151 )
4152 }
4153 }
4154
4155 // compileModuleContextInitialization adds instructions to initialize ce.moduleContext's fields based on
4156 // ce.moduleContext.ModuleInstanceAddress.
4157 // This is called in two cases: in the function preamble, and on the return from (non-Go) function calls.
4158 func (c *arm64Compiler) compileModuleContextInitialization() error {
4159 tmpX, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4160 if !found {
4161 panic("BUG: all the registers should be free at this point")
4162 }
4163 c.markRegisterUsed(tmpX)
4164 tmpY, found := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
4165 if !found {
4166 panic("BUG: all the registers should be free at this point")
4167 }
4168 c.markRegisterUsed(tmpY)
4169
4170 // "tmpX = ce.ModuleInstanceAddress"
4171 c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset, tmpX)
4172
4173 // If the module instance address stays the same, we could skip the entire code below.
4174 c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64CallingConventionModuleInstanceAddressRegister, tmpX)
4175 brIfModuleUnchanged := c.assembler.CompileJump(arm64.BCONDEQ)
4176
4177 // Otherwise, update the moduleEngine.moduleContext.ModuleInstanceAddress.
4178 c.assembler.CompileRegisterToMemory(arm64.STRD,
4179 arm64CallingConventionModuleInstanceAddressRegister,
4180 arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceOffset,
4181 )
4182
4183 // Also, we have to update the following fields:
4184 // * callEngine.moduleContext.globalElement0Address
4185 // * callEngine.moduleContext.memoryElement0Address
4186 // * callEngine.moduleContext.memorySliceLen
4187 // * callEngine.moduleContext.memoryInstance
4188 // * callEngine.moduleContext.tableElement0Address
4189 // * callEngine.moduleContext.tableSliceLen
4190 // * callEngine.moduleContext.functionsElement0Address
4191 // * callEngine.moduleContext.typeIDsElement0Address
4192 // * callEngine.moduleContext.dataInstancesElement0Address
4193 // * callEngine.moduleContext.elementInstancesElement0Address
4194
4195 // Update globalElement0Address.
4196 //
4197 // Note: if the function contains a global.get or global.set instruction, the existence of the globals
4198 // is ensured by function validation at the module instantiation phase, which is why it is ok to
4199 // skip the initialization if the module's globals slice is empty.
4200 if len(c.ir.Globals) > 0 {
4201 // "tmpX = &moduleInstance.Globals[0]"
4202 c.assembler.CompileMemoryToRegister(arm64.LDRD,
4203 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset,
4204 tmpX,
4205 )
4206
4207 // "ce.GlobalElement0Address = tmpX (== &moduleInstance.Globals[0])"
4208 c.assembler.CompileRegisterToMemory(
4209 arm64.STRD, tmpX,
4210 arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
4211 )
4212 }
4213
4214 // Update memoryElement0Address and memorySliceLen.
4215 //
4216 // Note: if the function contains a memory instruction, the memory instance must be non-nil.
4217 // That is ensured by function validation at the module instantiation phase, which is
4218 // why it is ok to skip the initialization if the module's memory instance is nil.
4219 if c.ir.HasMemory {
4220 // "tmpX = moduleInstance.Memory"
4221 c.assembler.CompileMemoryToRegister(
4222 arm64.LDRD,
4223 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
4224 tmpX,
4225 )
4226
4227 // First, set ce.memoryInstance.
4228 c.assembler.CompileRegisterToMemory(
4229 arm64.STRD,
4230 tmpX,
4231 arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
4232 )
4233
4234 // Then, we write ce.memoryElement0Address.
4235 //
4236 // "tmpY = *tmpX (== &memory.Buffer[0])"
4237 c.assembler.CompileMemoryToRegister(
4238 arm64.LDRD,
4239 tmpX, memoryInstanceBufferOffset,
4240 tmpY,
4241 )
4242 // "ce.memoryElement0Address = tmpY".
4243 c.assembler.CompileRegisterToMemory(
4244 arm64.STRD,
4245 tmpY,
4246 arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
4247 )
4248 }
4249
4250 // Update tableElement0Address, tableSliceLen and typeIDsElement0Address.
4251 //
4252 // Note: if the function contains a table instruction, the existence of the table
4253 // is ensured by function validation at the module instantiation phase, which is
4254 // why it is ok to skip the initialization if the module's table doesn't exist.
4255 if c.ir.HasTable {
4256 // "tmpX = &tables[0] (type of **wasm.Table)"
4257 c.assembler.CompileMemoryToRegister(
4258 arm64.LDRD,
4259 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTablesOffset,
4260 tmpX,
4261 )
4262
4263 // Update ce.tableElement0Address.
4264 // "ce.tableElement0Address = tmpX".
4265 c.assembler.CompileRegisterToMemory(
4266 arm64.STRD,
4267 tmpX,
4268 arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
4269 )
4270
4271 // Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
4272 c.assembler.CompileMemoryToRegister(arm64.LDRD,
4273 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpX)
4274 c.assembler.CompileRegisterToMemory(arm64.STRD,
4275 tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
4276 }
4277
4278 // Update callEngine.moduleContext.functionsElement0Address.
4279 {
4280 // "tmpX = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
4281 //
4282 // Go's interface is laid out in memory as two quad words, as in struct {tab, data uintptr},
4283 // where tab points to the interface table, and data points to the actual
4284 // implementation of the interface. In this case, we extract the "data" pointer as *moduleEngine.
4285 // See the following references for detail:
4286 // * https://research.swtch.com/interfaces
4287 // * https://github.com/golang/go/blob/release-branch.go1.20/src/runtime/runtime2.go#L207-L210
4288 c.assembler.CompileMemoryToRegister(
4289 arm64.LDRD,
4290 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset,
4291 tmpX,
4292 )
4293
4294 // "tmpY = [tmpX + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
4295 c.assembler.CompileMemoryToRegister(
4296 arm64.LDRD,
4297 tmpX, moduleEngineFunctionsOffset,
4298 tmpY,
4299 )
4300
4301 // "callEngine.moduleContext.functionsElement0Address = tmpY".
4302 c.assembler.CompileRegisterToMemory(
4303 arm64.STRD,
4304 tmpY,
4305 arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
4306 )
4307 }
4308
4309 // Update dataInstancesElement0Address.
4310 if c.ir.HasDataInstances {
4311 // "tmpX = &moduleInstance.DataInstances[0]"
4312 c.assembler.CompileMemoryToRegister(
4313 arm64.LDRD,
4314 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
4315 tmpX,
4316 )
4317 // "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
4318 c.assembler.CompileRegisterToMemory(
4319 arm64.STRD,
4320 tmpX,
4321 arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
4322 )
4323 }
4324
4325 // Update callEngine.moduleContext.elementInstancesElement0Address.
4326 if c.ir.HasElementInstances {
4327 // "tmpX = &moduleInstance.ElementInstances[0]"
4328 c.assembler.CompileMemoryToRegister(
4329 arm64.LDRD,
4330 arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
4331 tmpX,
4332 )
4333 // "callEngine.moduleContext.elementInstancesElement0Address = tmpX".

	// Update dataInstancesElement0Address.
	if c.ir.HasDataInstances {
		// "tmpX = &moduleInstance.DataInstances[0]"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
			tmpX,
		)
		// "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
		)
	}

	// Update callEngine.moduleContext.elementInstancesElement0Address.
	if c.ir.HasElementInstances {
		// "tmpX = &moduleInstance.ElementInstances[0]"
		c.assembler.CompileMemoryToRegister(
			arm64.LDRD,
			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
			tmpX,
		)
		// "callEngine.moduleContext.elementInstancesElement0Address = tmpX".
		c.assembler.CompileRegisterToMemory(
			arm64.STRD,
			tmpX,
			arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
		)
	}

	c.assembler.SetJumpTargetOnNext(brIfModuleUnchanged)
	c.markRegisterUnused(tmpX, tmpY)
	return nil
}

// compileAtomicLoad implements compiler.compileAtomicLoad for the arm64 architecture.
func (c *arm64Compiler) compileAtomicLoad(o *wazeroir.UnionOperation) error {
	var (
		loadInst          asm.Instruction
		targetSizeInBytes int64
		vt                runtimeValueType
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		loadInst = arm64.LDARW
		targetSizeInBytes = 32 / 8
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		loadInst = arm64.LDARD
		targetSizeInBytes = 64 / 8
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicLoadImpl(offset, loadInst, targetSizeInBytes, vt)
}

// compileAtomicLoad8 implements compiler.compileAtomicLoad8 for the arm64 architecture.
func (c *arm64Compiler) compileAtomicLoad8(o *wazeroir.UnionOperation) error {
	var vt runtimeValueType

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicLoadImpl(offset, arm64.LDARB, 1, vt)
}

// compileAtomicLoad16 implements compiler.compileAtomicLoad16 for the arm64 architecture.
func (c *arm64Compiler) compileAtomicLoad16(o *wazeroir.UnionOperation) error {
	var vt runtimeValueType

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicLoadImpl(offset, arm64.LDARH, 16/8, vt)
}

func (c *arm64Compiler) compileAtomicLoadImpl(offsetArg uint32, loadInst asm.Instruction,
	targetSizeInBytes int64, resultRuntimeValueType runtimeValueType,
) error {
	baseReg, err := c.compileMemoryAccessBaseSetup(offsetArg, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.compileMemoryAlignmentCheck(baseReg, targetSizeInBytes)

	resultRegister := baseReg
	c.assembler.CompileMemoryWithRegisterSourceToRegister(loadInst, baseReg, resultRegister)

	c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
	return nil
}

// compileAtomicStore implements compiler.compileAtomicStore for the arm64 architecture.
func (c *arm64Compiler) compileAtomicStore(o *wazeroir.UnionOperation) error {
	var (
		storeInst         asm.Instruction
		targetSizeInBytes int64
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		storeInst = arm64.STLRW
		targetSizeInBytes = 32 / 8
	case wazeroir.UnsignedTypeI64:
		storeInst = arm64.STLRD
		targetSizeInBytes = 64 / 8
	}
	return c.compileAtomicStoreImpl(offset, storeInst, targetSizeInBytes)
}
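
// A note on the narrow atomic accesses above (LDARB/LDARH) and below
// (STLRB/STLRH): the loads zero-extend the byte/halfword into the destination
// register, matching Wasm's unsigned semantics for i32/i64.atomic.load8_u and
// load16_u, while the stores write only the low 8/16 bits of the value
// register, so no explicit masking is needed before a narrow atomic store.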

// compileAtomicStore8 implements compiler.compileAtomicStore8 for the arm64 architecture.
func (c *arm64Compiler) compileAtomicStore8(o *wazeroir.UnionOperation) error {
	offset := uint32(o.U2)
	return c.compileAtomicStoreImpl(offset, arm64.STLRB, 1)
}

// compileAtomicStore16 implements compiler.compileAtomicStore16 for the arm64 architecture.
func (c *arm64Compiler) compileAtomicStore16(o *wazeroir.UnionOperation) error {
	offset := uint32(o.U2)
	return c.compileAtomicStoreImpl(offset, arm64.STLRH, 16/8)
}

func (c *arm64Compiler) compileAtomicStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error {
	val, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	// Mark temporarily used as compileMemoryAccessBaseSetup might try allocating a register.
	c.markRegisterUsed(val.register)

	baseReg, err := c.compileMemoryAccessBaseSetup(offsetArg, targetSizeInBytes)
	if err != nil {
		return err
	}
	c.compileMemoryAlignmentCheck(baseReg, targetSizeInBytes)

	c.assembler.CompileRegisterToMemoryWithRegisterDest(
		storeInst,
		val.register,
		baseReg,
	)

	c.markRegisterUnused(val.register)
	return nil
}

// compileAtomicRMW implements compiler.compileAtomicRMW for the arm64 architecture.
func (c *arm64Compiler) compileAtomicRMW(o *wazeroir.UnionOperation) error {
	var (
		inst              asm.Instruction
		targetSizeInBytes int64
		vt                runtimeValueType
		negateArg         bool
		flipArg           bool
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	op := wazeroir.AtomicArithmeticOp(o.B2)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		targetSizeInBytes = 32 / 8
		vt = runtimeValueTypeI32
		switch op {
		case wazeroir.AtomicArithmeticOpAdd:
			inst = arm64.LDADDALW
		case wazeroir.AtomicArithmeticOpSub:
			inst = arm64.LDADDALW
			negateArg = true
		case wazeroir.AtomicArithmeticOpAnd:
			inst = arm64.LDCLRALW
			flipArg = true
		case wazeroir.AtomicArithmeticOpOr:
			inst = arm64.LDSETALW
		case wazeroir.AtomicArithmeticOpXor:
			inst = arm64.LDEORALW
		case wazeroir.AtomicArithmeticOpNop:
			inst = arm64.SWPALW
		}
	case wazeroir.UnsignedTypeI64:
		targetSizeInBytes = 64 / 8
		vt = runtimeValueTypeI64
		switch op {
		case wazeroir.AtomicArithmeticOpAdd:
			inst = arm64.LDADDALD
		case wazeroir.AtomicArithmeticOpSub:
			inst = arm64.LDADDALD
			negateArg = true
		case wazeroir.AtomicArithmeticOpAnd:
			inst = arm64.LDCLRALD
			flipArg = true
		case wazeroir.AtomicArithmeticOpOr:
			inst = arm64.LDSETALD
		case wazeroir.AtomicArithmeticOpXor:
			inst = arm64.LDEORALD
		case wazeroir.AtomicArithmeticOpNop:
			inst = arm64.SWPALD
		}
	}
	return c.compileAtomicRMWImpl(inst, offset, negateArg, flipArg, targetSizeInBytes, vt)
}
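
// atomicSubAndViaAddClrSketch is an illustrative, hypothetical helper (not
// called by the compiler) showing why compileAtomicRMW above can lower
// subtract onto LDADDAL and AND onto LDCLRAL: the LSE atomics have no atomic
// subtract or plain AND, but x - y == x + (-y), and LDCLR computes
// old &^ mask, so passing mask = ^y yields old & y. The NEG/ORN emitted in
// compileAtomicRMWImpl below prepare the argument accordingly.
//
//nolint:unused
func atomicSubAndViaAddClrSketch(old, arg uint32) (sub, and uint32) {
	sub = old + (-arg)  // what LDADDALW computes once the argument is negated
	and = old &^ (^arg) // what LDCLRALW computes once the argument is inverted
	return
}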

// compileAtomicRMW8 implements compiler.compileAtomicRMW8 for the arm64 architecture.
func (c *arm64Compiler) compileAtomicRMW8(o *wazeroir.UnionOperation) error {
	var (
		inst      asm.Instruction
		vt        runtimeValueType
		negateArg bool
		flipArg   bool
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	op := wazeroir.AtomicArithmeticOp(o.B2)
	offset := uint32(o.U2)

	switch op {
	case wazeroir.AtomicArithmeticOpAdd:
		inst = arm64.LDADDALB
	case wazeroir.AtomicArithmeticOpSub:
		inst = arm64.LDADDALB
		negateArg = true
	case wazeroir.AtomicArithmeticOpAnd:
		inst = arm64.LDCLRALB
		flipArg = true
	case wazeroir.AtomicArithmeticOpOr:
		inst = arm64.LDSETALB
	case wazeroir.AtomicArithmeticOpXor:
		inst = arm64.LDEORALB
	case wazeroir.AtomicArithmeticOpNop:
		inst = arm64.SWPALB
	}

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicRMWImpl(inst, offset, negateArg, flipArg, 1, vt)
}

// compileAtomicRMW16 implements compiler.compileAtomicRMW16 for the arm64 architecture.
func (c *arm64Compiler) compileAtomicRMW16(o *wazeroir.UnionOperation) error {
	var (
		inst      asm.Instruction
		vt        runtimeValueType
		negateArg bool
		flipArg   bool
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	op := wazeroir.AtomicArithmeticOp(o.B2)
	offset := uint32(o.U2)

	switch op {
	case wazeroir.AtomicArithmeticOpAdd:
		inst = arm64.LDADDALH
	case wazeroir.AtomicArithmeticOpSub:
		inst = arm64.LDADDALH
		negateArg = true
	case wazeroir.AtomicArithmeticOpAnd:
		inst = arm64.LDCLRALH
		flipArg = true
	case wazeroir.AtomicArithmeticOpOr:
		inst = arm64.LDSETALH
	case wazeroir.AtomicArithmeticOpXor:
		inst = arm64.LDEORALH
	case wazeroir.AtomicArithmeticOpNop:
		inst = arm64.SWPALH
	}

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicRMWImpl(inst, offset, negateArg, flipArg, 16/8, vt)
}
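
// Note: wazeroir.AtomicArithmeticOpNop in the switches above denotes Wasm's
// atomic exchange (*.atomic.rmw*.xchg): the SWPAL family applies no arithmetic
// at all, it stores the argument and yields the previous value in memory.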

func (c *arm64Compiler) compileAtomicRMWImpl(inst asm.Instruction, offsetArg uint32, negateArg bool, flipArg bool,
	targetSizeInBytes int64, resultRuntimeValueType runtimeValueType,
) error {
	val, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	// Mark temporarily used as compileMemoryAccessBaseSetup might try allocating a register.
	c.markRegisterUsed(val.register)

	if negateArg {
		switch resultRuntimeValueType {
		case runtimeValueTypeI32:
			c.assembler.CompileRegisterToRegister(arm64.NEGW, val.register, val.register)
		case runtimeValueTypeI64:
			c.assembler.CompileRegisterToRegister(arm64.NEG, val.register, val.register)
		}
	}

	if flipArg {
		switch resultRuntimeValueType {
		case runtimeValueTypeI32:
			c.assembler.CompileTwoRegistersToRegister(arm64.ORNW, val.register, arm64.RegRZR, val.register)
		case runtimeValueTypeI64:
			c.assembler.CompileTwoRegistersToRegister(arm64.ORN, val.register, arm64.RegRZR, val.register)
		}
	}

	addrReg, err := c.compileMemoryAccessBaseSetup(offsetArg, targetSizeInBytes)
	if err != nil {
		return err
	}
	c.compileMemoryAlignmentCheck(addrReg, targetSizeInBytes)

	resultRegister := addrReg
	c.assembler.CompileTwoRegistersToRegister(inst, val.register, addrReg, resultRegister)

	c.markRegisterUnused(val.register)

	c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
	return nil
}

// compileAtomicRMWCmpxchg implements compiler.compileAtomicRMWCmpxchg for the arm64 architecture.
func (c *arm64Compiler) compileAtomicRMWCmpxchg(o *wazeroir.UnionOperation) error {
	var (
		casInst           asm.Instruction
		targetSizeInBytes int64
		vt                runtimeValueType
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		casInst = arm64.CASALW
		targetSizeInBytes = 32 / 8
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		casInst = arm64.CASALD
		targetSizeInBytes = 64 / 8
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicRMWCmpxchgImpl(casInst, offset, targetSizeInBytes, vt)
}

// compileAtomicRMW8Cmpxchg implements compiler.compileAtomicRMW8Cmpxchg for the arm64 architecture.
func (c *arm64Compiler) compileAtomicRMW8Cmpxchg(o *wazeroir.UnionOperation) error {
	var vt runtimeValueType

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicRMWCmpxchgImpl(arm64.CASALB, offset, 1, vt)
}

// compileAtomicRMW16Cmpxchg implements compiler.compileAtomicRMW16Cmpxchg for the arm64 architecture.
func (c *arm64Compiler) compileAtomicRMW16Cmpxchg(o *wazeroir.UnionOperation) error {
	var vt runtimeValueType

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
	}
	return c.compileAtomicRMWCmpxchgImpl(arm64.CASALH, offset, 16/8, vt)
}
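
// casSemanticsSketch is a plain-Go, hypothetical helper (not used by the
// compiler) sketching the CASAL behavior compileAtomicRMWCmpxchgImpl below
// relies on: the register holding the expected value is overwritten with the
// value actually observed in memory, and the store happens only on a match.
// That observed value is what Wasm's *.atomic.rmw*.cmpxchg returns, which is
// why the implementation pushes exp.register as the result.
//
//nolint:unused
func casSemanticsSketch(mem []uint32, idx int, expected, replacement uint32) (observed uint32) {
	// The real instruction performs this read-compare-write as one atomic step.
	observed = mem[idx]
	if observed == expected {
		mem[idx] = replacement
	}
	return
}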

func (c *arm64Compiler) compileAtomicRMWCmpxchgImpl(inst asm.Instruction, offsetArg uint32, targetSizeInBytes int64, resultRuntimeValueType runtimeValueType) error {
	repl, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(repl.register)
	// The CAS instruction loads the old value into the register holding the comparison value.
	exp, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	if isZeroRegister(exp.register) {
		// exp is also used for the load, so if it's set to the zero register we need to move it
		// to a loadable register.
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
		exp.register = reg
	}
	// Mark temporarily used as compileMemoryAccessBaseSetup might try allocating a register.
	c.markRegisterUsed(exp.register)

	addrReg, err := c.compileMemoryAccessBaseSetup(offsetArg, targetSizeInBytes)
	if err != nil {
		return err
	}
	c.compileMemoryAlignmentCheck(addrReg, targetSizeInBytes)

	c.assembler.CompileTwoRegistersToRegister(inst, exp.register, addrReg, repl.register)

	c.markRegisterUnused(repl.register)
	c.pushRuntimeValueLocationOnRegister(exp.register, resultRuntimeValueType)
	return nil
}
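
// Unlike the operations above, memory.atomic.wait32/wait64 and
// memory.atomic.notify are not lowered to arm64 instructions. The two
// functions below validate and align-check the address, push the operands
// back onto the value stack, and call out to the Go-side builtins
// (builtinFunctionMemoryWait32/builtinFunctionMemoryWait64 and
// builtinFunctionMemoryNotify), re-establishing the reserved registers on
// return.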

// compileAtomicMemoryWait implements compiler.compileAtomicMemoryWait for the arm64 architecture.
func (c *arm64Compiler) compileAtomicMemoryWait(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	var (
		vt                runtimeValueType
		targetSizeInBytes int64
		waitFunc          wasm.Index
	)

	unsignedType := wazeroir.UnsignedType(o.B1)
	offset := uint32(o.U2)

	switch unsignedType {
	case wazeroir.UnsignedTypeI32:
		vt = runtimeValueTypeI32
		targetSizeInBytes = 32 / 8
		waitFunc = builtinFunctionMemoryWait32
	case wazeroir.UnsignedTypeI64:
		vt = runtimeValueTypeI64
		targetSizeInBytes = 64 / 8
		waitFunc = builtinFunctionMemoryWait64
	}

	timeout, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(timeout.register)
	exp, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(exp.register)

	baseReg, err := c.compileMemoryAccessBaseSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}
	c.markRegisterUsed(baseReg)
	c.compileMemoryAlignmentCheck(baseReg, targetSizeInBytes)

	// Push the address, expected value, and timeout back so they can be read in Go.
	c.pushRuntimeValueLocationOnRegister(baseReg, runtimeValueTypeI64)
	c.pushRuntimeValueLocationOnRegister(exp.register, vt)
	c.pushRuntimeValueLocationOnRegister(timeout.register, runtimeValueTypeI64)
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, waitFunc); err != nil {
		return err
	}
	// The address, expected value, and timeout were consumed in Go.
	c.locationStack.pop()
	c.locationStack.pop()
	c.locationStack.pop()

	// Then, the result was pushed.
	v := c.locationStack.pushRuntimeValueLocationOnStack()
	v.valueType = runtimeValueTypeI32

	c.markRegisterUnused(baseReg)
	c.markRegisterUnused(exp.register)
	c.markRegisterUnused(timeout.register)

	// After return, we re-initialize the reserved registers just like in the preamble of functions.
	c.compileReservedStackBasePointerRegisterInitialization()
	c.compileReservedMemoryRegisterInitialization()

	return nil
}

// compileAtomicMemoryNotify implements compiler.compileAtomicMemoryNotify for the arm64 architecture.
func (c *arm64Compiler) compileAtomicMemoryNotify(o *wazeroir.UnionOperation) error {
	offset := uint32(o.U2)

	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	count, err := c.popValueOnRegister()
	if err != nil {
		return err
	}
	c.markRegisterUsed(count.register)

	baseReg, err := c.compileMemoryAccessBaseSetup(offset, 4)
	if err != nil {
		return err
	}
	c.compileMemoryAlignmentCheck(baseReg, 4)

	// Push the address and count back so they can be read in Go.
	c.pushRuntimeValueLocationOnRegister(baseReg, runtimeValueTypeI64)
	c.pushRuntimeValueLocationOnRegister(count.register, runtimeValueTypeI32)
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionMemoryNotify); err != nil {
		return err
	}

	// The address and count were consumed by Go.
	c.locationStack.pop()
	c.locationStack.pop()

	// Then, the result was pushed.
	v := c.locationStack.pushRuntimeValueLocationOnStack()
	v.valueType = runtimeValueTypeI32

	c.markRegisterUnused(count.register)

	// After return, we re-initialize the reserved registers just like in the preamble of functions.
	c.compileReservedStackBasePointerRegisterInitialization()
	c.compileReservedMemoryRegisterInitialization()
	return nil
}

// compileAtomicFence implements compiler.compileAtomicFence for the arm64 architecture.
func (c *arm64Compiler) compileAtomicFence(_ *wazeroir.UnionOperation) error {
	c.assembler.CompileStandAlone(arm64.DMB)
	return nil
}
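
// Note: the single DMB above is a data memory barrier ordering the memory
// accesses preceding the fence before those following it, which is sufficient
// (if conservative) for Wasm's atomic.fence. The operation carries no
// operands, hence the ignored parameter.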