wa-lang.org/wazero@v1.0.2/internal/engine/compiler/impl_arm64.go

// This file implements the compiler for the arm64 target.
// Please refer to https://developer.arm.com/documentation/102374/latest/
// if unfamiliar with arm64 instructions and semantics.
package compiler

import (
	"bytes"
	"errors"
	"fmt"
	"math"

	"wa-lang.org/wazero/internal/asm"
	"wa-lang.org/wazero/internal/asm/arm64"
	"wa-lang.org/wazero/internal/platform"
	"wa-lang.org/wazero/internal/wasm"
	"wa-lang.org/wazero/internal/wazeroir"
)

type arm64Compiler struct {
	assembler arm64.Assembler
	ir        *wazeroir.CompilationResult
	// locationStack holds the state of the wazeroir virtual stack,
	// and each item is either placed in a register or on the actual memory stack.
	locationStack *runtimeValueLocationStack
	// labels maps a label (e.g. ".L1_then") to *arm64LabelInfo.
	labels map[string]*arm64LabelInfo
	// stackPointerCeil is the greatest stack pointer value (from runtimeValueLocationStack) seen during compilation.
	stackPointerCeil uint64
	// onStackPointerCeilDeterminedCallBack holds a callback which is called when the ceil of stack pointer is determined before generating native code.
	onStackPointerCeilDeterminedCallBack func(stackPointerCeil uint64)
	withListener                         bool
}

func newArm64Compiler(ir *wazeroir.CompilationResult, withListener bool) (compiler, error) {
	return &arm64Compiler{
		assembler:     arm64.NewAssembler(arm64ReservedRegisterForTemporary),
		locationStack: newRuntimeValueLocationStack(),
		ir:            ir,
		labels:        map[string]*arm64LabelInfo{},
		withListener:  withListener,
	}, nil
}

var (
	arm64UnreservedVectorRegisters = []asm.Register{
		arm64.RegV0, arm64.RegV1, arm64.RegV2, arm64.RegV3,
		arm64.RegV4, arm64.RegV5, arm64.RegV6, arm64.RegV7, arm64.RegV8,
		arm64.RegV9, arm64.RegV10, arm64.RegV11, arm64.RegV12, arm64.RegV13,
		arm64.RegV14, arm64.RegV15, arm64.RegV16, arm64.RegV17, arm64.RegV18,
		arm64.RegV19, arm64.RegV20, arm64.RegV21, arm64.RegV22, arm64.RegV23,
		arm64.RegV24, arm64.RegV25, arm64.RegV26, arm64.RegV27, arm64.RegV28,
		arm64.RegV29, arm64.RegV30, arm64.RegV31,
	}

	// Note (see arm64 section in https://go.dev/doc/asm):
	// * RegR18 is reserved as a platform register, and we don't use it in Compiler.
	// * RegR28 is reserved for Goroutine by Go runtime, and we don't use it in Compiler.
	arm64UnreservedGeneralPurposeRegisters = []asm.Register{ // nolint
		arm64.RegR3, arm64.RegR4, arm64.RegR5, arm64.RegR6, arm64.RegR7, arm64.RegR8,
		arm64.RegR9, arm64.RegR10, arm64.RegR11, arm64.RegR12, arm64.RegR13,
		arm64.RegR14, arm64.RegR15, arm64.RegR16, arm64.RegR17, arm64.RegR19,
		arm64.RegR20, arm64.RegR21, arm64.RegR22, arm64.RegR23, arm64.RegR24,
		arm64.RegR25, arm64.RegR26, arm64.RegR29, arm64.RegR30,
	}
)

const (
	// arm64ReservedRegisterForCallEngine holds the pointer to the callEngine instance (i.e. *callEngine as uintptr).
	arm64ReservedRegisterForCallEngine = arm64.RegR0
	// arm64ReservedRegisterForStackBasePointerAddress holds the stack base pointer's address (callEngine.stackBasePointer) in the current function call.
	arm64ReservedRegisterForStackBasePointerAddress = arm64.RegR1
	// arm64ReservedRegisterForMemory holds the pointer to the memory slice's data (i.e. &memory.Buffer[0] as uintptr).
	arm64ReservedRegisterForMemory = arm64.RegR2
	// arm64ReservedRegisterForTemporary is the temporary register which is available at any point of execution,
	// but its content isn't guaranteed to live beyond a single operation.
	// Note: we choose R27 as that is the temporary register used in Go's assembler.
	arm64ReservedRegisterForTemporary = arm64.RegR27
)

var arm64CallingConventionModuleInstanceAddressRegister = arm64.RegR29

const (
	// arm64CallEngineArchContextCompilerCallReturnAddressOffset is the offset of archContext.nativeCallReturnAddress in callEngine.
	arm64CallEngineArchContextCompilerCallReturnAddressOffset = 136
	// arm64CallEngineArchContextMinimum32BitSignedIntOffset is the offset of archContext.minimum32BitSignedIntAddress in callEngine.
	arm64CallEngineArchContextMinimum32BitSignedIntOffset = 144
	// arm64CallEngineArchContextMinimum64BitSignedIntOffset is the offset of archContext.minimum64BitSignedIntAddress in callEngine.
	arm64CallEngineArchContextMinimum64BitSignedIntOffset = 152
)

func isZeroRegister(r asm.Register) bool {
	return r == arm64.RegRZR
}

// compile implements compiler.compile for the arm64 architecture.
func (c *arm64Compiler) compile() (code []byte, stackPointerCeil uint64, err error) {
	// c.stackPointerCeil tracks the stack pointer ceiling (max seen) value across all runtimeValueLocationStack(s)
	// used for all labels (via setLocationStack), excluding the current one.
	// Hence, we check here if the final block's max one exceeds the current c.stackPointerCeil.
	stackPointerCeil = c.stackPointerCeil
	if stackPointerCeil < c.locationStack.stackPointerCeil {
		stackPointerCeil = c.locationStack.stackPointerCeil
	}

	// Now that the ceil of the stack pointer is determined, we invoke the callback.
	// Note: this must be called before Assemble() below.
	if c.onStackPointerCeilDeterminedCallBack != nil {
		c.onStackPointerCeilDeterminedCallBack(stackPointerCeil)
	}

	var original []byte
	original, err = c.assembler.Assemble()
	if err != nil {
		return
	}

	code, err = platform.MmapCodeSegment(bytes.NewReader(original), len(original))
	return
}

// arm64LabelInfo holds wazeroir label-specific information in this function.
type arm64LabelInfo struct {
	// initialInstruction is the initial instruction for this label so other blocks can branch into it.
	initialInstruction asm.Node
	// initialStack is the initial value location stack from which we start compiling this label.
	initialStack *runtimeValueLocationStack
	// labelBeginningCallbacks holds callbacks to be called with initialInstruction.
	labelBeginningCallbacks []func(asm.Node)
}

func (c *arm64Compiler) label(labelKey string) *arm64LabelInfo {
	ret, ok := c.labels[labelKey]
	if ok {
		return ret
	}
	c.labels[labelKey] = &arm64LabelInfo{}
	return c.labels[labelKey]
}

// runtimeValueLocationStack implements compilerImpl.runtimeValueLocationStack for the arm64 architecture.
func (c *arm64Compiler) runtimeValueLocationStack() *runtimeValueLocationStack {
	return c.locationStack
}

// pushRuntimeValueLocationOnRegister implements compiler.pushRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushRuntimeValueLocationOnRegister(reg asm.Register, vt runtimeValueType) (ret *runtimeValueLocation) {
	ret = c.locationStack.pushRuntimeValueLocationOnRegister(reg, vt)
	c.markRegisterUsed(reg)
	return
}

// pushVectorRuntimeValueLocationOnRegister implements compiler.pushVectorRuntimeValueLocationOnRegister for arm64.
func (c *arm64Compiler) pushVectorRuntimeValueLocationOnRegister(reg asm.Register) (lowerBitsLocation *runtimeValueLocation) {
	lowerBitsLocation = c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Lo)
	c.locationStack.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeV128Hi)
	c.markRegisterUsed(reg)
	return
}

func (c *arm64Compiler) markRegisterUsed(regs ...asm.Register) {
	for _, reg := range regs {
		if !isZeroRegister(reg) && reg != asm.NilRegister {
			c.locationStack.markRegisterUsed(reg)
		}
	}
}

func (c *arm64Compiler) markRegisterUnused(regs ...asm.Register) {
	for _, reg := range regs {
		if !isZeroRegister(reg) && reg != asm.NilRegister {
			c.locationStack.markRegisterUnused(reg)
		}
	}
}

func (c *arm64Compiler) String() (ret string) { return c.locationStack.String() }

// compilePreamble implements compiler.compilePreamble for the arm64 architecture.
func (c *arm64Compiler) compilePreamble() error {
	c.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
	defer c.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

	c.locationStack.init(c.ir.Signature)

	// Check if it's necessary to grow the value stack before entering the function body.
	if err := c.compileMaybeGrowStack(); err != nil {
		return err
	}

	if err := c.compileModuleContextInitialization(); err != nil {
		return err
	}

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	// We must initialize the stack base pointer register so that we can manipulate the stack properly.
	c.compileReservedStackBasePointerRegisterInitialization()

	c.compileReservedMemoryRegisterInitialization()

	return nil
}

// compileMaybeGrowStack adds instructions to check the necessity to grow the value stack,
// and if so, makes the builtin function call to do so. These instructions are called in the function's
// preamble.
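// Conceptually, the emitted check is the following (a sketch; stackPointerCeil is
// measured in 8-byte slots, and its final value is patched in later via
// onStackPointerCeilDeterminedCallBack once the compiler knows it):
//
//	if stackLenInBytes-stackBasePointerInBytes < stackPointerCeil*8 {
//		builtinFunctionGrowStack()
//	}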
func (c *arm64Compiler) compileMaybeGrowStack() error {
	tmpRegs, found := c.locationStack.takeFreeRegisters(registerTypeGeneralPurpose, 2)
	if !found {
		panic("BUG: all the registers should be free at this point")
	}
	tmpX, tmpY := tmpRegs[0], tmpRegs[1]

	// "tmpX = len(ce.stack)"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackLenInBytesOffset,
		tmpX,
	)

	// "tmpY = ce.stackBasePointer"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		tmpY,
	)

	// "tmpX = tmpX - tmpY", in other words "tmpX = len(ce.stack) - ce.stackBasePointer"
	c.assembler.CompileRegisterToRegister(
		arm64.SUB,
		tmpY,
		tmpX,
	)

	// "tmpY = stackPointerCeil"
	loadStackPointerCeil := c.assembler.CompileConstToRegister(
		arm64.MOVD,
		math.MaxInt32,
		tmpY,
	)
	// At this point of compilation, we don't know the value of the stack pointer ceil,
	// so we lazily resolve the value later.
	c.onStackPointerCeilDeterminedCallBack = func(stackPointerCeil uint64) {
		loadStackPointerCeil.AssignSourceConstant(int64(stackPointerCeil) << 3)
	}

	// Compare tmpX (len(ce.stack) - ce.stackBasePointer) and tmpY (stackPointerCeil in bytes).
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmpX, tmpY)

	// If ceil > stackLen - stack base pointer, we need to grow the stack by calling the builtin Go function.
	brIfStackOK := c.assembler.CompileJump(arm64.BCONDLS)
	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexGrowStack); err != nil {
		return err
	}

	// Otherwise, skip calling it.
	c.assembler.SetJumpTargetOnNext(brIfStackOK)

	c.markRegisterUnused(tmpRegs...)
	return nil
}

// compileReturnFunction emits instructions to return from the current function frame.
// If the current frame is the bottom, the code goes back to the Go code with nativeCallStatusCodeReturned status.
// Otherwise, we branch into the caller's return address.
func (c *arm64Compiler) compileReturnFunction() error {
	// Release all the registers, as our calling convention is caller-save.
	if err := c.compileReleaseAllRegistersToStack(); err != nil {
		return err
	}

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexFunctionListenerAfter); err != nil {
			return err
		}
		// After return, we re-initialize the stack base pointer as that is used to return to the caller below.
		c.compileReservedStackBasePointerRegisterInitialization()
	}

	// arm64CallingConventionModuleInstanceAddressRegister holds the module instance's address,
	// so mark it used so that it won't be used as a free register.
	c.locationStack.markRegisterUsed(arm64CallingConventionModuleInstanceAddressRegister)
	defer c.locationStack.markRegisterUnused(arm64CallingConventionModuleInstanceAddressRegister)

	returnAddress, callerStackBasePointerInBytes, callerFunction := c.locationStack.getCallFrameLocations(c.ir.Signature)

	// If the return address is zero, we are returning from the entire execution (back to the Go code).
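	// What follows emits, roughly (a sketch in assembly-like notation):
	//
	//	LDR  Xtmp, [return address slot]
	//	CMP  Xtmp, XZR
	//	B.NE restore_caller   ;; non-zero: restore the caller's frame and br into it
	//	;; zero: exit to Go with nativeCallStatusCodeReturned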
	returnAddress.setRegister(arm64ReservedRegisterForTemporary)
	c.compileLoadValueOnStackToRegister(returnAddress)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, arm64.RegRZR)

	// Br if the address does not equal zero.
	brIfNotEqual := c.assembler.CompileJump(arm64.BCONDNE)
	// Otherwise, exit.
	c.compileExitFromNativeCode(nativeCallStatusCodeReturned)

	// If the address doesn't equal zero, br into returnAddress.register (the caller's return address).
	c.assembler.SetJumpTargetOnNext(brIfNotEqual)

	// Alias for readability.
	tmp := arm64CallingConventionModuleInstanceAddressRegister

	// First, restore callEngine.stackContext.stackBasePointerInBytes from callerStackBasePointerInBytes.
	callerStackBasePointerInBytes.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerStackBasePointerInBytes)
	c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

	// Next, restore moduleContext.fn from callerFunction.
	callerFunction.setRegister(tmp)
	c.compileLoadValueOnStackToRegister(callerFunction)
	c.assembler.CompileRegisterToMemory(arm64.STRD, tmp,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

	// Also, we have to put the caller function's *wasm.ModuleInstance into arm64CallingConventionModuleInstanceAddressRegister.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		tmp, functionModuleInstanceAddressOffset,
		arm64CallingConventionModuleInstanceAddressRegister)

	c.assembler.CompileJumpToRegister(arm64.B, returnAddress.register)
	return nil
}

// compileExitFromNativeCode adds instructions to give the control back to ce.exec with the given status code.
func (c *arm64Compiler) compileExitFromNativeCode(status nativeCallStatusCode) {
	// Write the current stack pointer to ce.stackPointer.
	c.assembler.CompileConstToRegister(arm64.MOVD, int64(c.locationStack.sp), arm64ReservedRegisterForTemporary)
	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForCallEngine,
		callEngineStackContextStackPointerOffset)

	if status != 0 {
		c.assembler.CompileConstToRegister(arm64.MOVW, int64(status), arm64ReservedRegisterForTemporary)
		c.assembler.CompileRegisterToMemory(arm64.STRW, arm64ReservedRegisterForTemporary,
			arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
	} else {
		// If status == 0, we use the zero register to store zero.
		c.assembler.CompileRegisterToMemory(arm64.STRW, arm64.RegRZR,
			arm64ReservedRegisterForCallEngine, callEngineExitContextNativeCallStatusCodeOffset)
	}

	// The return address to the Go code is stored in archContext.compilerReturnAddress which
	// is embedded in ce. We load the value to the tmpRegister, and then
	// invoke RET with that register.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, arm64CallEngineArchContextCompilerCallReturnAddressOffset,
		arm64ReservedRegisterForTemporary)

	c.assembler.CompileJumpToRegister(arm64.RET, arm64ReservedRegisterForTemporary)
}

// compileGoDefinedHostFunction implements compiler.compileGoDefinedHostFunction for the arm64 architecture.
func (c *arm64Compiler) compileGoDefinedHostFunction() error {
	// First we must update the location stack to reflect the number of host function inputs.
	c.locationStack.init(c.ir.Signature)

	if c.withListener {
		if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction,
			builtinFunctionIndexFunctionListenerBefore); err != nil {
			return err
		}
	}

	if err := c.compileCallGoFunction(nativeCallStatusCodeCallGoHostFunction, 0); err != nil {
		return err
	}

	// Initialize the reserved stack base pointer register, which is used to retrieve the call frame stack.
	c.compileReservedStackBasePointerRegisterInitialization()
	return c.compileReturnFunction()
}

// setLocationStack sets the given runtimeValueLocationStack to the .locationStack field,
// while allowing us to track runtimeValueLocationStack.stackPointerCeil across multiple stacks.
// This is called when we branch into a different block.
func (c *arm64Compiler) setLocationStack(newStack *runtimeValueLocationStack) {
	if c.stackPointerCeil < c.locationStack.stackPointerCeil {
		c.stackPointerCeil = c.locationStack.stackPointerCeil
	}
	c.locationStack = newStack
}

// compileLabel implements compiler.compileLabel for the arm64 architecture.
func (c *arm64Compiler) compileLabel(o *wazeroir.OperationLabel) (skipThisLabel bool) {
	labelKey := o.Label.String()
	arm64LabelInfo := c.label(labelKey)

	// If initialStack is not set, that means this label has never been reached.
	if arm64LabelInfo.initialStack == nil {
		skipThisLabel = true
		return
	}

	// We use NOP as the beginning of instructions in a label.
	// This should eventually be optimized out by the assembler.
	labelBegin := c.assembler.CompileStandAlone(arm64.NOP)

	// Save the instruction so that backward branching
	// instructions can branch to this label.
	arm64LabelInfo.initialInstruction = labelBegin

	// Set the initial stack.
	c.setLocationStack(arm64LabelInfo.initialStack)

	// Invoke callbacks to notify that the forward branching
	// instructions can properly branch to this label.
	for _, cb := range arm64LabelInfo.labelBeginningCallbacks {
		cb(labelBegin)
	}
	return false
}

// compileUnreachable implements compiler.compileUnreachable for the arm64 architecture.
func (c *arm64Compiler) compileUnreachable() error {
	c.compileExitFromNativeCode(nativeCallStatusCodeUnreachable)
	return nil
}

// compileSet implements compiler.compileSet for the arm64 architecture.
func (c *arm64Compiler) compileSet(o *wazeroir.OperationSet) error {
	setTargetIndex := int(c.locationStack.sp) - 1 - o.Depth

	if o.IsTargetVector {
		_ = c.locationStack.pop()
	}
	v := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	targetLocation := c.locationStack.stack[setTargetIndex]
	if targetLocation.onRegister() {
		// We no longer need the register previously used by the target location.
		c.markRegisterUnused(targetLocation.register)
	}

	reg := v.register
	targetLocation.setRegister(reg)
	if o.IsTargetVector {
		c.locationStack.stack[setTargetIndex+1].setRegister(reg)
	}
	return nil
}

// compileGlobalGet implements compiler.compileGlobalGet for the arm64 architecture.
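// Conceptually, the emitted sequence is (a sketch):
//
//	globalAddr = moduleContext.globals[o.Index]             // load the *GlobalInstance
//	result     = *(globalAddr + globalInstanceValueOffset)  // typed load of its Val field (128-bit for v128)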
func (c *arm64Compiler) compileGlobalGet(o *wazeroir.OperationGlobalGet) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	wasmValueType := c.ir.Globals[o.Index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128
	// Get the address of globals[index] into globalAddressReg.
	globalAddressReg, err := c.compileReadGlobalAddress(o.Index)
	if err != nil {
		return err
	}

	if isV128 {
		resultReg, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.assembler.CompileConstToRegister(arm64.ADD, globalInstanceValueOffset, globalAddressReg)
		c.assembler.CompileMemoryToVectorRegister(arm64.VMOV, globalAddressReg, 0,
			resultReg, arm64.VectorArrangementQ)

		c.pushVectorRuntimeValueLocationOnRegister(resultReg)
	} else {
		ldr := arm64.NOP
		var result asm.Register
		var vt runtimeValueType
		switch wasmValueType {
		case wasm.ValueTypeI32:
			ldr = arm64.LDRW
			vt = runtimeValueTypeI32
			result = globalAddressReg
		case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
			ldr = arm64.LDRD
			vt = runtimeValueTypeI64
			result = globalAddressReg
		case wasm.ValueTypeF32:
			result, err = c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}
			ldr = arm64.FLDRS
			vt = runtimeValueTypeF32
		case wasm.ValueTypeF64:
			result, err = c.allocateRegister(registerTypeVector)
			if err != nil {
				return err
			}
			ldr = arm64.FLDRD
			vt = runtimeValueTypeF64
		}

		// "result = [globalAddressReg + globalInstanceValueOffset] (== globals[index].Val)"
		c.assembler.CompileMemoryToRegister(
			ldr,
			globalAddressReg, globalInstanceValueOffset,
			result,
		)

		c.pushRuntimeValueLocationOnRegister(result, vt)
	}
	return nil
}

// compileGlobalSet implements compiler.compileGlobalSet for the arm64 architecture.
func (c *arm64Compiler) compileGlobalSet(o *wazeroir.OperationGlobalSet) error {
	wasmValueType := c.ir.Globals[o.Index].ValType
	isV128 := wasmValueType == wasm.ValueTypeV128

	var val *runtimeValueLocation
	if isV128 {
		val = c.locationStack.popV128()
	} else {
		val = c.locationStack.pop()
	}
	if err := c.compileEnsureOnRegister(val); err != nil {
		return err
	}

	globalInstanceAddressRegister, err := c.compileReadGlobalAddress(o.Index)
	if err != nil {
		return err
	}

	if isV128 {
		c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
			val.register, globalInstanceAddressRegister, globalInstanceValueOffset,
			arm64.VectorArrangementQ)
	} else {
		var str asm.Instruction
		switch c.ir.Globals[o.Index].ValType {
		case wasm.ValueTypeI32:
			str = arm64.STRW
		case wasm.ValueTypeI64, wasm.ValueTypeExternref, wasm.ValueTypeFuncref:
			str = arm64.STRD
		case wasm.ValueTypeF32:
			str = arm64.FSTRS
		case wasm.ValueTypeF64:
			str = arm64.FSTRD
		}

		// At this point "globalInstanceAddressRegister = globals[index]".
		// Therefore, this means "globals[index].Val = val.register".
		c.assembler.CompileRegisterToMemory(
			str,
			val.register,
			globalInstanceAddressRegister, globalInstanceValueOffset,
		)
	}

	c.markRegisterUnused(val.register)
	return nil
}

// compileReadGlobalAddress adds instructions to store the absolute address of the global instance at globalIndex into a register.
func (c *arm64Compiler) compileReadGlobalAddress(globalIndex uint32) (destinationRegister asm.Register, err error) {
	// TODO: rethink the type used to store globals (`globals []*GlobalInstance`).
	// If we used `[]GlobalInstance` instead, we could eliminate one MOV instruction here.

	destinationRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return
	}

	// "destinationRegister = globalIndex * 8"
	c.assembler.CompileConstToRegister(
		// globalIndex is an index to []*GlobalInstance, therefore
		// we have to multiply it by the size of *GlobalInstance == the pointer size == 8.
		arm64.MOVD, int64(globalIndex)*8, destinationRegister,
	)

	// "arm64ReservedRegisterForTemporary = &globals[0]"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
		arm64ReservedRegisterForTemporary,
	)

	// "destinationRegister = [arm64ReservedRegisterForTemporary + destinationRegister] (== globals[globalIndex])"
	c.assembler.CompileMemoryWithRegisterOffsetToRegister(
		arm64.LDRD,
		arm64ReservedRegisterForTemporary, destinationRegister,
		destinationRegister,
	)
	return
}

// compileBr implements compiler.compileBr for the arm64 architecture.
func (c *arm64Compiler) compileBr(o *wazeroir.OperationBr) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	return c.compileBranchInto(o.Target)
}

// compileBrIf implements compiler.compileBrIf for the arm64 architecture.
func (c *arm64Compiler) compileBrIf(o *wazeroir.OperationBrIf) error {
	cond := c.locationStack.pop()

	var conditionalBR asm.Node
	if cond.onConditionalRegister() {
		// If the cond is on a conditional register, it corresponds to one of the "condition codes":
		// https://developer.arm.com/documentation/dui0801/a/Condition-Codes/Condition-code-suffixes
		// Here we represent the condition codes by using arm64.COND_** registers, and that means the
		// conditional jump can be performed if we use arm64.B**.
		// For example, if we have arm64.CondEQ on cond, that means we performed compileEq right before
		// this compileBrIf, and BrIf can be achieved by arm64.BCONDEQ.
		var brInst asm.Instruction
		switch cond.conditionalRegister {
		case arm64.CondEQ:
			brInst = arm64.BCONDEQ
		case arm64.CondNE:
			brInst = arm64.BCONDNE
		case arm64.CondHS:
			brInst = arm64.BCONDHS
		case arm64.CondLO:
			brInst = arm64.BCONDLO
		case arm64.CondMI:
			brInst = arm64.BCONDMI
		case arm64.CondHI:
			brInst = arm64.BCONDHI
		case arm64.CondLS:
			brInst = arm64.BCONDLS
		case arm64.CondGE:
			brInst = arm64.BCONDGE
		case arm64.CondLT:
			brInst = arm64.BCONDLT
		case arm64.CondGT:
			brInst = arm64.BCONDGT
		case arm64.CondLE:
			brInst = arm64.BCONDLE
		default:
			// BUG: This means that we use cond.conditionalRegister somewhere in this file,
			// but it's not covered in the switch above. That shouldn't happen.
			return fmt.Errorf("unsupported condition for br_if: %v", cond.conditionalRegister)
		}
		conditionalBR = c.assembler.CompileJump(brInst)
	} else {
		// If the value is not on the conditional register, we compare the value with the zero register,
		// and then do the conditional BR if the value doesn't equal zero.
		if err := c.compileEnsureOnRegister(cond); err != nil {
			return err
		}
		// Compare the value with the zero register. Note that the value is ensured to be i32 by the function validation phase,
		// so we use CMPW (32-bit compare) here.
		c.assembler.CompileTwoRegistersToNone(arm64.CMPW, cond.register, arm64.RegRZR)

		conditionalBR = c.assembler.CompileJump(arm64.BCONDNE)

		c.markRegisterUnused(cond.register)
	}

	// Emit the code for branching into the else branch.
	// We save and clone the location stack because we might end up modifying it inside of branchInto,
	// and we have to avoid affecting the code generation for the then branch afterwards.
	saved := c.locationStack
	c.setLocationStack(saved.clone())
	if err := compileDropRange(c, o.Else.ToDrop); err != nil {
		return err
	}
	if err := c.compileBranchInto(o.Else.Target); err != nil {
		return err
	}

	// Now we are ready to emit the code for branching into the then branch.
	// Retrieve the original value location stack so that the code below won't be affected by the else branch above.
	c.setLocationStack(saved)
	// We branch into here from the original conditional BR (conditionalBR).
	c.assembler.SetJumpTargetOnNext(conditionalBR)
	if err := compileDropRange(c, o.Then.ToDrop); err != nil {
		return err
	}
	return c.compileBranchInto(o.Then.Target)
}

func (c *arm64Compiler) compileBranchInto(target *wazeroir.BranchTarget) error {
	if target.IsReturnTarget() {
		return c.compileReturnFunction()
	} else {
		labelKey := target.String()
		if c.ir.LabelCallers[labelKey] > 1 {
			// We can only re-use the register state when there's a single call-site.
			// If there are multiple call-sites, release the existing values on registers to the stack,
			// so that the value location state is consistent at the beginning of the label.
			if err := c.compileReleaseAllRegistersToStack(); err != nil {
				return err
			}
		}
		// Set the initial stack of the target label, so we can start compiling the label
		// with the appropriate value locations. Note we clone the stack here as we may
		// manipulate the stack before the compiler reaches the label.
		targetLabel := c.label(labelKey)
		if targetLabel.initialStack == nil {
			targetLabel.initialStack = c.locationStack.clone()
		}

		br := c.assembler.CompileJump(arm64.B)
		c.assignBranchTarget(labelKey, br)
		return nil
	}
}

// assignBranchTarget assigns the given label's initial instruction to the destination of br.
func (c *arm64Compiler) assignBranchTarget(labelKey string, br asm.Node) {
	target := c.label(labelKey)
	if target.initialInstruction != nil {
		br.AssignJumpTarget(target.initialInstruction)
	} else {
		// In this case, the target label hasn't been compiled yet, so we append the callback and assign
		// the target instruction when compileLabel is called for the label.
		target.labelBeginningCallbacks = append(target.labelBeginningCallbacks, func(labelInitialInstruction asm.Node) {
			br.AssignJumpTarget(labelInitialInstruction)
		})
	}
}

// compileBrTable implements compiler.compileBrTable for the arm64 architecture.
func (c *arm64Compiler) compileBrTable(o *wazeroir.OperationBrTable) error {
	// If the operation only consists of the default target, we branch into it and return early.
	if len(o.Targets) == 0 {
		loc := c.locationStack.pop()
		if loc.onRegister() {
			c.markRegisterUnused(loc.register)
		}
		if err := compileDropRange(c, o.Default.ToDrop); err != nil {
			return err
		}
		return c.compileBranchInto(o.Default.Target)
	}

	index := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(index); err != nil {
		return err
	}

	if isZeroRegister(index.register) {
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		index.setRegister(reg)
		c.markRegisterUsed(reg)

		// Zero the value on the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
	}

	tmpReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	// Load the branch table's length.
	// "tmpReg = len(o.Targets)"
	c.assembler.CompileConstToRegister(arm64.MOVW, int64(len(o.Targets)), tmpReg)
	// Compare the length with the index value.
	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmpReg, index.register)
	// If the value exceeds the length, we will branch into the default target (corresponding to the len(o.Targets) index).
	brDefaultIndex := c.assembler.CompileJump(arm64.BCONDLO)
	c.assembler.CompileRegisterToRegister(arm64.MOVW, tmpReg, index.register)
	c.assembler.SetJumpTargetOnNext(brDefaultIndex)

	// We prepare the asm.StaticConst which holds the offset of
	// each target's first instruction (incl. default)
	// relative to the beginning of the label tables.
	//
	// For example, if we have targets=[L0, L1] and default=L_DEFAULT,
	// we emit the code like this at [Emit the code for each target and default branch] below.
	//
	// L0:
	//	0x123001: XXXX, ...
	//	.....
	// L1:
	//	0x123005: YYY, ...
	//	.....
	// L_DEFAULT:
	//	0x123009: ZZZ, ...
	//
	// then offsetData becomes like [0x0, 0x5, 0x8].
	// By using this offset list, we can jump into the label for the index by
	// "jmp offsetData[index]+0x123001", where "0x123001" is acquired by reading
	// the current instruction address (see CompileReadInstructionAddress below).
	//
	// Note: We store each offset as a 32-bit unsigned integer, i.e. 4 consecutive bytes. So more precisely,
	// the above example's offsetData would be [0x0, 0x0, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0].
	//
	// Note: this is similar to how GCC implements switch statements in C.
	offsetData := asm.NewStaticConst(make([]byte, 4*(len(o.Targets)+1)))

	// "tmpReg = &offsetData[0]"
	c.assembler.CompileStaticConstToRegister(arm64.ADR, offsetData, tmpReg)

	// "index.register = tmpReg + (index.register << 2) (== &offsetData[offset])"
	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, index.register, 2, tmpReg, index.register)

	// "index.register = *index.register (== offsetData[offset])"
	c.assembler.CompileMemoryToRegister(arm64.LDRW, index.register, 0, index.register)

	// Now we read the address of the beginning of the jump table.
	// In the above example, this corresponds to reading the address of 0x123001.
	c.assembler.CompileReadInstructionAddress(tmpReg, arm64.B)

	// Now we have the address of L0 in the tmp register, and the offset to the target label in index.register.
	// So we can achieve the br_table jump by adding them and jumping into the resulting address.
	c.assembler.CompileRegisterToRegister(arm64.ADD, tmpReg, index.register)

	c.assembler.CompileJumpToRegister(arm64.B, index.register)

	// We no longer need the index's register, so mark it unused.
	c.markRegisterUnused(index.register)

	// [Emit the code for each target and default branch]
	labelInitialInstructions := make([]asm.Node, len(o.Targets)+1)
	saved := c.locationStack
	for i := range labelInitialInstructions {
		// Emit the initial instruction of each target where
		// we use NOP as we don't yet know the next instruction in each label.
		init := c.assembler.CompileStandAlone(arm64.NOP)
		labelInitialInstructions[i] = init

		var locationStack *runtimeValueLocationStack
		var target *wazeroir.BranchTargetDrop
		if i < len(o.Targets) {
			target = o.Targets[i]
			// Clone the location stack so the branch-specific code doesn't
			// affect others.
			locationStack = saved.clone()
		} else {
			target = o.Default
			// If this is the default branch, we use the original one
			// as this is the last code in this block.
			locationStack = saved
		}
		c.setLocationStack(locationStack)
		if err := compileDropRange(c, target.ToDrop); err != nil {
			return err
		}
		if err := c.compileBranchInto(target.Target); err != nil {
			return err
		}
	}

	c.assembler.BuildJumpTable(offsetData, labelInitialInstructions)
	return nil
}

// compileCall implements compiler.compileCall for the arm64 architecture.
func (c *arm64Compiler) compileCall(o *wazeroir.OperationCall) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	tp := c.ir.Types[c.ir.Functions[o.FunctionIndex]]

	targetFunctionAddressReg, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(targetFunctionAddressReg)
	defer c.markRegisterUnused(targetFunctionAddressReg)

	// First, we read the address of the first item of the ce.functions slice (= &ce.functions[0])
	// into targetFunctionAddressReg.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
		targetFunctionAddressReg)

	// Then, we load the target *function (= ce.functions[o.FunctionIndex]) into the same register.
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		targetFunctionAddressReg, int64(o.FunctionIndex)*8, // * 8 because the size of *function equals 8 bytes.
		targetFunctionAddressReg)

	return c.compileCallImpl(targetFunctionAddressReg, tp)
}

// compileCallImpl implements compiler.compileCall and compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallImpl(targetFunctionAddressRegister asm.Register, functype *wasm.FunctionType) error {
	// Release all the registers, as our calling convention is caller-save.
	if err := c.compileReleaseAllRegistersToStack(); err != nil {
		return err
	}

	tmp, ok := c.locationStack.takeFreeRegister(registerTypeGeneralPurpose)
	if !ok {
		panic("BUG: cannot take a free register")
	}

	// The stack should look like:
	//
	//                reserved slots for results (if len(results) > len(args))
	//                      |     |
	//	,arg0, ..., argN, ..., _, .returnAddress, .returnStackBasePointerInBytes, .function, ....
	//	   |                      |                                                         |
	//	   |            callFrame{^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^}
	//	   |
	//	nextStackBasePointerOffset
	//
	// where callFrame is used to return to this currently executed function.

	nextStackBasePointerOffset := int64(c.locationStack.sp) - int64(functype.ParamNumInUint64)

	callFrameReturnAddressLoc, callFrameStackBasePointerInBytesLoc, callFrameFunctionLoc := c.locationStack.pushCallFrame(functype)

	// Save the current stack base pointer at callFrameStackBasePointerInBytesLoc.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
		tmp)
	callFrameStackBasePointerInBytesLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameStackBasePointerInBytesLoc)

	// Set callEngine.stackContext.stackBasePointer for the next function.
	c.assembler.CompileConstToRegister(arm64.ADD, nextStackBasePointerOffset<<3, tmp)
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		tmp,
		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset)

	// Save the currently executed *function (placed at callEngine.moduleContext.fn) into callFrameFunctionLoc.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset,
		tmp)
	callFrameFunctionLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameFunctionLoc)

	// Set callEngine.moduleContext.fn to the next *function.
	c.assembler.CompileRegisterToMemory(arm64.STRD,
		targetFunctionAddressRegister,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextFnOffset)

	// Write the return address into callFrameReturnAddressLoc.
	c.assembler.CompileReadInstructionAddress(tmp, arm64.B)
	callFrameReturnAddressLoc.setRegister(tmp)
	c.compileReleaseRegisterToStack(callFrameReturnAddressLoc)

	if targetFunctionAddressRegister == arm64CallingConventionModuleInstanceAddressRegister {
		// In this case we must move the value on targetFunctionAddressRegister to another register, otherwise
		// the address (the jump target below) will be modified and result in a segfault.
		// See #526.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, targetFunctionAddressRegister, tmp)
		targetFunctionAddressRegister = tmp
	}

	// Also, we have to put the target function's module instance address into arm64CallingConventionModuleInstanceAddressRegister.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		targetFunctionAddressRegister, functionModuleInstanceAddressOffset,
		arm64CallingConventionModuleInstanceAddressRegister,
	)

	// Then, br into the target function's initial address.
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		targetFunctionAddressRegister, functionCodeInitialAddressOffset,
		targetFunctionAddressRegister)

	c.assembler.CompileJumpToRegister(arm64.B, targetFunctionAddressRegister)

	// We consumed the function parameters, the call frame stack and reserved slots during the call.
	c.locationStack.sp = uint64(nextStackBasePointerOffset)

	// Also, the function results were pushed by the call.
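	// Note that one location is pushed per 64-bit result slot, so a v128 result
	// occupies two consecutive slots (runtimeValueTypeV128Lo then runtimeValueTypeV128Hi).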
	for _, t := range functype.Results {
		loc := c.locationStack.pushRuntimeValueLocationOnStack()
		switch t {
		case wasm.ValueTypeI32:
			loc.valueType = runtimeValueTypeI32
		case wasm.ValueTypeI64, wasm.ValueTypeFuncref, wasm.ValueTypeExternref:
			loc.valueType = runtimeValueTypeI64
		case wasm.ValueTypeF32:
			loc.valueType = runtimeValueTypeF32
		case wasm.ValueTypeF64:
			loc.valueType = runtimeValueTypeF64
		case wasm.ValueTypeV128:
			loc.valueType = runtimeValueTypeV128Lo
			hi := c.locationStack.pushRuntimeValueLocationOnStack()
			hi.valueType = runtimeValueTypeV128Hi
		}
	}

	if err := c.compileModuleContextInitialization(); err != nil {
		return err
	}

	// On the function return, we initialize the state for this function.
	c.compileReservedStackBasePointerRegisterInitialization()

	c.compileReservedMemoryRegisterInitialization()
	return nil
}

// compileCallIndirect implements compiler.compileCallIndirect for the arm64 architecture.
func (c *arm64Compiler) compileCallIndirect(o *wazeroir.OperationCallIndirect) error {
	offset := c.locationStack.pop()
	if err := c.compileEnsureOnRegister(offset); err != nil {
		return err
	}

	if isZeroRegister(offset.register) {
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		offset.setRegister(reg)
		c.markRegisterUsed(reg)

		// Zero the value on the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, reg)
	}

	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(tmp)

	tmp2, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}
	c.markRegisterUsed(tmp2)

	// First, we need to check that the offset doesn't exceed the length of the table.
	// "tmp = &Tables[0]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
		tmp,
	)
	// "tmp = [tmp + TableIndex*8] = [&Tables[0] + TableIndex*sizeOf(*tableInstance)] = Tables[tableIndex]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		tmp, int64(o.TableIndex)*8,
		tmp,
	)
	// "tmp2 = [tmp + tableInstanceTableLenOffset] = len(Tables[tableIndex])"
	c.assembler.CompileMemoryToRegister(arm64.LDRD, tmp, tableInstanceTableLenOffset, tmp2)

	// "cmp tmp2, offset"
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp2, offset.register)

	// If the offset exceeds len(table), we exit the execution.
	brIfOffsetOK := c.assembler.CompileJump(arm64.BCONDLO)
	c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess)

	// Otherwise, we proceed to do the function type check.
	c.assembler.SetJumpTargetOnNext(brIfOffsetOK)

	// We need to obtain the absolute address of the table element.
	// "tmp = &Tables[tableIndex].table[0]"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		tmp, tableInstanceTableOffset,
		tmp,
	)
	// "offset = tmp + (offset << pointerSizeLog2) (== &table[offset])"
	// Here we left-shift by 3 in order to get the offset in bytes,
	// since the table element type is uintptr which is 8 bytes.
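	// For example, offset == 5 results in "tmp + (5 << 3)", i.e. &table[5].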
	c.assembler.CompileLeftShiftedRegisterToRegister(
		arm64.ADD,
		offset.register, pointerSizeLog2,
		tmp,
		offset.register,
	)

	// "offset = (*offset) (== table[offset])"
	c.assembler.CompileMemoryToRegister(arm64.LDRD, offset.register, 0, offset.register)

	// Check if the value of table[offset] equals zero, meaning that the target element is uninitialized.
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, offset.register)
	brIfInitialized := c.assembler.CompileJump(arm64.BCONDNE)
	c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess)

	c.assembler.SetJumpTargetOnNext(brIfInitialized)
	// Next we check that the type matches, i.e. table[offset].source.TypeID == targetFunctionType.
	// "tmp = table[offset].source (== *FunctionInstance)"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRD,
		offset.register, functionSourceOffset,
		tmp,
	)
	// "tmp = [tmp + functionInstanceTypeIDOffset] (== table[offset].source.TypeID)"
	c.assembler.CompileMemoryToRegister(
		arm64.LDRW, tmp, functionInstanceTypeIDOffset,
		tmp,
	)
	// "tmp2 = ModuleInstance.TypeIDs[index]"
	c.assembler.CompileMemoryToRegister(arm64.LDRD,
		arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset,
		tmp2)
	c.assembler.CompileMemoryToRegister(arm64.LDRW, tmp2, int64(o.TypeIndex)*4, tmp2)

	// Compare these two values, and if they are equal, we are ready to make the function call.
	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, tmp, tmp2)
	brIfTypeMatched := c.assembler.CompileJump(arm64.BCONDEQ)
	c.compileExitFromNativeCode(nativeCallStatusCodeTypeMismatchOnIndirectCall)

	c.assembler.SetJumpTargetOnNext(brIfTypeMatched)

	targetFunctionType := c.ir.Types[o.TypeIndex]
	if err := c.compileCallImpl(offset.register, targetFunctionType); err != nil {
		return err
	}

	// The offset register should be marked as unused, as we consumed it in the function call.
	c.markRegisterUnused(offset.register, tmp, tmp2)
	return nil
}

// compileDrop implements compiler.compileDrop for the arm64 architecture.
func (c *arm64Compiler) compileDrop(o *wazeroir.OperationDrop) error {
	return compileDropRange(c, o.Depth)
}

func (c *arm64Compiler) compileSelectV128Impl(selectorRegister asm.Register) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, selectorRegister)
	brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

	// In this branch, we select the value of x2, so we move the value into x1.register so that
	// we can have the result in x1.register regardless of the selection.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
		x2.register, x2.register, x1.register, arm64.VectorArrangement16B)

	c.assembler.SetJumpTargetOnNext(brIfNotZero)

	// As noted, the result exists in x1.register regardless of the selector.
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	// Plus, x2.register is no longer used.
	c.markRegisterUnused(x2.register)
	return nil
}

// compileSelect implements compiler.compileSelect for the arm64 architecture.
func (c *arm64Compiler) compileSelect(o *wazeroir.OperationSelect) error {
	cv, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	if o.IsTargetVector {
		return c.compileSelectV128Impl(cv.register)
	}

	c.markRegisterUsed(cv.register)

	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
		// If both values are zero, the result is always zero.
		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
		c.markRegisterUnused(cv.register)
		return nil
	}

	// In the following, we emit the code so that x1's register contains the chosen value
	// no matter which of the original x1 or x2 is selected.
	//
	// If x1 is currently on the zero register, we cannot place the result there because
	// "MOV arm64.RegRZR x2.register" results in arm64.RegRZR regardless of the value.
	// So we explicitly assign a general purpose register to x1 here.
	if isZeroRegister(x1.register) {
		// Mark x2's and cv's registers as used so they won't be chosen.
		c.markRegisterUsed(x2.register)
		// Pick a non-zero register for x1.
		x1Reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		x1.setRegister(x1Reg)
		// And zero out the picked register.
		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, x1Reg)
	}

	// At this point, x1 is on a non-zero register, and x2 is on either a general purpose or the zero register.

	c.assembler.CompileTwoRegistersToNone(arm64.CMPW, arm64.RegRZR, cv.register)
	brIfNotZero := c.assembler.CompileJump(arm64.BCONDNE)

	// If cv == 0, we move the value of x2 to x1.register.

	switch x1.valueType {
	case runtimeValueTypeI32:
		// TODO: use 32-bit mov
		c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
	case runtimeValueTypeI64:
		c.assembler.CompileRegisterToRegister(arm64.MOVD, x2.register, x1.register)
	case runtimeValueTypeF32:
		// TODO: use 32-bit mov
		c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
	case runtimeValueTypeF64:
		c.assembler.CompileRegisterToRegister(arm64.FMOVD, x2.register, x1.register)
	default:
		return errors.New("TODO: implement vector type select")
	}

	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)

	// Otherwise, nothing to do for select.
	c.assembler.SetJumpTargetOnNext(brIfNotZero)

	// Only x1.register is reused.
	c.markRegisterUnused(cv.register, x2.register)
	return nil
}

// compilePick implements compiler.compilePick for the arm64 architecture.
func (c *arm64Compiler) compilePick(o *wazeroir.OperationPick) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	pickTarget := c.locationStack.stack[c.locationStack.sp-1-uint64(o.Depth)]
	pickedRegister, err := c.allocateRegister(pickTarget.getRegisterType())
	if err != nil {
		return err
	}

	if pickTarget.onRegister() { // Copy the value to the pickedRegister.
		switch pickTarget.valueType {
		case runtimeValueTypeI32:
			c.assembler.CompileRegisterToRegister(arm64.MOVW, pickTarget.register, pickedRegister)
		case runtimeValueTypeI64:
			c.assembler.CompileRegisterToRegister(arm64.MOVD, pickTarget.register, pickedRegister)
		case runtimeValueTypeF32:
			c.assembler.CompileRegisterToRegister(arm64.FMOVS, pickTarget.register, pickedRegister)
		case runtimeValueTypeF64:
			c.assembler.CompileRegisterToRegister(arm64.FMOVD, pickTarget.register, pickedRegister)
		case runtimeValueTypeV128Lo:
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				pickTarget.register, pickTarget.register, pickedRegister, arm64.VectorArrangement16B)
		case runtimeValueTypeV128Hi:
			panic("BUG") // since the pick target must point to the lower 64-bits of vectors.
		}
	} else if pickTarget.onStack() {
		// Temporarily assign a register to the pick target, and then load the value.
		pickTarget.setRegister(pickedRegister)
		c.compileLoadValueOnStackToRegister(pickTarget)

		// After the load, we revert the register assignment to the pick target.
		pickTarget.setRegister(asm.NilRegister)
		if o.IsTargetVector {
			hi := c.locationStack.stack[pickTarget.stackPointer+1]
			hi.setRegister(asm.NilRegister)
		}
	}

	// Now we have the value of the target on the pickedRegister,
	// so push the location.
	c.pushRuntimeValueLocationOnRegister(pickedRegister, pickTarget.valueType)
	if o.IsTargetVector {
		c.pushRuntimeValueLocationOnRegister(pickedRegister, runtimeValueTypeV128Hi)
	}
	return nil
}

// compileAdd implements compiler.compileAdd for the arm64 architecture.
func (c *arm64Compiler) compileAdd(o *wazeroir.OperationAdd) error {
	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// Addition can be a nop if one of the operands is zero.
	if isZeroRegister(x1.register) {
		c.pushRuntimeValueLocationOnRegister(x2.register, x1.valueType)
		return nil
	} else if isZeroRegister(x2.register) {
		c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
		return nil
	}

	var inst asm.Instruction
	switch o.Type {
	case wazeroir.UnsignedTypeI32:
		inst = arm64.ADDW
	case wazeroir.UnsignedTypeI64:
		inst = arm64.ADD
	case wazeroir.UnsignedTypeF32:
		inst = arm64.FADDS
	case wazeroir.UnsignedTypeF64:
		inst = arm64.FADDD
	}

	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
	// The result is placed on the register for x1, so record it.
	c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
	return nil
}

// compileSub implements compiler.compileSub for the arm64 architecture.
func (c *arm64Compiler) compileSub(o *wazeroir.OperationSub) error {
	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// If both registers are zero, this can be a nop; we simply push the zero register.
	if isZeroRegister(x1.register) && isZeroRegister(x2.register) {
		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
		return nil
	}

	// At this point, at least one of the x1 or x2 registers is non-zero.
	// Choose the non-zero register as the destination.
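	// For example, for the Wasm sequence "i32.const 0; local.get 0; i32.sub",
	// x1 sits on the zero register, so the SUB result (the negation of x2) is
	// written into x2's register instead.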
	destinationReg := x1.register
	if isZeroRegister(x1.register) {
		destinationReg = x2.register
	}

	var inst asm.Instruction
	var vt runtimeValueType
	switch o.Type {
	case wazeroir.UnsignedTypeI32:
		inst = arm64.SUBW
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		inst = arm64.SUB
		vt = runtimeValueTypeI64
	case wazeroir.UnsignedTypeF32:
		inst = arm64.FSUBS
		vt = runtimeValueTypeF32
	case wazeroir.UnsignedTypeF64:
		inst = arm64.FSUBD
		vt = runtimeValueTypeF64
	}

	c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
	c.pushRuntimeValueLocationOnRegister(destinationReg, vt)
	return nil
}

// compileMul implements compiler.compileMul for the arm64 architecture.
func (c *arm64Compiler) compileMul(o *wazeroir.OperationMul) error {
	x1, x2, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// Multiplication can be done by pushing the zero register if one of the operands is zero.
	if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
		c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
		return nil
	}

	var inst asm.Instruction
	var vt runtimeValueType
	switch o.Type {
	case wazeroir.UnsignedTypeI32:
		inst = arm64.MULW
		vt = runtimeValueTypeI32
	case wazeroir.UnsignedTypeI64:
		inst = arm64.MUL
		vt = runtimeValueTypeI64
	case wazeroir.UnsignedTypeF32:
		inst = arm64.FMULS
		vt = runtimeValueTypeF32
	case wazeroir.UnsignedTypeF64:
		inst = arm64.FMULD
		vt = runtimeValueTypeF64
	}

	c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
	// The result is placed on the register for x1, so record it.
	c.pushRuntimeValueLocationOnRegister(x1.register, vt)
	return nil
}

// compileClz implements compiler.compileClz for the arm64 architecture.
func (c *arm64Compiler) compileClz(o *wazeroir.OperationClz) error {
	v, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	if isZeroRegister(v.register) {
		// If the target is the zero register, the result is always 32 (or 64 for 64-bits),
		// so we allocate a register and put the const on it.
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var vt runtimeValueType
		if o.Type == wazeroir.UnsignedInt32 {
			vt = runtimeValueTypeI32
			c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
		} else {
			vt = runtimeValueTypeI64
			c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
		}
		c.pushRuntimeValueLocationOnRegister(reg, vt)
		return nil
	}

	reg := v.register
	var vt runtimeValueType
	if o.Type == wazeroir.UnsignedInt32 {
		vt = runtimeValueTypeI32
		c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
	} else {
		vt = runtimeValueTypeI64
		c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
	}
	c.pushRuntimeValueLocationOnRegister(reg, vt)
	return nil
}

// compileCtz implements compiler.compileCtz for the arm64 architecture.
func (c *arm64Compiler) compileCtz(o *wazeroir.OperationCtz) error {
	v, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	reg := v.register
	if isZeroRegister(reg) {
		// If the target is the zero register, the result is always 32 (or 64 for 64-bits),
		// so we allocate a register and put the const on it.
		reg, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var vt runtimeValueType
		if o.Type == wazeroir.UnsignedInt32 {
			vt = runtimeValueTypeI32
			c.assembler.CompileConstToRegister(arm64.MOVW, 32, reg)
		} else {
			vt = runtimeValueTypeI64
			c.assembler.CompileConstToRegister(arm64.MOVD, 64, reg)
		}
		c.pushRuntimeValueLocationOnRegister(reg, vt)
		return nil
	}

	// Since arm64 doesn't have an instruction directly counting trailing zeros,
	// we reverse the bits first, and then do CLZ, which is exactly how
	// gcc implements __builtin_ctz for arm64.
	var vt runtimeValueType
	if o.Type == wazeroir.UnsignedInt32 {
		vt = runtimeValueTypeI32
		c.assembler.CompileRegisterToRegister(arm64.RBITW, reg, reg)
		c.assembler.CompileRegisterToRegister(arm64.CLZW, reg, reg)
	} else {
		vt = runtimeValueTypeI64
		c.assembler.CompileRegisterToRegister(arm64.RBIT, reg, reg)
		c.assembler.CompileRegisterToRegister(arm64.CLZ, reg, reg)
	}
	c.pushRuntimeValueLocationOnRegister(reg, vt)
	return nil
}

// compilePopcnt implements compiler.compilePopcnt for the arm64 architecture.
func (c *arm64Compiler) compilePopcnt(o *wazeroir.OperationPopcnt) error {
	v, err := c.popValueOnRegister()
	if err != nil {
		return err
	}

	reg := v.register
	if isZeroRegister(reg) {
		c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
		return nil
	}

	freg, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// arm64 doesn't have an instruction for population count on a scalar register,
	// so we use the vector one (VCNT).
	// This is exactly how the official Go compiler implements bits.OnesCount.
	// For example, "func () int { return bits.OnesCount(10) }" is compiled as
	//
	//	MOVD $10, R0 ;; Load 10.
	//	FMOVD R0, F0
	//	VCNT V0.B8, V0.B8
	//	UADDLV V0.B8, V0
	//
	var movInst asm.Instruction
	if o.Type == wazeroir.UnsignedInt32 {
		movInst = arm64.FMOVS
	} else {
		movInst = arm64.FMOVD
	}
	c.assembler.CompileRegisterToRegister(movInst, reg, freg)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.VCNT, freg, freg,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UADDLV, freg, freg, arm64.VectorArrangement8B,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.assembler.CompileRegisterToRegister(movInst, freg, reg)

	c.pushRuntimeValueLocationOnRegister(reg, v.valueType)
	return nil
}

// compileDiv implements compiler.compileDiv for the arm64 architecture.
func (c *arm64Compiler) compileDiv(o *wazeroir.OperationDiv) error {
	dividend, divisor, err := c.popTwoValuesOnRegisters()
	if err != nil {
		return err
	}

	// If the divisor is on the zero register, exit from the function deterministically.
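	// Note: this branch catches a divisor that is known to be zero at compile time
	// (its location is the zero register). For non-constant divisors, the runtime
	// zero check is emitted by compileIntegerDivPrecheck below.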
// compileDiv implements compiler.compileDiv for the arm64 architecture.
func (c *arm64Compiler) compileDiv(o *wazeroir.OperationDiv) error {
    dividend, divisor, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    // If the divisor is on the zero register, exit from the function deterministically.
    if isZeroRegister(divisor.register) {
        // Push any value so that the subsequent instructions can have a consistent location stack state.
        c.locationStack.pushRuntimeValueLocationOnStack()
        c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
        return nil
    }

    var inst asm.Instruction
    var vt runtimeValueType
    switch o.Type {
    case wazeroir.SignedTypeUint32:
        inst = arm64.UDIVW
        if err := c.compileIntegerDivPrecheck(true, false, dividend.register, divisor.register); err != nil {
            return err
        }
        vt = runtimeValueTypeI32
    case wazeroir.SignedTypeUint64:
        if err := c.compileIntegerDivPrecheck(false, false, dividend.register, divisor.register); err != nil {
            return err
        }
        inst = arm64.UDIV
        vt = runtimeValueTypeI64
    case wazeroir.SignedTypeInt32:
        if err := c.compileIntegerDivPrecheck(true, true, dividend.register, divisor.register); err != nil {
            return err
        }
        inst = arm64.SDIVW
        vt = runtimeValueTypeI32
    case wazeroir.SignedTypeInt64:
        if err := c.compileIntegerDivPrecheck(false, true, dividend.register, divisor.register); err != nil {
            return err
        }
        inst = arm64.SDIV
        vt = runtimeValueTypeI64
    case wazeroir.SignedTypeFloat32:
        inst = arm64.FDIVS
        vt = runtimeValueTypeF32
    case wazeroir.SignedTypeFloat64:
        inst = arm64.FDIVD
        vt = runtimeValueTypeF64
    }

    c.assembler.CompileRegisterToRegister(inst, divisor.register, dividend.register)

    c.pushRuntimeValueLocationOnRegister(dividend.register, vt)
    return nil
}

// compileIntegerDivPrecheck adds instructions to check that the divisor and dividend are sound for the division operation.
// First, it adds instructions to check whether the divisor equals zero, and if so, exits the function.
// In addition, for signed divisions, it checks whether the division would overflow.
func (c *arm64Compiler) compileIntegerDivPrecheck(is32Bit, isSigned bool, dividend, divisor asm.Register) error {
    // We check whether the divisor equals zero.
    var cmpInst, movInst, loadInst asm.Instruction
    var minValueOffsetInVM int64
    if is32Bit {
        cmpInst = arm64.CMPW
        movInst = arm64.MOVW
        loadInst = arm64.LDRW
        minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
    } else {
        cmpInst = arm64.CMP
        movInst = arm64.MOVD
        loadInst = arm64.LDRD
        minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
    }
    c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisor)

    // If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
    brIfDivisorNonZero := c.assembler.CompileJump(arm64.BCONDNE)
    c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)

    // Otherwise, we proceed.
    c.assembler.SetJumpTargetOnNext(brIfDivisorNonZero)

    // If the operation is a signed integer div, we have to do an additional check for overflow.
    if isSigned {
        // For signed division, we have to have branches for the "math.MinInt{32,64} / -1"
        // case, which results in overflow.

        // First, we compare the divisor with -1.
        c.assembler.CompileConstToRegister(movInst, -1, arm64ReservedRegisterForTemporary)
        c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, divisor)

        // If they are not equal, we skip the following check.
        brIfDivisorNonMinusOne := c.assembler.CompileJump(arm64.BCONDNE)

        // Otherwise, we further check whether the dividend equals math.MinInt32 or math.MinInt64.
        c.assembler.CompileMemoryToRegister(
            loadInst,
            arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
            arm64ReservedRegisterForTemporary,
        )
        c.assembler.CompileTwoRegistersToNone(cmpInst, arm64ReservedRegisterForTemporary, dividend)

        // If they are not equal, we are safe to execute the division.
        brIfDividendNotMinInt := c.assembler.CompileJump(arm64.BCONDNE)

        // Otherwise, we raise the overflow error.
        c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)

        c.assembler.SetJumpTargetOnNext(brIfDivisorNonMinusOne, brIfDividendNotMinInt)
    }
    return nil
}

// compileRem implements compiler.compileRem for the arm64 architecture.
func (c *arm64Compiler) compileRem(o *wazeroir.OperationRem) error {
    dividend, divisor, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    dividendReg := dividend.register
    divisorReg := divisor.register

    // If the divisor is on the zero register, exit from the function deterministically.
    if isZeroRegister(divisor.register) {
        // Push any value so that the subsequent instructions can have a consistent location stack state.
        c.locationStack.pushRuntimeValueLocationOnStack()
        c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)
        return nil
    }

    var divInst, msubInst, cmpInst asm.Instruction
    switch o.Type {
    case wazeroir.SignedUint32:
        divInst = arm64.UDIVW
        msubInst = arm64.MSUBW
        cmpInst = arm64.CMPW
    case wazeroir.SignedUint64:
        divInst = arm64.UDIV
        msubInst = arm64.MSUB
        cmpInst = arm64.CMP
    case wazeroir.SignedInt32:
        divInst = arm64.SDIVW
        msubInst = arm64.MSUBW
        cmpInst = arm64.CMPW
    case wazeroir.SignedInt64:
        divInst = arm64.SDIV
        msubInst = arm64.MSUB
        cmpInst = arm64.CMP
    }

    // We check whether the divisor equals zero.
    c.assembler.CompileTwoRegistersToNone(cmpInst, arm64.RegRZR, divisorReg)

    // If it is zero, we exit with nativeCallStatusIntegerDivisionByZero.
    brIfDivisorNonZero := c.assembler.CompileJump(arm64.BCONDNE)
    c.compileExitFromNativeCode(nativeCallStatusIntegerDivisionByZero)

    // Otherwise, we proceed.
    c.assembler.SetJumpTargetOnNext(brIfDivisorNonZero)

    // Temporarily mark them used to allocate a result register while keeping these values.
    c.markRegisterUsed(dividend.register, divisor.register)

    resultReg, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    // arm64 doesn't have an instruction for rem, so we calculate it with two instructions:
    // UDIV (SDIV for signed) and MSUB. This is exactly the same code that Clang emits.
    //
    //    [input: x0=dividend, x1=divisor]
    //    >> UDIV x2, x0, x1
    //    >> MSUB x3, x2, x1, x0
    //    [result: x2=quotient, x3=remainder]
    //
    c.assembler.CompileTwoRegistersToRegister(divInst, divisorReg, dividendReg, resultReg)
    // resultReg = dividendReg - (divisorReg * resultReg)
    c.assembler.CompileThreeRegistersToRegister(msubInst, divisorReg, dividendReg, resultReg, resultReg)

    c.markRegisterUnused(dividend.register, divisor.register)
    c.pushRuntimeValueLocationOnRegister(resultReg, dividend.valueType)
    return nil
}
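// For reference, the prechecks above implement WebAssembly's trapping integer
// division. A minimal Go sketch of the required semantics (an illustration
// only; checkedDiv and checkedRem are hypothetical names, not part of the
// compiler):
//
//    import "math"
//
//    func checkedDiv(dividend, divisor int32) int32 {
//        if divisor == 0 {
//            panic("integer division by zero") // nativeCallStatusIntegerDivisionByZero
//        }
//        if dividend == math.MinInt32 && divisor == -1 {
//            panic("integer overflow") // nativeCallStatusIntegerOverflow
//        }
//        return dividend / divisor
//    }
//
//    func checkedRem(dividend, divisor int32) int32 {
//        if divisor == 0 {
//            panic("integer division by zero")
//        }
//        // No overflow check is needed: MinInt32 % -1 is defined to be 0,
//        // which is what the SDIV+MSUB pair naturally produces.
//        return dividend - (dividend/divisor)*divisor
//    }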
// compileAnd implements compiler.compileAnd for the arm64 architecture.
func (c *arm64Compiler) compileAnd(o *wazeroir.OperationAnd) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    // If either of the x1 or x2 registers is the zero register,
    // the result will always be zero.
    if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, x1.valueType)
        return nil
    }

    // At this point, neither the x1 nor x2 register is the zero register,
    // so we can simply use x1's register as the destination.
    destinationReg := x1.register

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.ANDW
    case wazeroir.UnsignedInt64:
        inst = arm64.AND
    }

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
    c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType)
    return nil
}

// compileOr implements compiler.compileOr for the arm64 architecture.
func (c *arm64Compiler) compileOr(o *wazeroir.OperationOr) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    if isZeroRegister(x1.register) {
        c.pushRuntimeValueLocationOnRegister(x2.register, x2.valueType)
        return nil
    }
    if isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
        return nil
    }

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.ORRW
    case wazeroir.UnsignedInt64:
        inst = arm64.ORR
    }

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}

// compileXor implements compiler.compileXor for the arm64 architecture.
func (c *arm64Compiler) compileXor(o *wazeroir.OperationXor) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    // Either of the x1 or x2 registers can still be the zero register here, and
    // writes to the zero register are discarded, so prefer a non-zero register
    // as the destination. (If both are the zero register, the result is zero
    // anyway, and pushing the zero register remains correct.)
    destinationReg := x1.register
    if isZeroRegister(x1.register) {
        destinationReg = x2.register
    }

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.EORW
    case wazeroir.UnsignedInt64:
        inst = arm64.EOR
    }

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, destinationReg)
    c.pushRuntimeValueLocationOnRegister(destinationReg, x1.valueType)
    return nil
}
// compileShl implements compiler.compileShl for the arm64 architecture.
func (c *arm64Compiler) compileShl(o *wazeroir.OperationShl) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
        return nil
    }

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.LSLW
    case wazeroir.UnsignedInt64:
        inst = arm64.LSL
    }

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}

// compileShr implements compiler.compileShr for the arm64 architecture.
func (c *arm64Compiler) compileShr(o *wazeroir.OperationShr) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
        return nil
    }

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.SignedInt32:
        inst = arm64.ASRW
    case wazeroir.SignedInt64:
        inst = arm64.ASR
    case wazeroir.SignedUint32:
        inst = arm64.LSRW
    case wazeroir.SignedUint64:
        inst = arm64.LSR
    }

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}

// compileRotl implements compiler.compileRotl for the arm64 architecture.
func (c *arm64Compiler) compileRotl(o *wazeroir.OperationRotl) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
        return nil
    }

    var inst, neginst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.RORW
        neginst = arm64.NEGW
    case wazeroir.UnsignedInt64:
        inst = arm64.ROR
        neginst = arm64.NEG
    }

    // arm64 doesn't have a rotate-left instruction.
    // The shift amount needs to be negated, similar to the assembly output of bits.RotateLeft.
    c.assembler.CompileRegisterToRegister(neginst, x2.register, x2.register)

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}

// compileRotr implements compiler.compileRotr for the arm64 architecture.
func (c *arm64Compiler) compileRotr(o *wazeroir.OperationRotr) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    if isZeroRegister(x1.register) || isZeroRegister(x2.register) {
        c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
        return nil
    }

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.RORW
    case wazeroir.UnsignedInt64:
        inst = arm64.ROR
    }

    c.assembler.CompileTwoRegistersToRegister(inst, x2.register, x1.register, x1.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}
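// For reference, the rotate-left lowering above relies on the identity
// rotl(x, n) == rotr(x, -n) (modulo the bit width), since arm64 only provides
// a rotate-right instruction. A minimal math/bits sketch (illustration only,
// not part of the compiler):
//
//    import "math/bits"
//
//    func rotl32(x uint32, n uint32) uint32 {
//        return bits.RotateLeft32(x, int(n)) // lowered the same way: negate, then rotate right
//    }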
// compileAbs implements compiler.compileAbs for the arm64 architecture.
func (c *arm64Compiler) compileAbs(o *wazeroir.OperationAbs) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FABSS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FABSD, runtimeValueTypeF64)
    }
}

// compileNeg implements compiler.compileNeg for the arm64 architecture.
func (c *arm64Compiler) compileNeg(o *wazeroir.OperationNeg) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FNEGS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FNEGD, runtimeValueTypeF64)
    }
}

// compileCeil implements compiler.compileCeil for the arm64 architecture.
func (c *arm64Compiler) compileCeil(o *wazeroir.OperationCeil) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FRINTPS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FRINTPD, runtimeValueTypeF64)
    }
}

// compileFloor implements compiler.compileFloor for the arm64 architecture.
func (c *arm64Compiler) compileFloor(o *wazeroir.OperationFloor) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FRINTMS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FRINTMD, runtimeValueTypeF64)
    }
}

// compileTrunc implements compiler.compileTrunc for the arm64 architecture.
func (c *arm64Compiler) compileTrunc(o *wazeroir.OperationTrunc) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FRINTZS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FRINTZD, runtimeValueTypeF64)
    }
}

// compileNearest implements compiler.compileNearest for the arm64 architecture.
func (c *arm64Compiler) compileNearest(o *wazeroir.OperationNearest) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FRINTNS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FRINTND, runtimeValueTypeF64)
    }
}

// compileSqrt implements compiler.compileSqrt for the arm64 architecture.
func (c *arm64Compiler) compileSqrt(o *wazeroir.OperationSqrt) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleUnop(arm64.FSQRTS, runtimeValueTypeF32)
    } else {
        return c.compileSimpleUnop(arm64.FSQRTD, runtimeValueTypeF64)
    }
}

// compileMin implements compiler.compileMin for the arm64 architecture.
func (c *arm64Compiler) compileMin(o *wazeroir.OperationMin) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleFloatBinop(arm64.FMINS)
    } else {
        return c.compileSimpleFloatBinop(arm64.FMIND)
    }
}

// compileMax implements compiler.compileMax for the arm64 architecture.
func (c *arm64Compiler) compileMax(o *wazeroir.OperationMax) error {
    if o.Type == wazeroir.Float32 {
        return c.compileSimpleFloatBinop(arm64.FMAXS)
    } else {
        return c.compileSimpleFloatBinop(arm64.FMAXD)
    }
}

func (c *arm64Compiler) compileSimpleFloatBinop(inst asm.Instruction) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }
    c.assembler.CompileRegisterToRegister(inst, x2.register, x1.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}
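// For reference, the FRINT* selections above correspond to Go's math package
// as follows (a sketch of the expected semantics, not part of the compiler):
//
//    import "math"
//
//    _ = math.Ceil(1.5)        // f64.ceil    -> FRINTPD (round toward +inf)
//    _ = math.Floor(1.5)       // f64.floor   -> FRINTMD (round toward -inf)
//    _ = math.Trunc(1.5)       // f64.trunc   -> FRINTZD (round toward zero)
//    _ = math.RoundToEven(1.5) // f64.nearest -> FRINTND (round to nearest, ties to even)
//    _ = math.Sqrt(2.0)        // f64.sqrt    -> FSQRTD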
// compileCopysign implements compiler.compileCopysign for the arm64 architecture.
func (c *arm64Compiler) compileCopysign(o *wazeroir.OperationCopysign) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    var ldr asm.Instruction
    var minValueOffsetInVM int64
    if o.Type == wazeroir.Float32 {
        ldr = arm64.FLDRS
        minValueOffsetInVM = arm64CallEngineArchContextMinimum32BitSignedIntOffset
    } else {
        ldr = arm64.FLDRD
        minValueOffsetInVM = arm64CallEngineArchContextMinimum64BitSignedIntOffset
    }

    c.markRegisterUsed(x1.register, x2.register)
    freg, err := c.allocateRegister(registerTypeVector)
    if err != nil {
        return err
    }

    // This is exactly the same code emitted by GCC for "__builtin_copysign":
    //
    //    mov x0, -9223372036854775808
    //    fmov d2, x0
    //    vbit v0.8b, v1.8b, v2.8b
    //
    // "mov freg, -9223372036854775808 (stored at ce.minimum64BitSignedInt)"
    c.assembler.CompileMemoryToRegister(
        ldr,
        arm64ReservedRegisterForCallEngine, minValueOffsetInVM,
        freg,
    )

    // VBIT inserts each bit from the first operand into the destination if the corresponding bit of the second operand is 1,
    // otherwise it leaves the destination bit unchanged.
    // See https://developer.arm.com/documentation/dui0801/g/Advanced-SIMD-Instructions--32-bit-/VBIT
    //
    // "vbit vreg.8b, x2vreg.8b, x1vreg.8b" == "insert the top (sign) bit of x2 into x1".
    c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VBIT,
        freg, x2.register, x1.register, arm64.VectorArrangement16B)

    c.markRegisterUnused(x2.register)
    c.pushRuntimeValueLocationOnRegister(x1.register, x1.valueType)
    return nil
}
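// For reference, VBIT with a sign-bit mask is the usual copysign bit trick.
// A minimal Go sketch of the same computation for f64 (illustration only; the
// helper name copysign is hypothetical):
//
//    import "math"
//
//    func copysign(x, y float64) float64 {
//        const signMask = uint64(1) << 63 // the math.MinInt64 bit pattern loaded above
//        xb, yb := math.Float64bits(x), math.Float64bits(y)
//        return math.Float64frombits(xb&^signMask | yb&signMask) // == math.Copysign(x, y)
//    }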
// compileI32WrapFromI64 implements compiler.compileI32WrapFromI64 for the arm64 architecture.
func (c *arm64Compiler) compileI32WrapFromI64() error {
    // MOVW copies the lower 32 bits, and the result is an i32 value.
    return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI32)
}

// compileITruncFromF implements compiler.compileITruncFromF for the arm64 architecture.
func (c *arm64Compiler) compileITruncFromF(o *wazeroir.OperationITruncFromF) error {
    // Clear the floating point status register (FPSR).
    c.assembler.CompileRegisterToRegister(arm64.MSR, arm64.RegRZR, arm64.RegFPSR)

    var vt runtimeValueType
    var convinst asm.Instruction
    is32bitFloat := o.InputType == wazeroir.Float32
    if is32bitFloat && o.OutputType == wazeroir.SignedInt32 {
        convinst = arm64.FCVTZSSW
        vt = runtimeValueTypeI32
    } else if is32bitFloat && o.OutputType == wazeroir.SignedInt64 {
        convinst = arm64.FCVTZSS
        vt = runtimeValueTypeI64
    } else if !is32bitFloat && o.OutputType == wazeroir.SignedInt32 {
        convinst = arm64.FCVTZSDW
        vt = runtimeValueTypeI32
    } else if !is32bitFloat && o.OutputType == wazeroir.SignedInt64 {
        convinst = arm64.FCVTZSD
        vt = runtimeValueTypeI64
    } else if is32bitFloat && o.OutputType == wazeroir.SignedUint32 {
        convinst = arm64.FCVTZUSW
        vt = runtimeValueTypeI32
    } else if is32bitFloat && o.OutputType == wazeroir.SignedUint64 {
        convinst = arm64.FCVTZUS
        vt = runtimeValueTypeI64
    } else if !is32bitFloat && o.OutputType == wazeroir.SignedUint32 {
        convinst = arm64.FCVTZUDW
        vt = runtimeValueTypeI32
    } else if !is32bitFloat && o.OutputType == wazeroir.SignedUint64 {
        convinst = arm64.FCVTZUD
        vt = runtimeValueTypeI64
    }

    source, err := c.popValueOnRegister()
    if err != nil {
        return err
    }

    destinationReg, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    c.assembler.CompileRegisterToRegister(convinst, source.register, destinationReg)
    c.pushRuntimeValueLocationOnRegister(destinationReg, vt)

    if !o.NonTrapping {
        // Obtain the floating point status register value into a general purpose register,
        // so that we can check whether the conversion was invalid.
        c.assembler.CompileRegisterToRegister(arm64.MRS, arm64.RegFPSR, arm64ReservedRegisterForTemporary)
        // Check if the conversion was invalid by comparing the status with 1.
        // See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
        c.assembler.CompileRegisterAndConstToNone(arm64.CMP, arm64ReservedRegisterForTemporary, 1)

        brOK := c.assembler.CompileJump(arm64.BCONDNE)

        // If so, exit the execution with an error whose kind depends on whether the source value is NaN.
        var floatcmp asm.Instruction
        if is32bitFloat {
            floatcmp = arm64.FCMPS
        } else {
            floatcmp = arm64.FCMPD
        }
        c.assembler.CompileTwoRegistersToNone(floatcmp, source.register, source.register)
        // The VS flag is set if at least one of the values for FCMP is NaN.
        // https://developer.arm.com/documentation/dui0801/g/Condition-Codes/Comparison-of-condition-code-meanings-in-integer-and-floating-point-code
        brIfSourceNaN := c.assembler.CompileJump(arm64.BCONDVS)

        // If the source value is not NaN, the operation overflowed.
        c.compileExitFromNativeCode(nativeCallStatusIntegerOverflow)

        // Otherwise, the operation was invalid, as this is an attempt to convert NaN to an integer.
        c.assembler.SetJumpTargetOnNext(brIfSourceNaN)
        c.compileExitFromNativeCode(nativeCallStatusCodeInvalidFloatToIntConversion)

        // Otherwise, we branch into the next instruction.
        c.assembler.SetJumpTargetOnNext(brOK)
    }
    return nil
}
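// For reference, the FPSR check above implements WebAssembly's trapping
// float-to-int truncation. A minimal Go sketch of the semantics (illustration
// only; truncI32 is a hypothetical name):
//
//    import "math"
//
//    func truncI32(f float64) int32 {
//        if math.IsNaN(f) {
//            panic("invalid conversion") // nativeCallStatusCodeInvalidFloatToIntConversion
//        }
//        t := math.Trunc(f)
//        if t < math.MinInt32 || t > math.MaxInt32 {
//            panic("integer overflow") // nativeCallStatusIntegerOverflow
//        }
//        return int32(t)
//    }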
// compileFConvertFromI implements compiler.compileFConvertFromI for the arm64 architecture.
func (c *arm64Compiler) compileFConvertFromI(o *wazeroir.OperationFConvertFromI) error {
    var convinst asm.Instruction
    if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedInt32 {
        convinst = arm64.SCVTFWS
    } else if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedInt64 {
        convinst = arm64.SCVTFS
    } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedInt32 {
        convinst = arm64.SCVTFWD
    } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedInt64 {
        convinst = arm64.SCVTFD
    } else if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedUint32 {
        convinst = arm64.UCVTFWS
    } else if o.OutputType == wazeroir.Float32 && o.InputType == wazeroir.SignedUint64 {
        convinst = arm64.UCVTFS
    } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedUint32 {
        convinst = arm64.UCVTFWD
    } else if o.OutputType == wazeroir.Float64 && o.InputType == wazeroir.SignedUint64 {
        convinst = arm64.UCVTFD
    }

    var vt runtimeValueType
    if o.OutputType == wazeroir.Float32 {
        vt = runtimeValueTypeF32
    } else {
        vt = runtimeValueTypeF64
    }
    return c.compileSimpleConversion(convinst, registerTypeVector, vt)
}

// compileF32DemoteFromF64 implements compiler.compileF32DemoteFromF64 for the arm64 architecture.
func (c *arm64Compiler) compileF32DemoteFromF64() error {
    return c.compileSimpleUnop(arm64.FCVTDS, runtimeValueTypeF32)
}

// compileF64PromoteFromF32 implements compiler.compileF64PromoteFromF32 for the arm64 architecture.
func (c *arm64Compiler) compileF64PromoteFromF32() error {
    return c.compileSimpleUnop(arm64.FCVTSD, runtimeValueTypeF64)
}

// compileI32ReinterpretFromF32 implements compiler.compileI32ReinterpretFromF32 for the arm64 architecture.
func (c *arm64Compiler) compileI32ReinterpretFromF32() error {
    if peek := c.locationStack.peek(); peek.onStack() {
        // If the value is on the stack, this is a no-op, as there is nothing to do to convert the type.
        peek.valueType = runtimeValueTypeI32
        return nil
    }
    return c.compileSimpleConversion(arm64.FMOVS, registerTypeGeneralPurpose, runtimeValueTypeI32)
}

// compileI64ReinterpretFromF64 implements compiler.compileI64ReinterpretFromF64 for the arm64 architecture.
func (c *arm64Compiler) compileI64ReinterpretFromF64() error {
    if peek := c.locationStack.peek(); peek.onStack() {
        // If the value is on the stack, this is a no-op, as there is nothing to do to convert the type.
        peek.valueType = runtimeValueTypeI64
        return nil
    }
    return c.compileSimpleConversion(arm64.FMOVD, registerTypeGeneralPurpose, runtimeValueTypeI64)
}

// compileF32ReinterpretFromI32 implements compiler.compileF32ReinterpretFromI32 for the arm64 architecture.
func (c *arm64Compiler) compileF32ReinterpretFromI32() error {
    if peek := c.locationStack.peek(); peek.onStack() {
        // If the value is on the stack, this is a no-op, as there is nothing to do to convert the type.
        peek.valueType = runtimeValueTypeF32
        return nil
    }
    return c.compileSimpleConversion(arm64.FMOVS, registerTypeVector, runtimeValueTypeF32)
}
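// For reference, the reinterpret operations only relabel the bits of a value;
// no numeric conversion happens, which is why the on-stack case is a no-op.
// A minimal sketch (illustration only):
//
//    import "math"
//
//    u := uint32(0x3f800000)
//    f := math.Float32frombits(u) // f32.reinterpret_i32: f == 1.0, same bits
//    _ = math.Float32bits(f)      // i32.reinterpret_f32: back to 0x3f800000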
// compileF64ReinterpretFromI64 implements compiler.compileF64ReinterpretFromI64 for the arm64 architecture.
func (c *arm64Compiler) compileF64ReinterpretFromI64() error {
    if peek := c.locationStack.peek(); peek.onStack() {
        // If the value is on the stack, this is a no-op, as there is nothing to do to convert the type.
        peek.valueType = runtimeValueTypeF64
        return nil
    }
    return c.compileSimpleConversion(arm64.FMOVD, registerTypeVector, runtimeValueTypeF64)
}

func (c *arm64Compiler) compileSimpleConversion(inst asm.Instruction, destinationRegType registerType, resultRuntimeValueType runtimeValueType) error {
    source, err := c.popValueOnRegister()
    if err != nil {
        return err
    }

    destinationReg, err := c.allocateRegister(destinationRegType)
    if err != nil {
        return err
    }

    c.assembler.CompileRegisterToRegister(inst, source.register, destinationReg)
    c.pushRuntimeValueLocationOnRegister(destinationReg, resultRuntimeValueType)
    return nil
}

// compileExtend implements compiler.compileExtend for the arm64 architecture.
func (c *arm64Compiler) compileExtend(o *wazeroir.OperationExtend) error {
    if o.Signed {
        return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
    } else {
        return c.compileSimpleUnop(arm64.MOVW, runtimeValueTypeI64)
    }
}

// compileSignExtend32From8 implements compiler.compileSignExtend32From8 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend32From8() error {
    return c.compileSimpleUnop(arm64.SXTBW, runtimeValueTypeI32)
}

// compileSignExtend32From16 implements compiler.compileSignExtend32From16 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend32From16() error {
    return c.compileSimpleUnop(arm64.SXTHW, runtimeValueTypeI32)
}

// compileSignExtend64From8 implements compiler.compileSignExtend64From8 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend64From8() error {
    return c.compileSimpleUnop(arm64.SXTB, runtimeValueTypeI64)
}

// compileSignExtend64From16 implements compiler.compileSignExtend64From16 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend64From16() error {
    return c.compileSimpleUnop(arm64.SXTH, runtimeValueTypeI64)
}

// compileSignExtend64From32 implements compiler.compileSignExtend64From32 for the arm64 architecture.
func (c *arm64Compiler) compileSignExtend64From32() error {
    return c.compileSimpleUnop(arm64.SXTW, runtimeValueTypeI64)
}

func (c *arm64Compiler) compileSimpleUnop(inst asm.Instruction, resultRuntimeValueType runtimeValueType) error {
    v, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    reg := v.register
    c.assembler.CompileRegisterToRegister(inst, reg, reg)
    c.pushRuntimeValueLocationOnRegister(reg, resultRuntimeValueType)
    return nil
}

// compileEq implements compiler.compileEq for the arm64 architecture.
func (c *arm64Compiler) compileEq(o *wazeroir.OperationEq) error {
    return c.emitEqOrNe(true, o.Type)
}

// compileNe implements compiler.compileNe for the arm64 architecture.
func (c *arm64Compiler) compileNe(o *wazeroir.OperationNe) error {
    return c.emitEqOrNe(false, o.Type)
}
// emitEqOrNe implements compiler.compileEq and compiler.compileNe for the arm64 architecture.
func (c *arm64Compiler) emitEqOrNe(isEq bool, unsignedType wazeroir.UnsignedType) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    var inst asm.Instruction
    switch unsignedType {
    case wazeroir.UnsignedTypeI32:
        inst = arm64.CMPW
    case wazeroir.UnsignedTypeI64:
        inst = arm64.CMP
    case wazeroir.UnsignedTypeF32:
        inst = arm64.FCMPS
    case wazeroir.UnsignedTypeF64:
        inst = arm64.FCMPD
    }

    c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)

    // Push the comparison result as a conditional register value.
    cond := arm64.CondNE
    if isEq {
        cond = arm64.CondEQ
    }
    c.locationStack.pushRuntimeValueLocationOnConditionalRegister(cond)
    return nil
}

// compileEqz implements compiler.compileEqz for the arm64 architecture.
func (c *arm64Compiler) compileEqz(o *wazeroir.OperationEqz) error {
    x1, err := c.popValueOnRegister()
    if err != nil {
        return err
    }

    var inst asm.Instruction
    switch o.Type {
    case wazeroir.UnsignedInt32:
        inst = arm64.CMPW
    case wazeroir.UnsignedInt64:
        inst = arm64.CMP
    }

    c.assembler.CompileTwoRegistersToNone(inst, arm64.RegRZR, x1.register)

    // Push the comparison result as a conditional register value.
    c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
    return nil
}

// compileLt implements compiler.compileLt for the arm64 architecture.
func (c *arm64Compiler) compileLt(o *wazeroir.OperationLt) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    var inst asm.Instruction
    var conditionalRegister asm.ConditionalRegisterState
    switch o.Type {
    case wazeroir.SignedTypeUint32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondLO
    case wazeroir.SignedTypeUint64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondLO
    case wazeroir.SignedTypeInt32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondLT
    case wazeroir.SignedTypeInt64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondLT
    case wazeroir.SignedTypeFloat32:
        inst = arm64.FCMPS
        conditionalRegister = arm64.CondMI
    case wazeroir.SignedTypeFloat64:
        inst = arm64.FCMPD
        conditionalRegister = arm64.CondMI
    }

    c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)

    // Push the comparison result as a conditional register value.
    c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
    return nil
}
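// For reference, pushing a value "on a conditional register" defers
// materializing the boolean: the comparison above only sets the NZCV flags,
// and a flag-to-register instruction is emitted later only if the value is
// actually consumed as data. A rough sketch of the emitted shape for i32.eqz
// (an illustration of the idea, not literal assembler output):
//
//    CMPW WZR, W0    ;; compare x with zero, setting the flags
//    ;; ...nothing emitted yet; the location stack records cond=EQ...
//    CSET W1, EQ     ;; emitted only when the boolean is actually needed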
// compileGt implements compiler.compileGt for the arm64 architecture.
func (c *arm64Compiler) compileGt(o *wazeroir.OperationGt) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    var inst asm.Instruction
    var conditionalRegister asm.ConditionalRegisterState
    switch o.Type {
    case wazeroir.SignedTypeUint32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondHI
    case wazeroir.SignedTypeUint64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondHI
    case wazeroir.SignedTypeInt32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondGT
    case wazeroir.SignedTypeInt64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondGT
    case wazeroir.SignedTypeFloat32:
        inst = arm64.FCMPS
        conditionalRegister = arm64.CondGT
    case wazeroir.SignedTypeFloat64:
        inst = arm64.FCMPD
        conditionalRegister = arm64.CondGT
    }

    c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)

    // Push the comparison result as a conditional register value.
    c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
    return nil
}

// compileLe implements compiler.compileLe for the arm64 architecture.
func (c *arm64Compiler) compileLe(o *wazeroir.OperationLe) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    var inst asm.Instruction
    var conditionalRegister asm.ConditionalRegisterState
    switch o.Type {
    case wazeroir.SignedTypeUint32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondLS
    case wazeroir.SignedTypeUint64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondLS
    case wazeroir.SignedTypeInt32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondLE
    case wazeroir.SignedTypeInt64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondLE
    case wazeroir.SignedTypeFloat32:
        inst = arm64.FCMPS
        conditionalRegister = arm64.CondLS
    case wazeroir.SignedTypeFloat64:
        inst = arm64.FCMPD
        conditionalRegister = arm64.CondLS
    }

    c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)

    // Push the comparison result as a conditional register value.
    c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
    return nil
}

// compileGe implements compiler.compileGe for the arm64 architecture.
func (c *arm64Compiler) compileGe(o *wazeroir.OperationGe) error {
    x1, x2, err := c.popTwoValuesOnRegisters()
    if err != nil {
        return err
    }

    var inst asm.Instruction
    var conditionalRegister asm.ConditionalRegisterState
    switch o.Type {
    case wazeroir.SignedTypeUint32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondHS
    case wazeroir.SignedTypeUint64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondHS
    case wazeroir.SignedTypeInt32:
        inst = arm64.CMPW
        conditionalRegister = arm64.CondGE
    case wazeroir.SignedTypeInt64:
        inst = arm64.CMP
        conditionalRegister = arm64.CondGE
    case wazeroir.SignedTypeFloat32:
        inst = arm64.FCMPS
        conditionalRegister = arm64.CondGE
    case wazeroir.SignedTypeFloat64:
        inst = arm64.FCMPD
        conditionalRegister = arm64.CondGE
    }

    c.assembler.CompileTwoRegistersToNone(inst, x2.register, x1.register)

    // Push the comparison result as a conditional register value.
    c.locationStack.pushRuntimeValueLocationOnConditionalRegister(conditionalRegister)
    return nil
}

// compileLoad implements compiler.compileLoad for the arm64 architecture.
func (c *arm64Compiler) compileLoad(o *wazeroir.OperationLoad) error {
    var (
        isFloat           bool
        loadInst          asm.Instruction
        targetSizeInBytes int64
        vt                runtimeValueType
    )

    switch o.Type {
    case wazeroir.UnsignedTypeI32:
        loadInst = arm64.LDRW
        targetSizeInBytes = 32 / 8
        vt = runtimeValueTypeI32
    case wazeroir.UnsignedTypeI64:
        loadInst = arm64.LDRD
        targetSizeInBytes = 64 / 8
        vt = runtimeValueTypeI64
    case wazeroir.UnsignedTypeF32:
        loadInst = arm64.FLDRS
        isFloat = true
        targetSizeInBytes = 32 / 8
        vt = runtimeValueTypeF32
    case wazeroir.UnsignedTypeF64:
        loadInst = arm64.FLDRD
        isFloat = true
        targetSizeInBytes = 64 / 8
        vt = runtimeValueTypeF64
    }
    return c.compileLoadImpl(o.Arg.Offset, loadInst, targetSizeInBytes, isFloat, vt)
}

// compileLoad8 implements compiler.compileLoad8 for the arm64 architecture.
func (c *arm64Compiler) compileLoad8(o *wazeroir.OperationLoad8) error {
    var loadInst asm.Instruction
    var vt runtimeValueType
    switch o.Type {
    case wazeroir.SignedInt32:
        loadInst = arm64.LDRSBW
        vt = runtimeValueTypeI32
    case wazeroir.SignedInt64:
        loadInst = arm64.LDRSBD
        vt = runtimeValueTypeI64
    case wazeroir.SignedUint32:
        loadInst = arm64.LDRB
        vt = runtimeValueTypeI32
    case wazeroir.SignedUint64:
        loadInst = arm64.LDRB
        vt = runtimeValueTypeI64
    }
    return c.compileLoadImpl(o.Arg.Offset, loadInst, 1, false, vt)
}

// compileLoad16 implements compiler.compileLoad16 for the arm64 architecture.
func (c *arm64Compiler) compileLoad16(o *wazeroir.OperationLoad16) error {
    var loadInst asm.Instruction
    var vt runtimeValueType
    switch o.Type {
    case wazeroir.SignedInt32:
        loadInst = arm64.LDRSHW
        vt = runtimeValueTypeI32
    case wazeroir.SignedInt64:
        loadInst = arm64.LDRSHD
        vt = runtimeValueTypeI64
    case wazeroir.SignedUint32:
        loadInst = arm64.LDRH
        vt = runtimeValueTypeI32
    case wazeroir.SignedUint64:
        loadInst = arm64.LDRH
        vt = runtimeValueTypeI64
    }
    return c.compileLoadImpl(o.Arg.Offset, loadInst, 16/8, false, vt)
}

// compileLoad32 implements compiler.compileLoad32 for the arm64 architecture.
func (c *arm64Compiler) compileLoad32(o *wazeroir.OperationLoad32) error {
    var loadInst asm.Instruction
    if o.Signed {
        loadInst = arm64.LDRSW
    } else {
        loadInst = arm64.LDRW
    }
    return c.compileLoadImpl(o.Arg.Offset, loadInst, 32/8, false, runtimeValueTypeI64)
}

// compileLoadImpl implements the compileLoad* variants for the arm64 architecture.
func (c *arm64Compiler) compileLoadImpl(offsetArg uint32, loadInst asm.Instruction,
    targetSizeInBytes int64, isFloat bool, resultRuntimeValueType runtimeValueType,
) error {
    offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
    if err != nil {
        return err
    }

    resultRegister := offsetReg
    if isFloat {
        resultRegister, err = c.allocateRegister(registerTypeVector)
        if err != nil {
            return err
        }
    }

    // "resultRegister = [arm64ReservedRegisterForMemory + offsetReg]"
    // In other words, "resultRegister = memory.Buffer[offset: offset+targetSizeInBytes]"
    c.assembler.CompileMemoryWithRegisterOffsetToRegister(
        loadInst,
        arm64ReservedRegisterForMemory, offsetReg,
        resultRegister,
    )

    c.pushRuntimeValueLocationOnRegister(resultRegister, resultRuntimeValueType)
    return nil
}

// compileStore implements compiler.compileStore for the arm64 architecture.
func (c *arm64Compiler) compileStore(o *wazeroir.OperationStore) error {
    var movInst asm.Instruction
    var targetSizeInBytes int64
    switch o.Type {
    case wazeroir.UnsignedTypeI32:
        movInst = arm64.STRW
        targetSizeInBytes = 32 / 8
    case wazeroir.UnsignedTypeI64:
        movInst = arm64.STRD
        targetSizeInBytes = 64 / 8
    case wazeroir.UnsignedTypeF32:
        movInst = arm64.FSTRS
        targetSizeInBytes = 32 / 8
    case wazeroir.UnsignedTypeF64:
        movInst = arm64.FSTRD
        targetSizeInBytes = 64 / 8
    }
    return c.compileStoreImpl(o.Arg.Offset, movInst, targetSizeInBytes)
}

// compileStore8 implements compiler.compileStore8 for the arm64 architecture.
func (c *arm64Compiler) compileStore8(o *wazeroir.OperationStore8) error {
    return c.compileStoreImpl(o.Arg.Offset, arm64.STRB, 1)
}

// compileStore16 implements compiler.compileStore16 for the arm64 architecture.
func (c *arm64Compiler) compileStore16(o *wazeroir.OperationStore16) error {
    return c.compileStoreImpl(o.Arg.Offset, arm64.STRH, 16/8)
}

// compileStore32 implements compiler.compileStore32 for the arm64 architecture.
func (c *arm64Compiler) compileStore32(o *wazeroir.OperationStore32) error {
    return c.compileStoreImpl(o.Arg.Offset, arm64.STRW, 32/8)
}

// compileStoreImpl implements the compileStore* variants for the arm64 architecture.
func (c *arm64Compiler) compileStoreImpl(offsetArg uint32, storeInst asm.Instruction, targetSizeInBytes int64) error {
    val, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    // Mark the value's register temporarily used, as compileMemoryAccessOffsetSetup might try allocating a register.
    c.markRegisterUsed(val.register)

    offsetReg, err := c.compileMemoryAccessOffsetSetup(offsetArg, targetSizeInBytes)
    if err != nil {
        return err
    }

    // "[arm64ReservedRegisterForMemory + offsetReg] = val.register"
    // In other words, "memory.Buffer[offset: offset+targetSizeInBytes] = val.register"
    c.assembler.CompileRegisterToMemoryWithRegisterOffset(
        storeInst, val.register,
        arm64ReservedRegisterForMemory, offsetReg,
    )

    c.markRegisterUnused(val.register)
    return nil
}
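// For reference, every load/store above funnels through
// compileMemoryAccessOffsetSetup below, which performs the usual end-pointer
// bounds check. A minimal Go sketch of that check (illustration only; the
// helper name boundsCheckedOffset is hypothetical):
//
//    func boundsCheckedOffset(base, offsetArg uint32, targetSizeInBytes int64, mem []byte) int64 {
//        ceil := int64(base) + int64(offsetArg) + targetSizeInBytes
//        if ceil > int64(len(mem)) {
//            panic("out of bounds memory access") // nativeCallStatusCodeMemoryOutOfBounds
//        }
//        return ceil - targetSizeInBytes // the "offset" actually used for the access
//    }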
// compileMemoryAccessOffsetSetup pops the top value from the stack (called "base"),
// stores "base + offsetArg + targetSizeInBytes" into a register, and returns the
// stored register. We call the result "offset" because we access the memory
// as memory.Buffer[offset: offset+targetSizeInBytes].
//
// Note: this also emits the instructions that check for out-of-bounds memory access.
// In other words, if offset+targetSizeInBytes exceeds the memory size, the code exits
// with the nativeCallStatusCodeMemoryOutOfBounds status.
func (c *arm64Compiler) compileMemoryAccessOffsetSetup(offsetArg uint32, targetSizeInBytes int64) (offsetRegister asm.Register, err error) {
    base, err := c.popValueOnRegister()
    if err != nil {
        return 0, err
    }

    offsetRegister = base.register
    if isZeroRegister(base.register) {
        offsetRegister, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, offsetRegister)
    }

    if offsetConst := int64(offsetArg) + targetSizeInBytes; offsetConst <= math.MaxUint32 {
        // "offsetRegister = base + offsetArg + targetSizeInBytes"
        c.assembler.CompileConstToRegister(arm64.ADD, offsetConst, offsetRegister)
    } else {
        // If the offset constant is too large, we exit with nativeCallStatusCodeMemoryOutOfBounds.
        c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
        return
    }

    // "arm64ReservedRegisterForTemporary = len(memory.Buffer)"
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
        arm64ReservedRegisterForTemporary)

    // Check if offsetRegister (= base+offsetArg+targetSizeInBytes) > len(memory.Buffer).
    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, offsetRegister)
    boundsOK := c.assembler.CompileJump(arm64.BCONDLS)

    // If offsetRegister (= base+offsetArg+targetSizeInBytes) exceeds the memory length,
    // we exit the function with nativeCallStatusCodeMemoryOutOfBounds.
    c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)

    // Otherwise, we subtract targetSizeInBytes from offsetRegister.
    c.assembler.SetJumpTargetOnNext(boundsOK)
    c.assembler.CompileConstToRegister(arm64.SUB, targetSizeInBytes, offsetRegister)
    return offsetRegister, nil
}

// compileMemoryGrow implements compileMemoryGrow variants for the arm64 architecture.
func (c *arm64Compiler) compileMemoryGrow() error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexMemoryGrow); err != nil {
        return err
    }

    // After return, we re-initialize the reserved registers just like in the preamble of functions.
    c.compileReservedStackBasePointerRegisterInitialization()
    c.compileReservedMemoryRegisterInitialization()
    return nil
}

// compileMemorySize implements compileMemorySize variants for the arm64 architecture.
func (c *arm64Compiler) compileMemorySize() error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    reg, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    // "reg = len(memory.Buffer)"
    c.assembler.CompileMemoryToRegister(
        arm64.LDRD,
        arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
        reg,
    )

    // memory.size returns the size in pages, so we have to divide the byte length by the page size.
    // "reg = reg >> wasm.MemoryPageSizeInBits (== reg / wasm.MemoryPageSize)"
    c.assembler.CompileConstToRegister(
        arm64.LSR,
        wasm.MemoryPageSizeInBits,
        reg,
    )

    c.pushRuntimeValueLocationOnRegister(reg, runtimeValueTypeI32)
    return nil
}
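// For reference, memory.size must report 64 KiB pages rather than bytes, which
// is why the byte length is shifted right by wasm.MemoryPageSizeInBits above.
// A minimal sketch (illustration only; memorySizeInPages is hypothetical):
//
//    func memorySizeInPages(buffer []byte) uint32 {
//        return uint32(len(buffer) >> 16) // wasm.MemoryPageSizeInBits == 16, i.e. len/65536
//    }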
// compileCallGoFunction adds instructions to call a Go function whose index equals the builtinFunction parameter.
// compilerStatus is set before making the call, and it should be either nativeCallStatusCodeCallBuiltInFunction or
// nativeCallStatusCodeCallGoHostFunction.
func (c *arm64Compiler) compileCallGoFunction(compilerStatus nativeCallStatusCode, builtinFunction wasm.Index) error {
    // Release all the registers, as our calling convention requires caller-save registers.
    if err := c.compileReleaseAllRegistersToStack(); err != nil {
        return err
    }

    if compilerStatus == nativeCallStatusCodeCallBuiltInFunction {
        // Set the target builtin function index on ce.builtinFunctionCallIndex.
        // "tmp = $index"
        c.assembler.CompileConstToRegister(
            arm64.MOVD,
            int64(builtinFunction),
            arm64ReservedRegisterForTemporary,
        )
        // "[arm64ReservedRegisterForCallEngine + callEngineExitContextBuiltinFunctionCallIndexOffset] = tmp"
        // In other words, "ce.builtinFunctionCallIndex = tmp (== $index)"
        c.assembler.CompileRegisterToMemory(
            arm64.STRW,
            arm64ReservedRegisterForTemporary,
            arm64ReservedRegisterForCallEngine, callEngineExitContextBuiltinFunctionCallIndexOffset,
        )
    }

    // Read the return address, and write it to callEngine.exitContext.returnAddress.
    c.assembler.CompileReadInstructionAddress(arm64ReservedRegisterForTemporary, arm64.RET)
    c.assembler.CompileRegisterToMemory(
        arm64.STRD, arm64ReservedRegisterForTemporary,
        arm64ReservedRegisterForCallEngine, callEngineExitContextReturnAddressOffset,
    )

    c.compileExitFromNativeCode(compilerStatus)
    return nil
}

// compileConstI32 implements compiler.compileConstI32 for the arm64 architecture.
func (c *arm64Compiler) compileConstI32(o *wazeroir.OperationConstI32) error {
    return c.compileIntConstant(true, uint64(o.Value))
}

// compileConstI64 implements compiler.compileConstI64 for the arm64 architecture.
func (c *arm64Compiler) compileConstI64(o *wazeroir.OperationConstI64) error {
    return c.compileIntConstant(false, o.Value)
}

// compileIntConstant adds instructions to load an integer constant.
// is32bit is true if the target value is originally a 32-bit constant, false otherwise.
// value holds the (zero-extended, in the 32-bit case) constant to be loaded.
func (c *arm64Compiler) compileIntConstant(is32bit bool, value uint64) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    var inst asm.Instruction
    var vt runtimeValueType
    if is32bit {
        inst = arm64.MOVW
        vt = runtimeValueTypeI32
    } else {
        inst = arm64.MOVD
        vt = runtimeValueTypeI64
    }

    if value == 0 {
        c.pushRuntimeValueLocationOnRegister(arm64.RegRZR, vt)
    } else {
        // Take a register to load the value.
        reg, err := c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }

        c.assembler.CompileConstToRegister(inst, int64(value), reg)

        c.pushRuntimeValueLocationOnRegister(reg, vt)
    }
    return nil
}

// compileConstF32 implements compiler.compileConstF32 for the arm64 architecture.
func (c *arm64Compiler) compileConstF32(o *wazeroir.OperationConstF32) error {
    return c.compileFloatConstant(true, uint64(math.Float32bits(o.Value)))
}

// compileConstF64 implements compiler.compileConstF64 for the arm64 architecture.
func (c *arm64Compiler) compileConstF64(o *wazeroir.OperationConstF64) error {
    return c.compileFloatConstant(false, math.Float64bits(o.Value))
}

// compileFloatConstant adds instructions to load a float constant.
// is32bit is true if the target value is originally a 32-bit constant, false otherwise.
// value holds the (zero-extended, in the 32-bit case) bit representation of the float constant to be loaded.
func (c *arm64Compiler) compileFloatConstant(is32bit bool, value uint64) error {
    if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
        return err
    }

    // Take a register to load the value.
    reg, err := c.allocateRegister(registerTypeVector)
    if err != nil {
        return err
    }

    tmpReg := arm64.RegRZR
    if value != 0 {
        tmpReg = arm64ReservedRegisterForTemporary
        var inst asm.Instruction
        if is32bit {
            inst = arm64.MOVW
        } else {
            inst = arm64.MOVD
        }
        c.assembler.CompileConstToRegister(inst, int64(value), tmpReg)
    }

    // Use the FMOV instruction to move the value from the integer register into the float one.
    var inst asm.Instruction
    var vt runtimeValueType
    if is32bit {
        vt = runtimeValueTypeF32
        inst = arm64.FMOVS
    } else {
        vt = runtimeValueTypeF64
        inst = arm64.FMOVD
    }
    c.assembler.CompileRegisterToRegister(inst, tmpReg, reg)

    c.pushRuntimeValueLocationOnRegister(reg, vt)
    return nil
}
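// For reference, float constants are materialized through an integer register:
// the IEEE 754 bit pattern is loaded with MOVW/MOVD and then transferred with
// FMOVS/FMOVD (or moved straight from the zero register for 0.0). A minimal
// sketch of the equivalence (illustration only, not part of the compiler):
//
//    import "math"
//
//    b := math.Float64bits(1.5)    // the bit pattern the MOVD above would load
//    f := math.Float64frombits(b)  // the FMOVD step: f == 1.5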
// compileMemoryInit implements compiler.compileMemoryInit for the arm64 architecture.
func (c *arm64Compiler) compileMemoryInit(o *wazeroir.OperationMemoryInit) error {
    return c.compileInitImpl(false, o.DataIndex, 0)
}

// compileInitImpl implements compileTableInit and compileMemoryInit.
//
// TODO: the compiled code in this function should be reused and compiled at once, as
// the code is independent of any module.
func (c *arm64Compiler) compileInitImpl(isTable bool, index, tableIndex uint32) error {
    outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
    if isTable {
        outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
    }

    copySize, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    c.markRegisterUsed(copySize.register)

    sourceOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(sourceOffset.register) {
        sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
    }
    c.markRegisterUsed(sourceOffset.register)

    destinationOffset, err := c.popValueOnRegister()
    if err != nil {
        return err
    }
    if isZeroRegister(destinationOffset.register) {
        destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
    }
    c.markRegisterUsed(destinationOffset.register)

    tableInstanceAddressReg := asm.NilRegister
    if isTable {
        tableInstanceAddressReg, err = c.allocateRegister(registerTypeGeneralPurpose)
        if err != nil {
            return err
        }
        c.markRegisterUsed(tableInstanceAddressReg)
    }

    if !isZeroRegister(copySize.register) {
        // sourceOffset += size.
        c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
        // destinationOffset += size.
        c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
    }

    instanceAddr, err := c.allocateRegister(registerTypeGeneralPurpose)
    if err != nil {
        return err
    }

    if isTable {
        c.compileLoadElemInstanceAddress(index, instanceAddr)
    } else {
        c.compileLoadDataInstanceAddress(index, instanceAddr)
    }

    // Check the data (or element) instance bounds.
    c.assembler.CompileMemoryToRegister(arm64.LDRD,
        instanceAddr, 8, // both DataInstance and ElementInstance store their length at offset 8.
        arm64ReservedRegisterForTemporary)

    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
    sourceBoundsOK := c.assembler.CompileJump(arm64.BCONDLS)

    // If not, raise the out-of-bounds access error.
    c.compileExitFromNativeCode(outOfBoundsErrorStatus)

    c.assembler.SetJumpTargetOnNext(sourceBoundsOK)

    // Check destination bounds.
    if isTable {
        // arm64ReservedRegisterForTemporary = &tables[0]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
            arm64ReservedRegisterForTemporary)
        // tableInstanceAddressReg = arm64ReservedRegisterForTemporary + tableIndex*8
        //                         = &tables[0] + 8*tableIndex
        //                         = &tables[tableIndex]
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
            tableInstanceAddressReg)
        // arm64ReservedRegisterForTemporary = [tableInstanceAddressReg+tableInstanceTableLenOffset] = len(tables[tableIndex])
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            tableInstanceAddressReg, tableInstanceTableLenOffset,
            arm64ReservedRegisterForTemporary)
    } else {
        c.assembler.CompileMemoryToRegister(arm64.LDRD,
            arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
            arm64ReservedRegisterForTemporary)
    }

    c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
    destinationBoundsOK := c.assembler.CompileJump(arm64.BCONDLS)

    // If not, raise the out-of-bounds access error.
    c.compileExitFromNativeCode(outOfBoundsErrorStatus)

    // Otherwise, we are ready to copy the value from source to destination.
    c.assembler.SetJumpTargetOnNext(destinationBoundsOK)

    if !isZeroRegister(copySize.register) {
        // If the size equals zero, we can skip the copy instructions below entirely.
        c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
        skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)

        var ldr, str asm.Instruction
        var movSize int64
        if isTable {
            ldr, str = arm64.LDRD, arm64.STRD
            movSize = 8

            // arm64ReservedRegisterForTemporary = &Table[0]
            c.assembler.CompileMemoryToRegister(arm64.LDRD, tableInstanceAddressReg,
                tableInstanceTableOffset, arm64ReservedRegisterForTemporary)
            // destinationOffset = (destinationOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
            c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                destinationOffset.register, pointerSizeLog2,
                arm64ReservedRegisterForTemporary, destinationOffset.register)

            // arm64ReservedRegisterForTemporary = &ElementInstance.References[0]
            c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
            // sourceOffset = (sourceOffset << pointerSizeLog2) + arm64ReservedRegisterForTemporary
            c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
                sourceOffset.register, pointerSizeLog2,
                arm64ReservedRegisterForTemporary, sourceOffset.register)

            // copySize = copySize << pointerSizeLog2
            c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
        } else {
            ldr, str = arm64.LDRB, arm64.STRB
            movSize = 1

            // destinationOffset += the memory buffer's absolute address.
            c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)

            // sourceOffset += the data buffer's absolute address.
            c.assembler.CompileMemoryToRegister(arm64.LDRD, instanceAddr, 0, arm64ReservedRegisterForTemporary)
            c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, sourceOffset.register)
        }

        // Negate the counter.
3040 		c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)
3041 
3042 		beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3043 
3044 		// arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
3045 		c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
3046 			sourceOffset.register, copySize.register,
3047 			arm64ReservedRegisterForTemporary)
3048 		// [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
3049 		c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3050 			arm64ReservedRegisterForTemporary,
3051 			destinationOffset.register, copySize.register,
3052 		)
3053 
3054 		// Advance the negated counter by movSize; while it is still negative, continue the loop.
3055 		c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
3056 		c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
3057 
3058 		c.assembler.SetJumpTargetOnNext(skipCopyJump)
3059 	}
3060 
3061 	c.markRegisterUnused(copySize.register, sourceOffset.register,
3062 		destinationOffset.register, instanceAddr, tableInstanceAddressReg)
3063 	return nil
3064 }
3065 
3066 // compileDataDrop implements compiler.compileDataDrop for the arm64 architecture.
3067 func (c *arm64Compiler) compileDataDrop(o *wazeroir.OperationDataDrop) error {
3068 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3069 		return err
3070 	}
3071 
3072 	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3073 	if err != nil {
3074 		return err
3075 	}
3076 
3077 	c.compileLoadDataInstanceAddress(o.DataIndex, tmp)
3078 
3079 	// Clear the content of DataInstance[o.DataIndex] (== []byte type).
3080 	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
3081 	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
3082 	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
3083 	return nil
3084 }
3085 
3086 func (c *arm64Compiler) compileLoadDataInstanceAddress(dataIndex uint32, dst asm.Register) {
3087 	// dst = dataIndex * dataInstanceStructSize
3088 	c.assembler.CompileConstToRegister(arm64.MOVD, int64(dataIndex)*dataInstanceStructSize, dst)
3089 
3090 	// arm64ReservedRegisterForTemporary = &moduleInstance.DataInstances[0]
3091 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3092 		arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
3093 		arm64ReservedRegisterForTemporary,
3094 	)
3095 
3096 	// dst = arm64ReservedRegisterForTemporary + dst
3097 	// = &moduleInstance.DataInstances[0] + dataIndex*dataInstanceStructSize
3098 	// = &moduleInstance.DataInstances[dataIndex]
3099 	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
3100 }
3101 
3102 // compileMemoryCopy implements compiler.compileMemoryCopy for the arm64 architecture.
3103 func (c *arm64Compiler) compileMemoryCopy() error {
3104 	return c.compileCopyImpl(false, 0, 0)
3105 }
3106 
3107 // compileCopyImpl implements compileTableCopy and compileMemoryCopy.
3108 //
3109 // TODO: the compiled code in this function should be reused and compiled at once as
3110 // the code is independent of any module.
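// (Added sketch; a hedged paraphrase of the code below, not the author's text:)
// the emitted code follows memmove semantics, picking the copy direction from
// the relative position of the operands so overlapping regions copy correctly:
//
//	if sourceOffset < destinationOffset {
//		for i := size - 1; i >= 0; i-- { dst[i] = src[i] } // backward, overlap-safe
//	} else {
//		for i := 0; i < size; i++ { dst[i] = src[i] } // forward
//	}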
3111 func (c *arm64Compiler) compileCopyImpl(isTable bool, srcTableIndex, dstTableIndex uint32) error {
3112 	outOfBoundsErrorStatus := nativeCallStatusCodeMemoryOutOfBounds
3113 	if isTable {
3114 		outOfBoundsErrorStatus = nativeCallStatusCodeInvalidTableAccess
3115 	}
3116 
3117 	copySize, err := c.popValueOnRegister()
3118 	if err != nil {
3119 		return err
3120 	}
3121 	c.markRegisterUsed(copySize.register)
3122 
3123 	sourceOffset, err := c.popValueOnRegister()
3124 	if err != nil {
3125 		return err
3126 	}
3127 	if isZeroRegister(sourceOffset.register) {
3128 		sourceOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3129 		if err != nil {
3130 			return err
3131 		}
3132 		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, sourceOffset.register)
3133 	}
3134 	c.markRegisterUsed(sourceOffset.register)
3135 
3136 	destinationOffset, err := c.popValueOnRegister()
3137 	if err != nil {
3138 		return err
3139 	}
3140 	if isZeroRegister(destinationOffset.register) {
3141 		destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3142 		if err != nil {
3143 			return err
3144 		}
3145 		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
3146 	}
3147 	c.markRegisterUsed(destinationOffset.register)
3148 
3149 	if !isZeroRegister(copySize.register) {
3150 		// sourceOffset += size.
3151 		c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, sourceOffset.register)
3152 		// destinationOffset += size.
3153 		c.assembler.CompileRegisterToRegister(arm64.ADD, copySize.register, destinationOffset.register)
3154 	}
3155 
3156 	if isTable {
3157 		// arm64ReservedRegisterForTemporary = &tables[0]
3158 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3159 			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3160 			arm64ReservedRegisterForTemporary)
3161 		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + srcTableIndex*8]
3162 		// = [&tables[0] + srcTableIndex*sizeOf(*tableInstance)]
3163 		// = &tables[srcTableIndex]
3164 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3165 			arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
3166 			arm64ReservedRegisterForTemporary)
3167 		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[srcTableIndex])
3168 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3169 			arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3170 			arm64ReservedRegisterForTemporary)
3171 	} else {
3172 		// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
3173 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3174 			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
3175 			arm64ReservedRegisterForTemporary)
3176 	}
3177 
3178 	// Check memory len >= sourceOffset.
3179 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, sourceOffset.register)
3180 	sourceBoundsOK := c.assembler.CompileJump(arm64.BCONDLS)
3181 
3182 	// If not, raise an out of bounds memory access error.
3183 	c.compileExitFromNativeCode(outOfBoundsErrorStatus)
3184 
3185 	c.assembler.SetJumpTargetOnNext(sourceBoundsOK)
3186 
3187 	// Otherwise, check memory len >= destinationOffset.
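	// (Added note, hedged:) for tables, arm64ReservedRegisterForTemporary currently
	// holds len(tables[srcTableIndex]), so the destination table's length must be
	// reloaded from scratch before the comparison below; for memories, the length
	// loaded earlier is still in the register and is reused directly.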
3188 	if isTable {
3189 		// arm64ReservedRegisterForTemporary = &tables[0]
3190 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3191 			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3192 			arm64ReservedRegisterForTemporary)
3193 		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + dstTableIndex*8]
3194 		// = [&tables[0] + dstTableIndex*sizeOf(*tableInstance)]
3195 		// = &tables[dstTableIndex]
3196 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3197 			arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
3198 			arm64ReservedRegisterForTemporary)
3199 		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[dstTableIndex])
3200 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3201 			arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3202 			arm64ReservedRegisterForTemporary)
3203 	}
3204 
3205 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
3206 	destinationBoundsOK := c.assembler.CompileJump(arm64.BCONDLS)
3207 
3208 	// If not, raise an out of bounds memory access error.
3209 	c.compileExitFromNativeCode(outOfBoundsErrorStatus)
3210 
3211 	// Otherwise, we are ready to copy the value from source to destination.
3212 	c.assembler.SetJumpTargetOnNext(destinationBoundsOK)
3213 
3214 	var ldr, str asm.Instruction
3215 	var movSize int64
3216 	if isTable {
3217 		ldr, str = arm64.LDRD, arm64.STRD
3218 		movSize = 8
3219 	} else {
3220 		ldr, str = arm64.LDRB, arm64.STRB
3221 		movSize = 1
3222 	}
3223 
3224 	// If the size equals zero, we can skip the entire instructions below.
3225 	if !isZeroRegister(copySize.register) {
3226 		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, copySize.register)
3227 		skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
3228 
3229 		// If sourceOffset < destinationOffset: for (i = size-1; i >= 0; i--) dst[i] = src[i];
3230 		c.assembler.CompileTwoRegistersToNone(arm64.CMP, sourceOffset.register, destinationOffset.register)
3231 		destLowerThanSourceJump := c.assembler.CompileJump(arm64.BCONDLS)
3232 		var endJump asm.Node
3233 		{
3234 			// sourceOffset -= size.
3235 			c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, sourceOffset.register)
3236 			// destinationOffset -= size.
3237 			c.assembler.CompileRegisterToRegister(arm64.SUB, copySize.register, destinationOffset.register)
3238 
3239 			if isTable {
3240 				// arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
3241 				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3242 					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3243 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3244 					arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
3245 					arm64ReservedRegisterForTemporary)
3246 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3247 					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3248 					arm64ReservedRegisterForTemporary)
3249 				// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
3250 				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3251 					destinationOffset.register, pointerSizeLog2,
3252 					arm64ReservedRegisterForTemporary, destinationOffset.register)
3253 
3254 				// arm64ReservedRegisterForTemporary = &Tables[srcTableIndex]
3255 				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3256 					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3257 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3258 					arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
3259 					arm64ReservedRegisterForTemporary)
3260 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3261 					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3262 					arm64ReservedRegisterForTemporary)
3263 				// sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
3264 				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3265 					sourceOffset.register, pointerSizeLog2,
3266 					arm64ReservedRegisterForTemporary, sourceOffset.register)
3267 
3268 				// copySize = copySize << pointerSizeLog2 as each element is 8 bytes and we copy them one by one.
3269 				c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
3270 			} else {
3271 				// sourceOffset += memory buffer's absolute address.
3272 				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
3273 				// destinationOffset += memory buffer's absolute address.
3274 				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3275 			}
3276 
3277 			beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3278 
3279 			// size -= movSize
3280 			c.assembler.CompileConstToRegister(arm64.SUBS, movSize, copySize.register)
3281 
3282 			// arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
3283 			c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
3284 				sourceOffset.register, copySize.register,
3285 				arm64ReservedRegisterForTemporary)
3286 			// [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
3287 			c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3288 				arm64ReservedRegisterForTemporary,
3289 				destinationOffset.register, copySize.register,
3290 			)
3291 
3292 			// While the value in copySize.register is not zero, continue the loop.
3293 			c.assembler.CompileJump(arm64.BCONDNE).AssignJumpTarget(beginCopyLoop)
3294 
3295 			// Otherwise, exit the loop.
3296 			endJump = c.assembler.CompileJump(arm64.B)
3297 		}
3298 
3299 		// Else (destinationOffset <= sourceOffset): for (i = 0; i < size; i++) dst[i] = src[i];
3300 		c.assembler.SetJumpTargetOnNext(destLowerThanSourceJump)
3301 		{
3302 
3303 			if isTable {
3304 				// arm64ReservedRegisterForTemporary = &Tables[dstTableIndex].Table[0]
3305 				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3306 					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3307 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3308 					arm64ReservedRegisterForTemporary, int64(dstTableIndex)*8,
3309 					arm64ReservedRegisterForTemporary)
3310 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3311 					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3312 					arm64ReservedRegisterForTemporary)
3313 				// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[dstTableIndex].Table[0]
3314 				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3315 					destinationOffset.register, pointerSizeLog2,
3316 					arm64ReservedRegisterForTemporary, destinationOffset.register)
3317 
3318 				// arm64ReservedRegisterForTemporary = &Tables[srcTableIndex]
3319 				c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3320 					callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3321 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3322 					arm64ReservedRegisterForTemporary, int64(srcTableIndex)*8,
3323 					arm64ReservedRegisterForTemporary)
3324 				c.assembler.CompileMemoryToRegister(arm64.LDRD,
3325 					arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3326 					arm64ReservedRegisterForTemporary)
3327 				// sourceOffset = (sourceOffset << pointerSizeLog2) + &Tables[srcTableIndex].Table[0]
3328 				c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3329 					sourceOffset.register, pointerSizeLog2,
3330 					arm64ReservedRegisterForTemporary, sourceOffset.register)
3331 
3332 				// copySize = copySize << pointerSizeLog2 as each element is 8 bytes and we copy them one by one.
3333 				c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, copySize.register)
3334 			} else {
3335 				// sourceOffset += memory buffer's absolute address.
3336 				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, sourceOffset.register)
3337 				// destinationOffset += memory buffer's absolute address.
3338 				c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3339 			}
3340 
3341 			// Negate the counter.
3342 			c.assembler.CompileRegisterToRegister(arm64.NEG, copySize.register, copySize.register)
3343 
3344 			beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3345 
3346 			// arm64ReservedRegisterForTemporary = [sourceOffset + copySize.register]
3347 			c.assembler.CompileMemoryWithRegisterOffsetToRegister(ldr,
3348 				sourceOffset.register, copySize.register,
3349 				arm64ReservedRegisterForTemporary)
3350 			// [destinationOffset + copySize.register] = arm64ReservedRegisterForTemporary.
3351 			c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3352 				arm64ReservedRegisterForTemporary,
3353 				destinationOffset.register, copySize.register,
3354 			)
3355 
3356 			// Advance the negated counter by movSize; while it is still negative, continue the loop.
3357 			c.assembler.CompileConstToRegister(arm64.ADDS, movSize, copySize.register)
3358 			c.assembler.CompileJump(arm64.BCONDMI).AssignJumpTarget(beginCopyLoop)
3359 		}
3360 		c.assembler.SetJumpTargetOnNext(skipCopyJump, endJump)
3361 	}
3362 
3363 	// Mark all of the operand registers as unused.
3364 	c.markRegisterUnused(copySize.register, sourceOffset.register, destinationOffset.register)
3365 
3366 	return nil
3367 }
3368 
3369 // compileMemoryFill implements compiler.compileMemoryFill for the arm64 architecture.
3370 func (c *arm64Compiler) compileMemoryFill() error {
3371 	return c.compileFillImpl(false, 0)
3372 }
3373 
3374 // compileFillImpl implements compileTableFill and compileMemoryFill.
3375 //
3376 // TODO: the compiled code in this function should be reused and compiled at once as
3377 // the code is independent of any module.
3378 func (c *arm64Compiler) compileFillImpl(isTable bool, tableIndex uint32) error {
3379 	fillSize, err := c.popValueOnRegister()
3380 	if err != nil {
3381 		return err
3382 	}
3383 	c.markRegisterUsed(fillSize.register)
3384 
3385 	value, err := c.popValueOnRegister()
3386 	if err != nil {
3387 		return err
3388 	}
3389 	c.markRegisterUsed(value.register)
3390 
3391 	destinationOffset, err := c.popValueOnRegister()
3392 	if err != nil {
3393 		return err
3394 	}
3395 	if isZeroRegister(destinationOffset.register) {
3396 		destinationOffset.register, err = c.allocateRegister(registerTypeGeneralPurpose)
3397 		if err != nil {
3398 			return err
3399 		}
3400 		c.assembler.CompileRegisterToRegister(arm64.MOVD, arm64.RegRZR, destinationOffset.register)
3401 	}
3402 	c.markRegisterUsed(destinationOffset.register)
3403 
3404 	// destinationOffset += size.
3405 	c.assembler.CompileRegisterToRegister(arm64.ADD, fillSize.register, destinationOffset.register)
3406 
3407 	if isTable {
3408 		// arm64ReservedRegisterForTemporary = &tables[0]
3409 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3410 			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3411 			arm64ReservedRegisterForTemporary)
3412 		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + tableIndex*8]
3413 		// = [&tables[0] + tableIndex*sizeOf(*tableInstance)]
3414 		// = &tables[tableIndex]
3415 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3416 			arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
3417 			arm64ReservedRegisterForTemporary)
3418 		// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary+tableInstanceTableLenOffset] = len(tables[tableIndex])
3419 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3420 			arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3421 			arm64ReservedRegisterForTemporary)
3422 	} else {
3423 		// arm64ReservedRegisterForTemporary = len(memoryInst.Buffer).
3424 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3425 			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
3426 			arm64ReservedRegisterForTemporary)
3427 	}
3428 
3429 	// Check len >= destinationOffset.
3430 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64ReservedRegisterForTemporary, destinationOffset.register)
3431 	destinationBoundsOK := c.assembler.CompileJump(arm64.BCONDLS)
3432 
3433 	// If not, raise the runtime error.
3434 	if isTable {
3435 		c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess)
3436 	} else {
3437 		c.compileExitFromNativeCode(nativeCallStatusCodeMemoryOutOfBounds)
3438 	}
3439 
3440 	// Otherwise, we are ready to fill the destination region.
3441 	c.assembler.SetJumpTargetOnNext(destinationBoundsOK)
3442 
3443 	// If the size equals zero, we can skip the entire instructions below.
3444 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, fillSize.register)
3445 	skipCopyJump := c.assembler.CompileJump(arm64.BCONDEQ)
3446 
3447 	// destinationOffset -= size.
3448 	c.assembler.CompileRegisterToRegister(arm64.SUB, fillSize.register, destinationOffset.register)
3449 
3450 	var str asm.Instruction
3451 	var movSize int64
3452 	if isTable {
3453 		str = arm64.STRD
3454 		movSize = 8
3455 
3456 		// arm64ReservedRegisterForTemporary = &Tables[tableIndex].Table[0]
3457 		c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine,
3458 			callEngineModuleContextTablesElement0AddressOffset, arm64ReservedRegisterForTemporary)
3459 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3460 			arm64ReservedRegisterForTemporary, int64(tableIndex)*8,
3461 			arm64ReservedRegisterForTemporary)
3462 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
3463 			arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3464 			arm64ReservedRegisterForTemporary)
3465 		// destinationOffset = (destinationOffset << pointerSizeLog2) + &Tables[tableIndex].Table[0]
3466 		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3467 			destinationOffset.register, pointerSizeLog2,
3468 			arm64ReservedRegisterForTemporary, destinationOffset.register)
3469 
3470 		// fillSize = fillSize << pointerSizeLog2 as each element is 8 bytes and we fill them one by one.
3471 		c.assembler.CompileConstToRegister(arm64.LSL, pointerSizeLog2, fillSize.register)
3472 	} else {
3473 		str = arm64.STRB
3474 		movSize = 1
3475 
3476 		// destinationOffset += memory buffer's absolute address.
3477 		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, destinationOffset.register)
3478 	}
3479 
3480 	// Naively implement the fill with a loop, storing one element at a time.
3481 	beginCopyLoop := c.assembler.CompileStandAlone(arm64.NOP)
3482 
3483 	// fillSize -= movSize
3484 	c.assembler.CompileConstToRegister(arm64.SUBS, movSize, fillSize.register)
3485 
3486 	// [destinationOffset + fillSize.register] = value.register.
3487 	c.assembler.CompileRegisterToMemoryWithRegisterOffset(str,
3488 		value.register,
3489 		destinationOffset.register, fillSize.register,
3490 	)
3491 
3492 	// While the value in fillSize.register is not zero, continue the loop.
3493 	continueJump := c.assembler.CompileJump(arm64.BCONDNE)
3494 	continueJump.AssignJumpTarget(beginCopyLoop)
3495 
3496 	// Mark all of the operand registers as unused.
3497 	c.markRegisterUnused(fillSize.register, value.register, destinationOffset.register)
3498 
3499 	c.assembler.SetJumpTargetOnNext(skipCopyJump)
3500 	return nil
3501 }
3502 
3503 // compileTableInit implements compiler.compileTableInit for the arm64 architecture.
3504 func (c *arm64Compiler) compileTableInit(o *wazeroir.OperationTableInit) error {
3505 	return c.compileInitImpl(true, o.ElemIndex, o.TableIndex)
3506 }
3507 
3508 // compileTableCopy implements compiler.compileTableCopy for the arm64 architecture.
3509 func (c *arm64Compiler) compileTableCopy(o *wazeroir.OperationTableCopy) error {
3510 	return c.compileCopyImpl(true, o.SrcTableIndex, o.DstTableIndex)
3511 }
3512 
3513 // compileElemDrop implements compiler.compileElemDrop for the arm64 architecture.
3514 func (c *arm64Compiler) compileElemDrop(o *wazeroir.OperationElemDrop) error {
3515 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3516 		return err
3517 	}
3518 
3519 	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3520 	if err != nil {
3521 		return err
3522 	}
3523 
3524 	c.compileLoadElemInstanceAddress(o.ElemIndex, tmp)
3525 
3526 	// Clear the content of ElementInstances[o.ElemIndex] (== []interface{} type).
3527 	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 0)
3528 	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 8)
3529 	c.assembler.CompileRegisterToMemory(arm64.STRD, arm64.RegRZR, tmp, 16)
3530 	return nil
3531 }
3532 
3533 func (c *arm64Compiler) compileLoadElemInstanceAddress(elemIndex uint32, dst asm.Register) {
3534 	// dst = elemIndex * elementInstanceStructSize
3535 	c.assembler.CompileConstToRegister(arm64.MOVD, int64(elemIndex)*elementInstanceStructSize, dst)
3536 
3537 	// arm64ReservedRegisterForTemporary = &moduleInstance.ElementInstances[0]
3538 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3539 		arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
3540 		arm64ReservedRegisterForTemporary,
3541 	)
3542 
3543 	// dst = arm64ReservedRegisterForTemporary + dst
3544 	// = &moduleInstance.ElementInstances[0] + elemIndex*elementInstanceStructSize
3545 	// = &moduleInstance.ElementInstances[elemIndex]
3546 	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, dst)
3547 }
3548 
3549 // compileRefFunc implements compiler.compileRefFunc for the arm64 architecture.
3550 func (c *arm64Compiler) compileRefFunc(o *wazeroir.OperationRefFunc) error {
3551 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3552 		return err
3553 	}
3554 
3555 	ref, err := c.allocateRegister(registerTypeGeneralPurpose)
3556 	if err != nil {
3557 		return err
3558 	}
3559 	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForCallEngine + callEngineModuleContextFunctionsElement0AddressOffset]
3560 	// = &moduleEngine.functions[0]
3561 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3562 		arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
3563 		arm64ReservedRegisterForTemporary)
3564 
3565 	// ref = [arm64ReservedRegisterForTemporary + int64(o.FunctionIndex)*8]
3566 	// = [&moduleEngine.functions[0] + sizeOf(*function) * index]
3567 	// = moduleEngine.functions[index]
3568 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3569 		arm64ReservedRegisterForTemporary, int64(o.FunctionIndex)*8, // * 8 because the size of *function equals 8 bytes.
3570 		ref,
3571 	)
3572 
3573 	c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64)
3574 	return nil
3575 }
3576 
3577 // compileTableGet implements compiler.compileTableGet for the arm64 architecture.
3578 func (c *arm64Compiler) compileTableGet(o *wazeroir.OperationTableGet) error {
3579 	ref, err := c.allocateRegister(registerTypeGeneralPurpose)
3580 	if err != nil {
3581 		return err
3582 	}
3583 	c.markRegisterUsed(ref)
3584 
3585 	offset, err := c.popValueOnRegister()
3586 	if err != nil {
3587 		return err
3588 	}
3589 
3590 	// arm64ReservedRegisterForTemporary = &tables[0]
3591 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3592 		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3593 		arm64ReservedRegisterForTemporary)
3594 	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
3595 	// = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3596 	// = [&tables[TableIndex]] = tables[TableIndex].
3597 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3598 		arm64ReservedRegisterForTemporary, int64(o.TableIndex)*8,
3599 		arm64ReservedRegisterForTemporary)
3600 
3601 	// Out of bounds check.
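	// (Added sketch, hedged:) the CMP/BCONDLO pair emitted below encodes
	//
	//	if offset >= len(tables[TableIndex].References) { trap(invalidTableAccess) }
	//
	// using an unsigned comparison against the table length.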
3602 	// ref = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
3603 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3604 		arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3605 		ref,
3606 	)
3607 	// "cmp ref, offset"
3608 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, ref, offset.register)
3609 
3610 	// If it exceeds len(table), we exit the execution.
3611 	brIfBoundsOK := c.assembler.CompileJump(arm64.BCONDLO)
3612 	c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess)
3613 	c.assembler.SetJumpTargetOnNext(brIfBoundsOK)
3614 
3615 	// ref = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
3616 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3617 		arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3618 		ref,
3619 	)
3620 
3621 	// ref = (offset << pointerSizeLog2) + ref
3622 	// = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
3623 	// = &tables[TableIndex].References[offset]
3624 	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
3625 		offset.register, pointerSizeLog2, ref, ref)
3626 
3627 	// ref = [ref] = tables[TableIndex].References[offset], i.e. the raw reference loaded as uint64.
3628 	c.assembler.CompileMemoryToRegister(arm64.LDRD, ref, 0, ref)
3629 
3630 	c.pushRuntimeValueLocationOnRegister(ref, runtimeValueTypeI64) // table elements are opaque 64-bit values at runtime.
3631 	return nil
3632 }
3633 
3634 // compileTableSet implements compiler.compileTableSet for the arm64 architecture.
3635 func (c *arm64Compiler) compileTableSet(o *wazeroir.OperationTableSet) error {
3636 	ref := c.locationStack.pop()
3637 	if err := c.compileEnsureOnRegister(ref); err != nil {
3638 		return err
3639 	}
3640 
3641 	offset := c.locationStack.pop()
3642 	if err := c.compileEnsureOnRegister(offset); err != nil {
3643 		return err
3644 	}
3645 
3646 	tmp, err := c.allocateRegister(registerTypeGeneralPurpose)
3647 	if err != nil {
3648 		return err
3649 	}
3650 
3651 	// arm64ReservedRegisterForTemporary = &tables[0]
3652 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3653 		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3654 		arm64ReservedRegisterForTemporary)
3655 	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
3656 	// = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3657 	// = &tables[TableIndex]
3658 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3659 		arm64ReservedRegisterForTemporary, int64(o.TableIndex)*8,
3660 		arm64ReservedRegisterForTemporary)
3661 
3662 	// Out of bounds check.
3663 	// tmp = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
3664 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3665 		arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3666 		tmp,
3667 	)
3668 	// "cmp tmp, offset"
3669 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, tmp, offset.register)
3670 
3671 	// If it exceeds len(table), we exit the execution.
3672 	brIfBoundsOK := c.assembler.CompileJump(arm64.BCONDLO)
3673 	c.compileExitFromNativeCode(nativeCallStatusCodeInvalidTableAccess)
3674 	c.assembler.SetJumpTargetOnNext(brIfBoundsOK)
3675 
3676 	// tmp = [&tables[TableIndex] + tableInstanceTableOffset] = &tables[TableIndex].References[0]
3677 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3678 		arm64ReservedRegisterForTemporary, tableInstanceTableOffset,
3679 		tmp,
3680 	)
3681 
3682 	// tmp = (offset << pointerSizeLog2) + tmp
3683 	// = &tables[TableIndex].References[0] + sizeOf(uintptr) * offset
3684 	// = &tables[TableIndex].References[offset]
3685 	c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD, offset.register, pointerSizeLog2, tmp, tmp)
3686 
3687 	// Set the reference's raw pointer.
3688 	c.assembler.CompileRegisterToMemory(arm64.STRD, ref.register, tmp, 0)
3689 
3690 	c.markRegisterUnused(offset.register, ref.register, tmp)
3691 	return nil
3692 }
3693 
3694 // compileTableGrow implements compiler.compileTableGrow for the arm64 architecture.
3695 func (c *arm64Compiler) compileTableGrow(o *wazeroir.OperationTableGrow) error {
3696 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3697 		return err
3698 	}
3699 
3700 	// Pushes the table index.
3701 	if err := c.compileConstI32(&wazeroir.OperationConstI32{Value: o.TableIndex}); err != nil {
3702 		return err
3703 	}
3704 
3705 	// Table grow cannot be done in assembly, just like memory grow, as it involves allocation in Go.
3706 	// Therefore, we call out to the builtin function for this purpose.
3707 	if err := c.compileCallGoFunction(nativeCallStatusCodeCallBuiltInFunction, builtinFunctionIndexTableGrow); err != nil {
3708 		return err
3709 	}
3710 
3711 	// TableGrow consumes three values (table index, number of items, initial value).
3712 	for i := 0; i < 3; i++ {
3713 		c.locationStack.pop()
3714 	}
3715 
3716 	// Then, the previous length is pushed as the result.
3717 	c.locationStack.pushRuntimeValueLocationOnStack()
3718 
3719 	// After return, we re-initialize the reserved registers just like in the function preamble.
3720 	c.compileReservedStackBasePointerRegisterInitialization()
3721 	c.compileReservedMemoryRegisterInitialization()
3722 	return nil
3723 }
3724 
3725 // compileTableSize implements compiler.compileTableSize for the arm64 architecture.
3726 func (c *arm64Compiler) compileTableSize(o *wazeroir.OperationTableSize) error {
3727 	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
3728 		return err
3729 	}
3730 	result, err := c.allocateRegister(registerTypeGeneralPurpose)
3731 	if err != nil {
3732 		return err
3733 	}
3734 	c.markRegisterUsed(result)
3735 
3736 	// arm64ReservedRegisterForTemporary = &tables[0]
3737 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3738 		arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
3739 		arm64ReservedRegisterForTemporary)
3740 	// arm64ReservedRegisterForTemporary = [arm64ReservedRegisterForTemporary + TableIndex*8]
3741 	// = [&tables[0] + TableIndex*sizeOf(*tableInstance)]
3742 	// = [&tables[TableIndex]] = tables[TableIndex].
3743 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3744 		arm64ReservedRegisterForTemporary, int64(o.TableIndex)*8,
3745 		arm64ReservedRegisterForTemporary)
3746 
3747 	// result = [&tables[TableIndex] + tableInstanceTableLenOffset] = len(tables[TableIndex])
3748 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3749 		arm64ReservedRegisterForTemporary, tableInstanceTableLenOffset,
3750 		result,
3751 	)
3752 
3753 	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
3754 	return nil
3755 }
3756 
3757 // compileTableFill implements compiler.compileTableFill for the arm64 architecture.
3758 func (c *arm64Compiler) compileTableFill(o *wazeroir.OperationTableFill) error {
3759 	return c.compileFillImpl(true, o.TableIndex)
3760 }
3761 
3762 // popTwoValuesOnRegisters pops two values from the location stack, ensures
3763 // these two values are located on registers, and marks them unused.
3764 //
3765 // TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
3766 // but the name seems awkward.
3767 func (c *arm64Compiler) popTwoValuesOnRegisters() (x1, x2 *runtimeValueLocation, err error) {
3768 	x2 = c.locationStack.pop()
3769 	if err = c.compileEnsureOnRegister(x2); err != nil {
3770 		return
3771 	}
3772 
3773 	x1 = c.locationStack.pop()
3774 	if err = c.compileEnsureOnRegister(x1); err != nil {
3775 		return
3776 	}
3777 
3778 	c.markRegisterUnused(x2.register)
3779 	c.markRegisterUnused(x1.register)
3780 	return
3781 }
3782 
3783 // popValueOnRegister pops one value from the location stack, ensures
3784 // that it is located on a register, and marks it unused.
3785 //
3786 // TODO: we’d usually prefix this with compileXXX as this might end up emitting instructions,
3787 // but the name seems awkward.
3788 func (c *arm64Compiler) popValueOnRegister() (v *runtimeValueLocation, err error) {
3789 	v = c.locationStack.pop()
3790 	if err = c.compileEnsureOnRegister(v); err != nil {
3791 		return
3792 	}
3793 
3794 	c.markRegisterUnused(v.register)
3795 	return
3796 }
3797 
3798 // compileEnsureOnRegister emits instructions to ensure that a value is located on a register.
3799 func (c *arm64Compiler) compileEnsureOnRegister(loc *runtimeValueLocation) (err error) {
3800 	if loc.onStack() {
3801 		reg, err := c.allocateRegister(loc.getRegisterType())
3802 		if err != nil {
3803 			return err
3804 		}
3805 
3806 		// Record that the value holds the register and the register is marked used.
3807 		loc.setRegister(reg)
3808 		c.markRegisterUsed(reg)
3809 
3810 		c.compileLoadValueOnStackToRegister(loc)
3811 	} else if loc.onConditionalRegister() {
3812 		err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
3813 	}
3814 	return
3815 }
3816 
3817 // maybeCompileMoveTopConditionalToGeneralPurposeRegister moves the top value on the stack
3818 // to a general purpose register if it is located on a conditional register.
3819 //
3820 // This is usually called at the beginning of compiler interface methods that might
3821 // emit instructions without saving the conditional register value.
3822 // compile* functions that do not call this must save the conditional
3823 // value to the stack or a register by invoking compileEnsureOnRegister for the top value.
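// (Added sketch, hedged:) the guard below is roughly
//
//	if sp > 0 && stack.peek().onConditionalRegister() {
//		materialize the flags into a general purpose register
//	}
//
// since any later flag-setting instruction would silently destroy the pending
// conditional value.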
3824 func (c *arm64Compiler) maybeCompileMoveTopConditionalToGeneralPurposeRegister() (err error) {
3825 	if c.locationStack.sp > 0 {
3826 		if loc := c.locationStack.peek(); loc.onConditionalRegister() {
3827 			err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc)
3828 		}
3829 	}
3830 	return
3831 }
3832 
3833 // compileLoadConditionalRegisterToGeneralPurposeRegister saves the conditional register value
3834 // to a general purpose register.
3835 func (c *arm64Compiler) compileLoadConditionalRegisterToGeneralPurposeRegister(loc *runtimeValueLocation) error {
3836 	reg, err := c.allocateRegister(loc.getRegisterType())
3837 	if err != nil {
3838 		return err
3839 	}
3840 
3841 	c.markRegisterUsed(reg)
3842 	c.assembler.CompileConditionalRegisterSet(loc.conditionalRegister, reg)
3843 
3844 	// Record that now the value is located on a general purpose register.
3845 	loc.setRegister(reg)
3846 	return nil
3847 }
3848 
3849 // compileLoadValueOnStackToRegister implements compiler.compileLoadValueOnStackToRegister for arm64.
3850 func (c *arm64Compiler) compileLoadValueOnStackToRegister(loc *runtimeValueLocation) {
3851 	switch loc.valueType {
3852 	case runtimeValueTypeI32:
3853 		c.assembler.CompileMemoryToRegister(arm64.LDRW, arm64ReservedRegisterForStackBasePointerAddress,
3854 			int64(loc.stackPointer)*8, loc.register)
3855 	case runtimeValueTypeI64:
3856 		c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForStackBasePointerAddress,
3857 			int64(loc.stackPointer)*8, loc.register)
3858 	case runtimeValueTypeF32:
3859 		c.assembler.CompileMemoryToRegister(arm64.FLDRS, arm64ReservedRegisterForStackBasePointerAddress,
3860 			int64(loc.stackPointer)*8, loc.register)
3861 	case runtimeValueTypeF64:
3862 		c.assembler.CompileMemoryToRegister(arm64.FLDRD, arm64ReservedRegisterForStackBasePointerAddress,
3863 			int64(loc.stackPointer)*8, loc.register)
3864 	case runtimeValueTypeV128Lo:
3865 		c.assembler.CompileMemoryToVectorRegister(arm64.VMOV,
3866 			arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8, loc.register,
3867 			arm64.VectorArrangementQ)
3868 		// The higher 64 bits are loaded as well ^^.
3869 		hi := c.locationStack.stack[loc.stackPointer+1]
3870 		hi.setRegister(loc.register)
3871 	case runtimeValueTypeV128Hi:
3872 		panic("BUG: V128Hi must be loaded to a register along with V128Lo")
3873 	}
3874 }
3875 
3876 // allocateRegister implements compiler.allocateRegister for arm64.
3877 func (c *arm64Compiler) allocateRegister(t registerType) (reg asm.Register, err error) {
3878 	var ok bool
3879 	// Try to get an unused register.
3880 	reg, ok = c.locationStack.takeFreeRegister(t)
3881 	if ok {
3882 		return
3883 	}
3884 
3885 	// If not found, we have to steal a register.
3886 	stealTarget, ok := c.locationStack.takeStealTargetFromUsedRegister(t)
3887 	if !ok {
3888 		err = fmt.Errorf("cannot steal register")
3889 		return
3890 	}
3891 
3892 	// Release the steal target register value onto the stack location.
3893 	reg = stealTarget.register
3894 	c.compileReleaseRegisterToStack(stealTarget)
3895 	return
3896 }
3897 
3898 // compileReleaseAllRegistersToStack adds instructions to store all the values located on
3899 // either general purpose or conditional registers onto the memory stack.
3900 // See compileReleaseRegisterToStack.
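// (Added sketch, hedged:) the effect of the loop below is roughly
//
//	for i := 0; i < sp; i++ {
//		if stack[i] lives in a (conditional) register {
//			spill it to [stackBase + i*8] and free the register
//		}
//	}
//
// so that branch targets and calls can assume every value lives in memory.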
3901 func (c *arm64Compiler) compileReleaseAllRegistersToStack() (err error) {
3902 	for i := uint64(0); i < c.locationStack.sp; i++ {
3903 		if loc := c.locationStack.stack[i]; loc.onRegister() {
3904 			c.compileReleaseRegisterToStack(loc)
3905 		} else if loc.onConditionalRegister() {
3906 			if err = c.compileLoadConditionalRegisterToGeneralPurposeRegister(loc); err != nil {
3907 				return
3908 			}
3909 			c.compileReleaseRegisterToStack(loc)
3910 		}
3911 	}
3912 	return
3913 }
3914 
3915 // compileReleaseRegisterToStack adds an instruction to write the value in a register back to the memory stack region.
3916 func (c *arm64Compiler) compileReleaseRegisterToStack(loc *runtimeValueLocation) {
3917 	switch loc.valueType {
3918 	case runtimeValueTypeI32:
3919 		c.assembler.CompileRegisterToMemory(arm64.STRW, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
3920 	case runtimeValueTypeI64:
3921 		c.assembler.CompileRegisterToMemory(arm64.STRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
3922 	case runtimeValueTypeF32:
3923 		c.assembler.CompileRegisterToMemory(arm64.FSTRS, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
3924 	case runtimeValueTypeF64:
3925 		c.assembler.CompileRegisterToMemory(arm64.FSTRD, loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8)
3926 	case runtimeValueTypeV128Lo:
3927 		c.assembler.CompileVectorRegisterToMemory(arm64.VMOV,
3928 			loc.register, arm64ReservedRegisterForStackBasePointerAddress, int64(loc.stackPointer)*8,
3929 			arm64.VectorArrangementQ)
3930 		// The higher 64 bits are released as well ^^.
3931 		hi := c.locationStack.stack[loc.stackPointer+1]
3932 		c.locationStack.releaseRegister(hi)
3933 	case runtimeValueTypeV128Hi:
3934 		panic("BUG: V128Hi must be released to the stack along with V128Lo")
3935 	}
3936 
3937 	// Mark the register as free.
3938 	c.locationStack.releaseRegister(loc)
3939 }
3940 
3941 // compileReservedStackBasePointerRegisterInitialization adds instructions to initialize arm64ReservedRegisterForStackBasePointerAddress
3942 // so that it points to the absolute address of the stack base for this function.
3943 func (c *arm64Compiler) compileReservedStackBasePointerRegisterInitialization() {
3944 	// First, load the address of the first element in the value stack into arm64ReservedRegisterForStackBasePointerAddress temporarily.
3945 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3946 		arm64ReservedRegisterForCallEngine, callEngineStackContextStackElement0AddressOffset,
3947 		arm64ReservedRegisterForStackBasePointerAddress)
3948 
3949 	// Next, we load the stack base pointer (ce.stackBasePointer) into arm64ReservedRegisterForTemporary.
3950 	c.assembler.CompileMemoryToRegister(arm64.LDRD,
3951 		arm64ReservedRegisterForCallEngine, callEngineStackContextStackBasePointerInBytesOffset,
3952 		arm64ReservedRegisterForTemporary)
3953 
3954 	// Finally, we calculate "arm64ReservedRegisterForStackBasePointerAddress += arm64ReservedRegisterForTemporary".
3955 	c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForTemporary, arm64ReservedRegisterForStackBasePointerAddress)
3956 }
3957 
3958 func (c *arm64Compiler) compileReservedMemoryRegisterInitialization() {
3959 	if c.ir.HasMemory || c.ir.UsesMemory {
3960 		// "arm64ReservedRegisterForMemory = ce.MemoryElement0Address"
3961 		c.assembler.CompileMemoryToRegister(
3962 			arm64.LDRD,
3963 			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
3964 			arm64ReservedRegisterForMemory,
3965 		)
3966 	}
3967 }
3968 
3969 // compileModuleContextInitialization adds instructions to initialize ce.moduleContext's fields based on
3970 // ce.moduleContext.ModuleInstanceAddress.
3971 // This is called in two cases: in the function preamble, and on the return from (non-Go) function calls.
3972 func (c *arm64Compiler) compileModuleContextInitialization() error {
3973 	regs, found := c.locationStack.takeFreeRegisters(registerTypeGeneralPurpose, 2)
3974 	if !found {
3975 		panic("BUG: all the registers should be free at this point")
3976 	}
3977 	c.markRegisterUsed(regs...)
3978 
3979 	// Alias these free registers for readability.
3980 	tmpX, tmpY := regs[0], regs[1]
3981 
3982 	// "tmpX = ce.ModuleInstanceAddress"
3983 	c.assembler.CompileMemoryToRegister(arm64.LDRD, arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceAddressOffset, tmpX)
3984 
3985 	// If the module instance address stays the same, we can skip the entire code below.
3986 	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64CallingConventionModuleInstanceAddressRegister, tmpX)
3987 	brIfModuleUnchanged := c.assembler.CompileJump(arm64.BCONDEQ)
3988 
3989 	// Otherwise, update the moduleEngine.moduleContext.ModuleInstanceAddress.
3990 	c.assembler.CompileRegisterToMemory(arm64.STRD,
3991 		arm64CallingConventionModuleInstanceAddressRegister,
3992 		arm64ReservedRegisterForCallEngine, callEngineModuleContextModuleInstanceAddressOffset,
3993 	)
3994 
3995 	// Also, we have to update the following fields:
3996 	// * callEngine.moduleContext.globalElement0Address
3997 	// * callEngine.moduleContext.memoryElement0Address
3998 	// * callEngine.moduleContext.memorySliceLen
3999 	// * callEngine.moduleContext.memoryInstance
4000 	// * callEngine.moduleContext.tableElement0Address
4001 	// * callEngine.moduleContext.tableSliceLen
4002 	// * callEngine.moduleContext.functionsElement0Address
4003 	// * callEngine.moduleContext.typeIDsElement0Address
4004 	// * callEngine.moduleContext.dataInstancesElement0Address
4005 	// * callEngine.moduleContext.elementInstancesElement0Address
4006 
4007 	// Update globalElement0Address.
4008 	//
4009 	// Note: if there's a global.get or global.set instruction in the function, the existence of the globals
4010 	// is ensured by function validation at the module instantiation phase, and that's why it is ok to
4011 	// skip the initialization if the module's globals slice is empty.
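	// (Added illustration, hedged:) the cached field is, in Go terms, roughly
	//
	//	ce.moduleContext.globalElement0Address = uintptr(unsafe.Pointer(&moduleInstance.Globals[0]))
	//
	// so later global.get/global.set lowerings can index the globals without
	// chasing the module instance pointer again.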
4012 	if len(c.ir.Globals) > 0 {
4013 		// "tmpX = &moduleInstance.Globals[0]"
4014 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
4015 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceGlobalsOffset,
4016 			tmpX,
4017 		)
4018 
4019 		// "ce.GlobalElement0Address = tmpX (== &moduleInstance.Globals[0])"
4020 		c.assembler.CompileRegisterToMemory(
4021 			arm64.STRD, tmpX,
4022 			arm64ReservedRegisterForCallEngine, callEngineModuleContextGlobalElement0AddressOffset,
4023 		)
4024 	}
4025 
4026 	// Update memoryElement0Address and memorySliceLen.
4027 	//
4028 	// Note: if there's a memory instruction in the function, the memory instance must be non-nil.
4029 	// That is ensured by function validation at the module instantiation phase, and that's
4030 	// why it is ok to skip the initialization if the module's memory instance is nil.
4031 	if c.ir.HasMemory {
4032 		// "tmpX = moduleInstance.Memory"
4033 		c.assembler.CompileMemoryToRegister(
4034 			arm64.LDRD,
4035 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceMemoryOffset,
4036 			tmpX,
4037 		)
4038 
4039 		// First, set ce.memoryInstance.
4040 		c.assembler.CompileRegisterToMemory(
4041 			arm64.STRD,
4042 			tmpX,
4043 			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryInstanceOffset,
4044 		)
4045 
4046 		// Next, we write the memory length into ce.MemorySliceLen.
4047 		//
4048 		// "tmpY = [tmpX + memoryInstanceBufferLenOffset] (== len(memory.Buffer))"
4049 		c.assembler.CompileMemoryToRegister(
4050 			arm64.LDRD,
4051 			tmpX, memoryInstanceBufferLenOffset,
4052 			tmpY,
4053 		)
4054 		// "ce.MemorySliceLen = tmpY".
4055 		c.assembler.CompileRegisterToMemory(
4056 			arm64.STRD,
4057 			tmpY,
4058 			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemorySliceLenOffset,
4059 		)
4060 
4061 		// Finally, we write ce.memoryElement0Address.
4062 		//
4063 		// "tmpY = *tmpX (== &memory.Buffer[0])"
4064 		c.assembler.CompileMemoryToRegister(
4065 			arm64.LDRD,
4066 			tmpX, memoryInstanceBufferOffset,
4067 			tmpY,
4068 		)
4069 		// "ce.memoryElement0Address = tmpY".
4070 		c.assembler.CompileRegisterToMemory(
4071 			arm64.STRD,
4072 			tmpY,
4073 			arm64ReservedRegisterForCallEngine, callEngineModuleContextMemoryElement0AddressOffset,
4074 		)
4075 	}
4076 
4077 	// Update tableElement0Address, tableSliceLen and typeIDsElement0Address.
4078 	//
4079 	// Note: if there's a table instruction in the function, the existence of the table
4080 	// is ensured by function validation at the module instantiation phase, and that's
4081 	// why it is ok to skip the initialization if the module's table doesn't exist.
4082 	if c.ir.HasTable {
4083 		// "tmpX = &tables[0] (type of **wasm.Table)"
4084 		c.assembler.CompileMemoryToRegister(
4085 			arm64.LDRD,
4086 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTablesOffset,
4087 			tmpX,
4088 		)
4089 
4090 		// Update ce.tableElement0Address.
4091 		// "ce.tableElement0Address = tmpX".
4092 		c.assembler.CompileRegisterToMemory(
4093 			arm64.STRD,
4094 			tmpX,
4095 			arm64ReservedRegisterForCallEngine, callEngineModuleContextTablesElement0AddressOffset,
4096 		)
4097 
4098 		// Finally, we put &ModuleInstance.TypeIDs[0] into moduleContext.typeIDsElement0Address.
4099 		c.assembler.CompileMemoryToRegister(arm64.LDRD,
4100 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceTypeIDsOffset, tmpX)
4101 		c.assembler.CompileRegisterToMemory(arm64.STRD,
4102 			tmpX, arm64ReservedRegisterForCallEngine, callEngineModuleContextTypeIDsElement0AddressOffset)
4103 	}
4104 
4105 	// Update callEngine.moduleContext.functionsElement0Address.
4106 	{
4107 		// "tmpX = [moduleInstanceAddressRegister + moduleInstanceEngineOffset + interfaceDataOffset] (== *moduleEngine)"
4108 		//
4109 		// Go's interface is laid out in memory as two quad words, i.e. struct {tab, data uintptr},
4110 		// where tab points to the interface table, and data points to the actual
4111 		// implementation of the interface. In this case, we extract the "data" pointer as the *moduleEngine.
4112 		// See the following references for detail:
4113 		// * https://research.swtch.com/interfaces
4114 		// * https://github.com/golang/go/blob/release-branch.go1.17/src/runtime/runtime2.go#L207-L210
4115 		c.assembler.CompileMemoryToRegister(
4116 			arm64.LDRD,
4117 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceEngineOffset+interfaceDataOffset,
4118 			tmpX,
4119 		)
4120 
4121 		// "tmpY = [tmpX + moduleEngineFunctionsOffset] (== &moduleEngine.functions[0])"
4122 		c.assembler.CompileMemoryToRegister(
4123 			arm64.LDRD,
4124 			tmpX, moduleEngineFunctionsOffset,
4125 			tmpY,
4126 		)
4127 
4128 		// "callEngine.moduleContext.functionsElement0Address = tmpY".
4129 		c.assembler.CompileRegisterToMemory(
4130 			arm64.STRD,
4131 			tmpY,
4132 			arm64ReservedRegisterForCallEngine, callEngineModuleContextFunctionsElement0AddressOffset,
4133 		)
4134 	}
4135 
4136 	// Update dataInstancesElement0Address.
4137 	if c.ir.HasDataInstances {
4138 		// "tmpX = &moduleInstance.DataInstances[0]"
4139 		c.assembler.CompileMemoryToRegister(
4140 			arm64.LDRD,
4141 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceDataInstancesOffset,
4142 			tmpX,
4143 		)
4144 		// "callEngine.moduleContext.dataInstancesElement0Address = tmpX".
4145 		c.assembler.CompileRegisterToMemory(
4146 			arm64.STRD,
4147 			tmpX,
4148 			arm64ReservedRegisterForCallEngine, callEngineModuleContextDataInstancesElement0AddressOffset,
4149 		)
4150 	}
4151 
4152 	// Update callEngine.moduleContext.elementInstancesElement0Address.
4153 	if c.ir.HasElementInstances {
4154 		// "tmpX = &moduleInstance.ElementInstances[0]"
4155 		c.assembler.CompileMemoryToRegister(
4156 			arm64.LDRD,
4157 			arm64CallingConventionModuleInstanceAddressRegister, moduleInstanceElementInstancesOffset,
4158 			tmpX,
4159 		)
4160 		// "callEngine.moduleContext.elementInstancesElement0Address = tmpX".
4161 		c.assembler.CompileRegisterToMemory(
4162 			arm64.STRD,
4163 			tmpX,
4164 			arm64ReservedRegisterForCallEngine, callEngineModuleContextElementInstancesElement0AddressOffset,
4165 		)
4166 	}
4167 
4168 	c.assembler.SetJumpTargetOnNext(brIfModuleUnchanged)
4169 	c.markRegisterUnused(regs...)
4170 	return nil
4171 }
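// (Appendix, added illustration; an assumption-level sketch, not part of the
// original file:) the moduleInstanceEngineOffset+interfaceDataOffset load in
// compileModuleContextInitialization relies on Go's two-word interface layout:
//
//	// layout assumed for moduleInstance.Engine (see research.swtch.com/interfaces):
//	type ifaceWords struct {
//		tab  uintptr // pointer to the itab
//		data uintptr // pointer to the concrete value, here the *moduleEngine
//	}
//
// Reading the second word therefore yields the *moduleEngine directly, which is
// why a single LDRD with that combined offset suffices.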