wa-lang.org/wazero@v1.0.2/internal/engine/compiler/impl_vec_arm64.go

package compiler

import (
	"wa-lang.org/wazero/internal/asm"
	"wa-lang.org/wazero/internal/asm/arm64"
	"wa-lang.org/wazero/internal/wazeroir"
)

// compileV128Const implements compiler.compileV128Const for arm64.
func (c *arm64Compiler) compileV128Const(o *wazeroir.OperationV128Const) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Moves the lower 64-bits as a scalar float.
	intReg := arm64ReservedRegisterForTemporary
	if o.Lo == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(o.Lo), arm64ReservedRegisterForTemporary)
	}
	c.assembler.CompileRegisterToRegister(arm64.FMOVD, intReg, result)

	// Then, insert the higher bits with INS(vector,general).
	intReg = arm64ReservedRegisterForTemporary
	if o.Hi == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(o.Hi), arm64ReservedRegisterForTemporary)
	}
	// "ins Vn.D[1], intReg"
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, intReg, result, arm64.VectorArrangementD, 1)

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Add implements compiler.compileV128Add for arm64.
func (c *arm64Compiler) compileV128Add(o *wazeroir.OperationV128Add) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	switch o.Shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VADD
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VADD
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VADD
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VADD
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFADDS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFADDD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x1.register, x2.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x2.register)
	c.markRegisterUnused(x1.register)
	return nil
}
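
// i8x16AddScalarSketch is an illustrative, non-original sketch (not part of
// the wazero source) of what the single VADD with arrangement 16B emitted
// above computes for i8x16.add: each byte lane is added independently and
// wraps modulo 256.
func i8x16AddScalarSketch(x1, x2 [16]byte) (out [16]byte) {
	for i := 0; i < 16; i++ {
		out[i] = x1[i] + x2[i] // per-lane wrapping add
	}
	return
}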

// compileV128Sub implements compiler.compileV128Sub for arm64.
func (c *arm64Compiler) compileV128Sub(o *wazeroir.OperationV128Sub) (err error) {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	switch o.Shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFSUBS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFSUBD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2.register, x1.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	c.markRegisterUnused(x2.register)
	return
}

// compileV128Load implements compiler.compileV128Load for arm64.
func (c *arm64Compiler) compileV128Load(o *wazeroir.OperationV128Load) (err error) {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	switch o.Type {
	case wazeroir.V128LoadType128:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 16)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementQ,
		)
	case wazeroir.V128LoadType8x8s:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8x8u:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4s:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4u:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2s:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2u:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 1)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement16B)
	case wazeroir.V128LoadType16Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 2)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement8H)
	case wazeroir.V128LoadType32Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement4S)
	case wazeroir.V128LoadType64Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement2D)
	case wazeroir.V128LoadType32zero:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementS,
		)
	case wazeroir.V128LoadType64zero:
		offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}
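
// load8x8sScalarSketch is an illustrative, non-original sketch (not part of
// the wazero source) of the v128.load8x8_s path above: eight bytes are
// loaded and each is sign-extended into a 16-bit lane, which is what the
// D-sized load followed by SSHLL achieves on arm64.
func load8x8sScalarSketch(mem [8]byte) (out [8]int16) {
	for i, b := range mem {
		out[i] = int16(int8(b)) // sign-extend each byte to 16 bits
	}
	return
}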

// compileV128LoadLane implements compiler.compileV128LoadLane for arm64.
func (c *arm64Compiler) compileV128LoadLane(o *wazeroir.OperationV128LoadLane) (err error) {
	targetVector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(targetVector); err != nil {
		return
	}

	targetSizeInBytes := int64(o.LaneSize / 8)
	source, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	var loadInst asm.Instruction
	var arr arm64.VectorArrangement
	switch o.LaneSize {
	case 8:
		arr = arm64.VectorArrangementB
		loadInst = arm64.LDRB
	case 16:
		arr = arm64.VectorArrangementH
		loadInst = arm64.LDRH
	case 32:
		loadInst = arm64.LDRW
		arr = arm64.VectorArrangementS
	case 64:
		loadInst = arm64.LDRD
		arr = arm64.VectorArrangementD
	}

	c.assembler.CompileMemoryWithRegisterOffsetToRegister(loadInst, arm64ReservedRegisterForMemory, source, source)
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, source, targetVector.register, arr, arm64.VectorIndex(o.LaneIndex))

	c.pushVectorRuntimeValueLocationOnRegister(targetVector.register)
	c.locationStack.markRegisterUnused(source)
	return
}

// compileV128Store implements compiler.compileV128Store for arm64.
func (c *arm64Compiler) compileV128Store(o *wazeroir.OperationV128Store) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	const targetSizeInBytes = 16
	offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToMemoryWithRegisterOffset(arm64.VMOV,
		v.register, arm64ReservedRegisterForMemory, offset, arm64.VectorArrangementQ)

	c.markRegisterUnused(v.register)
	return
}

// compileV128StoreLane implements compiler.compileV128StoreLane for arm64.
func (c *arm64Compiler) compileV128StoreLane(o *wazeroir.OperationV128StoreLane) (err error) {
	var arr arm64.VectorArrangement
	var storeInst asm.Instruction
	switch o.LaneSize {
	case 8:
		storeInst = arm64.STRB
		arr = arm64.VectorArrangementB
	case 16:
		storeInst = arm64.STRH
		arr = arm64.VectorArrangementH
	case 32:
		storeInst = arm64.STRW
		arr = arm64.VectorArrangementS
	case 64:
		storeInst = arm64.STRD
		arr = arm64.VectorArrangementD
	}

	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	targetSizeInBytes := int64(o.LaneSize / 8)
	offset, err := c.compileMemoryAccessOffsetSetup(o.Arg.Offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, arm64ReservedRegisterForTemporary, arr,
		arm64.VectorIndex(o.LaneIndex))

	c.assembler.CompileRegisterToMemoryWithRegisterOffset(storeInst,
		arm64ReservedRegisterForTemporary, arm64ReservedRegisterForMemory, offset)

	c.locationStack.markRegisterUnused(v.register)
	return
}

// compileV128ExtractLane implements compiler.compileV128ExtractLane for arm64.
func (c *arm64Compiler) compileV128ExtractLane(o *wazeroir.OperationV128ExtractLane) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	switch o.Shape {
	case wazeroir.ShapeI8x16:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if o.Signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementB, arm64.VectorIndex(o.LaneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI16x8:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if o.Signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementH, arm64.VectorIndex(o.LaneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI32x4:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementS, arm64.VectorIndex(o.LaneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI64x2:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementD, arm64.VectorIndex(o.LaneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementS, arm64.VectorIndex(o.LaneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF32)
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementD, arm64.VectorIndex(o.LaneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF64)
	}
	return
}
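
// i8x16ExtractLaneScalarSketch is an illustrative, non-original sketch (not
// part of the wazero source) of the i8x16 extract_lane paths above:
// extract_lane_s sign-extends the byte into the 32-bit result (SMOV), while
// extract_lane_u zero-extends it (UMOV).
func i8x16ExtractLaneScalarSketch(v [16]byte, lane int, signed bool) uint32 {
	if signed {
		return uint32(int32(int8(v[lane]))) // sign-extended lane value
	}
	return uint32(v[lane]) // zero-extended lane value
}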

// compileV128ReplaceLane implements compiler.compileV128ReplaceLane for arm64.
func (c *arm64Compiler) compileV128ReplaceLane(o *wazeroir.OperationV128ReplaceLane) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	switch o.Shape {
	case wazeroir.ShapeI8x16:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementB, arm64.VectorIndex(o.LaneIndex))
	case wazeroir.ShapeI16x8:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementH, arm64.VectorIndex(o.LaneIndex))
	case wazeroir.ShapeI32x4:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementS, arm64.VectorIndex(o.LaneIndex))
	case wazeroir.ShapeI64x2:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementD, arm64.VectorIndex(o.LaneIndex))
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementS, 0, arm64.VectorIndex(o.LaneIndex))
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementD, 0, arm64.VectorIndex(o.LaneIndex))
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(vector.register)
	return
}

// compileV128Splat implements compiler.compileV128Splat for arm64.
func (c *arm64Compiler) compileV128Splat(o *wazeroir.OperationV128Splat) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	var result asm.Register
	switch o.Shape {
	case wazeroir.ShapeI8x16:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone)
	case wazeroir.ShapeI16x8:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement8H, arm64.VectorIndexNone)
	case wazeroir.ShapeI32x4:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement4S, arm64.VectorIndexNone)
	case wazeroir.ShapeI64x2:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement2D, arm64.VectorIndexNone)
	case wazeroir.ShapeF32x4:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementS, 0, arm64.VectorIndexNone)
	case wazeroir.ShapeF64x2:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementD, 0, arm64.VectorIndexNone)
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

func (c *arm64Compiler) onValueReleaseRegisterToStack(reg asm.Register) {
	for i := uint64(0); i < c.locationStack.sp; i++ {
		prevValue := c.locationStack.stack[i]
		if prevValue.register == reg {
			c.compileReleaseRegisterToStack(prevValue)
			break
		}
	}
}

// compileV128Shuffle implements compiler.compileV128Shuffle for arm64.
func (c *arm64Compiler) compileV128Shuffle(o *wazeroir.OperationV128Shuffle) (err error) {
	// Shuffle needs its two operands (v, w) to be placed next to each other.
	// For simplicity, we use V29 for v and V30 for w values respectively.
	const vReg, wReg = arm64.RegV29, arm64.RegV30

	// Ensures that the w value is placed on wReg.
	w := c.locationStack.popV128()
	if w.register != wReg {
		// If wReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(wReg)

		if w.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				w.register, w.register, wReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(w.register)
		} else { // on stack
			w.setRegister(wReg)
			c.compileLoadValueOnStackToRegister(w)
		}
	}

	// Ensures that the v value is placed on vReg.
	v := c.locationStack.popV128()
	if v.register != vReg {
		// If vReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(vReg)

		if v.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				v.register, v.register, vReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(v.register)
		} else { // on stack
			v.setRegister(vReg)
			c.compileLoadValueOnStackToRegister(v)
		}
	}

	c.locationStack.markRegisterUsed(vReg, wReg)
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(o.Lanes[:]), result, arm64.VectorArrangementQ)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL2, vReg, result, arm64.VectorArrangement16B,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.locationStack.markRegisterUnused(vReg, wReg)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// compileV128Swizzle implements compiler.compileV128Swizzle for arm64.
func (c *arm64Compiler) compileV128Swizzle(*wazeroir.OperationV128Swizzle) (err error) {
	indexVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(indexVec); err != nil {
		return
	}
	baseVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(baseVec); err != nil {
		return
	}

	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL1, baseVec.register, indexVec.register,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(baseVec.register)
	c.pushVectorRuntimeValueLocationOnRegister(indexVec.register)
	return
}
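
// swizzleScalarSketch is an illustrative, non-original sketch (not part of
// the wazero source) of v128.swizzle as implemented by the TBL1 instruction
// above: each output byte selects base[index] when the index is within
// 0..15, and becomes zero for any out-of-range index.
func swizzleScalarSketch(base, index [16]byte) (out [16]byte) {
	for i, idx := range index {
		if idx < 16 {
			out[i] = base[idx]
		} // otherwise the lane stays zero, matching TBL semantics
	}
	return
}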

// compileV128AnyTrue implements compiler.compileV128AnyTrue for arm64.
func (c *arm64Compiler) compileV128AnyTrue(*wazeroir.OperationV128AnyTrue) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMAXP, v, v,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
		arm64.VectorArrangementD, 0)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
	c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)

	c.locationStack.markRegisterUnused(v)
	return
}

// compileV128AllTrue implements compiler.compileV128AllTrue for arm64.
func (c *arm64Compiler) compileV128AllTrue(o *wazeroir.OperationV128AllTrue) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	if o.Shape == wazeroir.ShapeI64x2 {
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.CMEQZERO, arm64.RegRZR, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDP, v, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoRegistersToNone(arm64.FCMPD, v, v)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
	} else {
		var arr arm64.VectorArrangement
		switch o.Shape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement16B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement8H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement4S
		}

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMINV, v, v,
			arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 0)
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)
	}
	c.markRegisterUnused(v)
	return
}

var (
	i8x16BitmaskConst = [16]byte{
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	}
	i16x8BitmaskConst = [16]byte{
		0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
		0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
	}
	i32x4BitmaskConst = [16]byte{
		0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
		0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
	}
)

// compileV128BitMask implements compiler.compileV128BitMask for arm64.
func (c *arm64Compiler) compileV128BitMask(o *wazeroir.OperationV128BitMask) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register

	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	switch o.Shape {
	case wazeroir.ShapeI8x16:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Arithmetic right shift on the original vector, keeping the result in v. So we have:
		// v[i] = 0xff if vi < 0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement16B, 7)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i8x16BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i8x16BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi < 0, 0 otherwise.
		//
		// Below, we use the following notation:
		// wi := (1 << i) if vi < 0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Swap the lower and higher 8 byte elements, and write them into vecTmp, meaning that we have
		// vecTmp[i] = w(i+8) if i < 8, w(i-8) otherwise.
		c.assembler.CompileTwoVectorRegistersToVectorRegisterWithConst(arm64.EXT, v, v, vecTmp, arm64.VectorArrangement16B, 0x8)

		// v = [w0, w8, ..., w7, w15]
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.ZIP1, vecTmp, v, v, arm64.VectorArrangement16B)

		// v.h[0] = w0 + ... + w15
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Extract the v.h[0] as the result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI16x8:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Arithmetic right shift on the original vector, keeping the result in v. So we have:
		// v[i] = 0xffff if vi < 0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement8H, 15)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i16x8BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i16x8BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi < 0, 0 otherwise, for i = 0..7.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI32x4:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Arithmetic right shift on the original vector, keeping the result in v. So we have:
		// v[i] = 0xffffffff if vi < 0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement4S, 32)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV,
			asm.NewStaticConst(i32x4BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i32x4BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi < 0, 0 otherwise, for i = 0..3.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementS, 0)
	case wazeroir.ShapeI64x2:
		// Move the lower 64-bit int into result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result,
			arm64.VectorArrangementD, 0)
		// Move the higher 64-bit int into arm64ReservedRegisterForTemporary.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 1)

		// Move the sign bit into the least significant bit.
		c.assembler.CompileConstToRegister(arm64.LSR, 63, result)
		c.assembler.CompileConstToRegister(arm64.LSR, 63, arm64ReservedRegisterForTemporary)

		// result = (arm64ReservedRegisterForTemporary<<1) | result
		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
			arm64ReservedRegisterForTemporary, 1, result, result)
	}

	c.markRegisterUnused(v)
	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return
}

// compileV128And implements compiler.compileV128And for arm64.
func (c *arm64Compiler) compileV128And(*wazeroir.OperationV128And) error {
	return c.compileV128x2BinOp(arm64.VAND, arm64.VectorArrangement16B)
}

// compileV128Not implements compiler.compileV128Not for arm64.
func (c *arm64Compiler) compileV128Not(*wazeroir.OperationV128Not) error {
	return c.compileV128UniOp(arm64.NOT, arm64.VectorArrangement16B)
}

// compileV128Or implements compiler.compileV128Or for arm64.
func (c *arm64Compiler) compileV128Or(*wazeroir.OperationV128Or) error {
	return c.compileV128x2BinOp(arm64.VORR, arm64.VectorArrangement16B)
}

// compileV128Xor implements compiler.compileV128Xor for arm64.
func (c *arm64Compiler) compileV128Xor(*wazeroir.OperationV128Xor) error {
	return c.compileV128x2BinOp(arm64.EOR, arm64.VectorArrangement16B)
}

// compileV128Bitselect implements compiler.compileV128Bitselect for arm64.
func (c *arm64Compiler) compileV128Bitselect(*wazeroir.OperationV128Bitselect) error {
	selector := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(selector); err != nil {
		return err
	}

	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL,
		x2.register, x1.register, selector.register, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1.register, x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(selector.register)
	return nil
}
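
// bitselectScalarSketch is an illustrative, non-original sketch (not part of
// the wazero source) of v128.bitselect as compiled above: for every bit
// position, the result takes the bit from x1 where the selector bit is 1 and
// from x2 where it is 0, which a single BSL computes with the selector held
// in the destination register.
func bitselectScalarSketch(x1, x2, selector uint64) uint64 {
	return (x1 & selector) | (x2 &^ selector)
}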

// compileV128AndNot implements compiler.compileV128AndNot for arm64.
func (c *arm64Compiler) compileV128AndNot(*wazeroir.OperationV128AndNot) error {
	return c.compileV128x2BinOp(arm64.BIC, arm64.VectorArrangement16B)
}

func (c *arm64Compiler) compileV128UniOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, v.register, v.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

func (c *arm64Compiler) compileV128x2BinOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2.register, x1.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}

// compileV128Shr implements compiler.compileV128Shr for arm64.
func (c *arm64Compiler) compileV128Shr(o *wazeroir.OperationV128Shr) error {
	var inst asm.Instruction
	if o.Signed {
		inst = arm64.SSHL
	} else {
		inst = arm64.USHL
	}
	return c.compileV128ShiftImpl(o.Shape, inst, true)
}

// compileV128Shl implements compiler.compileV128Shl for arm64.
func (c *arm64Compiler) compileV128Shl(o *wazeroir.OperationV128Shl) error {
	return c.compileV128ShiftImpl(o.Shape, arm64.SSHL, false)
}

func (c *arm64Compiler) compileV128ShiftImpl(shape wazeroir.Shape, ins asm.Instruction, rightShift bool) error {
	s := c.locationStack.pop()
	if s.register == arm64.RegRZR {
		// If the shift amount is the zero register, there is nothing to do here.
		return nil
	}

	var modulo asm.ConstantValue
	var arr arm64.VectorArrangement
	switch shape {
	case wazeroir.ShapeI8x16:
		modulo = 0x7 // modulo 8.
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		modulo = 0xf // modulo 16.
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		modulo = 0x1f // modulo 32.
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		modulo = 0x3f // modulo 64.
		arr = arm64.VectorArrangement2D
	}

	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	c.assembler.CompileConstToRegister(arm64.ANDIMM32, modulo, s.register)

	if rightShift {
		// Negate the amount to turn this into a right shift.
		c.assembler.CompileRegisterToRegister(arm64.NEG, s.register, s.register)
	}

	// Copy the shift amount into a vector register, as SSHL requires it to be there.
	c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, s.register, tmp,
		arr, arm64.VectorIndexNone)

	c.assembler.CompileVectorRegisterToVectorRegister(ins, tmp, v.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(s.register)
	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

// compileV128Cmp implements compiler.compileV128Cmp for arm64.
func (c *arm64Compiler) compileV128Cmp(o *wazeroir.OperationV128Cmp) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	var arr arm64.VectorArrangement
	if o.Type <= wazeroir.V128CmpTypeI8x16GeU {
		arr = arm64.VectorArrangement16B
	} else if o.Type <= wazeroir.V128CmpTypeI16x8GeU {
		arr = arm64.VectorArrangement8H
	} else if o.Type <= wazeroir.V128CmpTypeI32x4GeU {
		arr = arm64.VectorArrangement4S
	} else if o.Type <= wazeroir.V128CmpTypeI64x2GeS {
		arr = arm64.VectorArrangement2D
	} else if o.Type <= wazeroir.V128CmpTypeF32x4Ge {
		arr = arm64.VectorArrangement4S
	} else { // f64x2
		arr = arm64.VectorArrangement2D
	}

	result := x1.register
	switch o.Type {
	case wazeroir.V128CmpTypeI8x16Eq, wazeroir.V128CmpTypeI16x8Eq, wazeroir.V128CmpTypeI32x4Eq, wazeroir.V128CmpTypeI64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16Ne, wazeroir.V128CmpTypeI16x8Ne, wazeroir.V128CmpTypeI32x4Ne, wazeroir.V128CmpTypeI64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeI8x16LtS, wazeroir.V128CmpTypeI16x8LtS, wazeroir.V128CmpTypeI32x4LtS, wazeroir.V128CmpTypeI64x2LtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LtU, wazeroir.V128CmpTypeI16x8LtU, wazeroir.V128CmpTypeI32x4LtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtS, wazeroir.V128CmpTypeI16x8GtS, wazeroir.V128CmpTypeI32x4GtS, wazeroir.V128CmpTypeI64x2GtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtU, wazeroir.V128CmpTypeI16x8GtU, wazeroir.V128CmpTypeI32x4GtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeS, wazeroir.V128CmpTypeI16x8LeS, wazeroir.V128CmpTypeI32x4LeS, wazeroir.V128CmpTypeI64x2LeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeU, wazeroir.V128CmpTypeI16x8LeU, wazeroir.V128CmpTypeI32x4LeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeS, wazeroir.V128CmpTypeI16x8GeS, wazeroir.V128CmpTypeI32x4GeS, wazeroir.V128CmpTypeI64x2GeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeU, wazeroir.V128CmpTypeI16x8GeU, wazeroir.V128CmpTypeI32x4GeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Eq, wazeroir.V128CmpTypeF64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ne, wazeroir.V128CmpTypeF64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeF32x4Lt, wazeroir.V128CmpTypeF64x2Lt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Le, wazeroir.V128CmpTypeF64x2Le:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Gt, wazeroir.V128CmpTypeF64x2Gt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ge, wazeroir.V128CmpTypeF64x2Ge:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x2.register, x1.register, result, arr)
	}

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}
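
// i32x4LtSLaneSketch is an illustrative, non-original sketch (not part of
// the wazero source) of one lane of the comparisons compiled above with the
// CMGT/CMHI/CMGE/CMHS family: a SIMD comparison yields an all-ones mask
// (0xffffffff for 32-bit lanes) when the predicate holds, and zero otherwise,
// rather than a boolean.
func i32x4LtSLaneSketch(a, b int32) uint32 {
	if a < b {
		return 0xffffffff
	}
	return 0
}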

// compileV128AddSat implements compiler.compileV128AddSat for arm64.
func (c *arm64Compiler) compileV128AddSat(o *wazeroir.OperationV128AddSat) error {
	var inst asm.Instruction
	if o.Signed {
		inst = arm64.VSQADD
	} else {
		inst = arm64.VUQADD
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(o.Shape))
}

// compileV128SubSat implements compiler.compileV128SubSat for arm64.
func (c *arm64Compiler) compileV128SubSat(o *wazeroir.OperationV128SubSat) error {
	var inst asm.Instruction
	if o.Signed {
		inst = arm64.VSQSUB
	} else {
		inst = arm64.VUQSUB
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(o.Shape))
}

// compileV128Mul implements compiler.compileV128Mul for arm64.
func (c *arm64Compiler) compileV128Mul(o *wazeroir.OperationV128Mul) (err error) {
	switch o.Shape {
	case wazeroir.ShapeI8x16, wazeroir.ShapeI16x8, wazeroir.ShapeI32x4:
		err = c.compileV128x2BinOp(arm64.VMUL, defaultArrangementForShape(o.Shape))
	case wazeroir.ShapeF32x4, wazeroir.ShapeF64x2:
		err = c.compileV128x2BinOp(arm64.VFMUL, defaultArrangementForShape(o.Shape))
	case wazeroir.ShapeI64x2:
		x2 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x2); err != nil {
			return
		}

		x1 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x1); err != nil {
			return
		}

		src1, src2 := x1.register, x2.register

		tmp1, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp1)

		tmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp2)

		tmp3, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.REV64, src2, tmp2,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VMUL, src1, tmp2, tmp2, arm64.VectorArrangement4S)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src1, tmp1,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VADDP, tmp2, tmp2, arm64.VectorArrangement4S,
			arm64.VectorIndexNone, arm64.VectorIndexNone,
		)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src2, tmp3,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SHLL, tmp2, src1,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VUMLAL, tmp3, tmp1, src1, arm64.VectorArrangement2S)

		c.markRegisterUnused(src2, tmp1, tmp2)
		c.pushVectorRuntimeValueLocationOnRegister(src1)
	}
	return
}
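
// i64MulFromHalvesSketch is an illustrative, non-original sketch (not part
// of the wazero source) of the i64x2.mul decomposition referenced above (the
// V8 algorithm): each truncated 64-bit product is built from the 32-bit
// halves of its operands; the aHi*bHi term only affects bits >= 64 and is
// dropped.
func i64MulFromHalvesSketch(a, b uint64) uint64 {
	aLo, aHi := a&0xffffffff, a>>32
	bLo, bHi := b&0xffffffff, b>>32
	return aLo*bLo + ((aLo*bHi + aHi*bLo) << 32)
}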

// compileV128Div implements compiler.compileV128Div for arm64.
func (c *arm64Compiler) compileV128Div(o *wazeroir.OperationV128Div) error {
	var arr arm64.VectorArrangement
	var inst asm.Instruction
	switch o.Shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
		inst = arm64.VFDIV
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
		inst = arm64.VFDIV
	}
	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Neg implements compiler.compileV128Neg for arm64.
func (c *arm64Compiler) compileV128Neg(o *wazeroir.OperationV128Neg) error {
	var inst asm.Instruction
	if o.Shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VNEG
	} else { // Floating point lanes
		inst = arm64.VFNEG
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(o.Shape))
}

// compileV128Sqrt implements compiler.compileV128Sqrt for arm64.
func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.OperationV128Sqrt) error {
	var arr arm64.VectorArrangement
	switch o.Shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFSQRT, arr)
}

// compileV128Abs implements compiler.compileV128Abs for arm64.
func (c *arm64Compiler) compileV128Abs(o *wazeroir.OperationV128Abs) error {
	var inst asm.Instruction
	if o.Shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VABS
	} else { // Floating point lanes
		inst = arm64.VFABS
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(o.Shape))
}

// compileV128Popcnt implements compiler.compileV128Popcnt for arm64.
func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.OperationV128Popcnt) error {
	return c.compileV128UniOp(arm64.VCNT, defaultArrangementForShape(o.Shape))
}

// compileV128Min implements compiler.compileV128Min for arm64.
func (c *arm64Compiler) compileV128Min(o *wazeroir.OperationV128Min) error {
	var inst asm.Instruction
	if o.Shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if o.Signed {
			inst = arm64.SMIN
		} else {
			inst = arm64.UMIN
		}
	} else { // Floating point lanes
		inst = arm64.VFMIN
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(o.Shape))
}

func defaultArrangementForShape(s wazeroir.Shape) (arr arm64.VectorArrangement) {
	switch s {
	case wazeroir.ShapeI8x16:
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return
}

// compileV128Max implements compiler.compileV128Max for arm64.
func (c *arm64Compiler) compileV128Max(o *wazeroir.OperationV128Max) error {
	var inst asm.Instruction
	if o.Shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if o.Signed {
			inst = arm64.SMAX
		} else {
			inst = arm64.UMAX
		}
	} else { // Floating point lanes
		inst = arm64.VFMAX
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(o.Shape))
}

// compileV128AvgrU implements compiler.compileV128AvgrU for arm64.
func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.OperationV128AvgrU) error {
	return c.compileV128x2BinOp(arm64.URHADD, defaultArrangementForShape(o.Shape))
}

// compileV128Pmin implements compiler.compileV128Pmin for arm64.
func (c *arm64Compiler) compileV128Pmin(o *wazeroir.OperationV128Pmin) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.Shape), false)
}

// compileV128Pmax implements compiler.compileV128Pmax for arm64.
func (c *arm64Compiler) compileV128Pmax(o *wazeroir.OperationV128Pmax) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.Shape), true)
}

// compileV128PseudoMinOrMax implements compileV128Pmax and compileV128Pmin.
func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Sets all bits on each lane if x1r's lane satisfies the condition (min or max), zeros otherwise.
	if max {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
	} else {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
	}
	// Select each bit based on the comparison result above.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, x1r, x2r, result, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1r, x2r)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Ceil implements compiler.compileV128Ceil for arm64.
func (c *arm64Compiler) compileV128Ceil(o *wazeroir.OperationV128Ceil) error {
	var arr arm64.VectorArrangement
	switch o.Shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTP, arr)
}

// compileV128Floor implements compiler.compileV128Floor for arm64.
func (c *arm64Compiler) compileV128Floor(o *wazeroir.OperationV128Floor) error {
	var arr arm64.VectorArrangement
	switch o.Shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTM, arr)
}

// compileV128Trunc implements compiler.compileV128Trunc for arm64.
func (c *arm64Compiler) compileV128Trunc(o *wazeroir.OperationV128Trunc) error {
	var arr arm64.VectorArrangement
	switch o.Shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTZ, arr)
}
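
// f32PseudoMinMaxLaneSketch is an illustrative, non-original sketch (not
// part of the wazero source) of one lane of f32x4.pmin/pmax as handled by
// compileV128PseudoMinOrMax above: unlike min/max, the "pseudo" variants are
// defined purely by an ordered "<" comparison
// (pmin(x1, x2) = x2 < x1 ? x2 : x1, pmax(x1, x2) = x1 < x2 ? x2 : x1),
// which is what the FCMGT + BSL pair computes.
func f32PseudoMinMaxLaneSketch(x1, x2 float32, max bool) float32 {
	if max {
		if x1 < x2 {
			return x2
		}
		return x1
	}
	if x2 < x1 {
		return x2
	}
	return x1
}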

// compileV128Nearest implements compiler.compileV128Nearest for arm64.
func (c *arm64Compiler) compileV128Nearest(o *wazeroir.OperationV128Nearest) error {
	var arr arm64.VectorArrangement
	switch o.Shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTN, arr)
}

// compileV128Extend implements compiler.compileV128Extend for arm64.
func (c *arm64Compiler) compileV128Extend(o *wazeroir.OperationV128Extend) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	if o.UseLow {
		if o.Signed {
			inst = arm64.SSHLL
		} else {
			inst = arm64.USHLL
		}

		switch o.OriginShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if o.Signed {
			inst = arm64.SSHLL2
		} else {
			inst = arm64.USHLL2
		}
		arr = defaultArrangementForShape(o.OriginShape)
	}

	return c.compileV128UniOp(inst, arr)
}

// compileV128ExtMul implements compiler.compileV128ExtMul for arm64.
func (c *arm64Compiler) compileV128ExtMul(o *wazeroir.OperationV128ExtMul) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	if o.UseLow {
		if o.Signed {
			inst = arm64.SMULL
		} else {
			inst = arm64.UMULL
		}

		switch o.OriginShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if o.Signed {
			inst = arm64.SMULL2
		} else {
			inst = arm64.UMULL2
		}
		arr = defaultArrangementForShape(o.OriginShape)
	}

	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for arm64.
func (c *arm64Compiler) compileV128Q15mulrSatS(*wazeroir.OperationV128Q15mulrSatS) error {
	return c.compileV128x2BinOp(arm64.SQRDMULH, arm64.VectorArrangement8H)
}

// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for arm64.
func (c *arm64Compiler) compileV128ExtAddPairwise(o *wazeroir.OperationV128ExtAddPairwise) error {
	var inst asm.Instruction
	if o.Signed {
		inst = arm64.SADDLP
	} else {
		inst = arm64.UADDLP
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(o.OriginShape))
}

// compileV128FloatPromote implements compiler.compileV128FloatPromote for arm64.
func (c *arm64Compiler) compileV128FloatPromote(*wazeroir.OperationV128FloatPromote) error {
	return c.compileV128UniOp(arm64.FCVTL, arm64.VectorArrangement2S)
}

// compileV128FloatDemote implements compiler.compileV128FloatDemote for arm64.
func (c *arm64Compiler) compileV128FloatDemote(*wazeroir.OperationV128FloatDemote) error {
	return c.compileV128UniOp(arm64.FCVTN, arm64.VectorArrangement2S)
}
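
// extAddPairwiseI8x16SSketch is an illustrative, non-original sketch (not
// part of the wazero source) of i16x8.extadd_pairwise_i8x16_s, which the
// SADDLP path above compiles: adjacent pairs of narrow lanes are
// sign-extended and summed into one wider lane.
func extAddPairwiseI8x16SSketch(in [16]byte) (out [8]int16) {
	for i := 0; i < 8; i++ {
		out[i] = int16(int8(in[2*i])) + int16(int8(in[2*i+1]))
	}
	return
}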

// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for arm64.
func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConvertFromI) (err error) {
	if o.DestinationShape == wazeroir.ShapeF32x4 {
		if o.Signed {
			err = c.compileV128UniOp(arm64.VSCVTF, defaultArrangementForShape(o.DestinationShape))
		} else {
			err = c.compileV128UniOp(arm64.VUCVTF, defaultArrangementForShape(o.DestinationShape))
		}
		return
	} else { // f64x2
		v := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(v); err != nil {
			return
		}
		vr := v.register

		var expand, convert asm.Instruction
		if o.Signed {
			expand, convert = arm64.SSHLL, arm64.VSCVTF
		} else {
			expand, convert = arm64.USHLL, arm64.VUCVTF
		}

		// Expand the lower two 32-bit lanes into two 64-bit lanes.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(expand, vr, vr, arm64.VectorArrangement2S, 0)
		// Convert the resulting two 64-bit integer lanes into double-precision values.
		c.assembler.CompileVectorRegisterToVectorRegister(convert, vr, vr, arm64.VectorArrangement2D,
			arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.pushVectorRuntimeValueLocationOnRegister(vr)
	}
	return
}

// compileV128Dot implements compiler.compileV128Dot for arm64.
func (c *arm64Compiler) compileV128Dot(*wazeroir.OperationV128Dot) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Multiply the lower integers and get the 32-bit results into tmp.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
	// Multiply the higher integers and get the 32-bit results into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
	// Add these two results pairwise into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)

	return nil
}

// compileV128Narrow implements compiler.compileV128Narrow for arm64.
func (c *arm64Compiler) compileV128Narrow(o *wazeroir.OperationV128Narrow) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr, arr2 arm64.VectorArrangement
	switch o.OriginShape {
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8B
		arr2 = arm64.VectorArrangement16B
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4H
		arr2 = arm64.VectorArrangement8H
	}

	var lo, hi asm.Instruction
	if o.Signed {
		lo, hi = arm64.SQXTN, arm64.SQXTN2
	} else {
		lo, hi = arm64.SQXTUN, arm64.SQXTUN2
	}

	// Narrow the lanes of x1r and write them into the lower half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(lo, x1r, x1r, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
	// Narrow the lanes of x2r and write them into the higher half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(hi, x2r, x1r, arr2, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}

// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for arm64.
func (c *arm64Compiler) compileV128ITruncSatFromF(o *wazeroir.OperationV128ITruncSatFromF) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	var cvt asm.Instruction
	if o.Signed {
		cvt = arm64.VFCVTZS
	} else {
		cvt = arm64.VFCVTZU
	}

	c.assembler.CompileVectorRegisterToVectorRegister(cvt, v.register, v.register,
		defaultArrangementForShape(o.OriginShape), arm64.VectorIndexNone, arm64.VectorIndexNone,
	)

	if o.OriginShape == wazeroir.ShapeF64x2 {
		var narrow asm.Instruction
		if o.Signed {
			narrow = arm64.SQXTN
		} else {
			narrow = arm64.UQXTN
		}
		c.assembler.CompileVectorRegisterToVectorRegister(narrow, v.register, v.register,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return
}
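
// truncSatF32LaneSketch is an illustrative, non-original sketch (not part of
// the wazero source) of one lane of i32x4.trunc_sat_f32x4_s, the conversion
// performed by the VFCVTZS path above: the value is truncated toward zero,
// NaN becomes 0, and out-of-range values saturate to the int32 bounds.
func truncSatF32LaneSketch(f float32) int32 {
	switch {
	case f != f: // NaN lanes become zero.
		return 0
	case f >= 2147483648.0: // saturate at the upper int32 bound
		return 2147483647
	case f <= -2147483648.0: // saturate at the lower int32 bound
		return -2147483648
	default:
		return int32(f) // truncate toward zero
	}
}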