github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/compiler/impl_vec_arm64.go

package compiler

import (
	"github.com/bananabytelabs/wazero/internal/asm"
	"github.com/bananabytelabs/wazero/internal/asm/arm64"
	"github.com/bananabytelabs/wazero/internal/wazeroir"
)

// compileV128Const implements compiler.compileV128Const for arm64.
func (c *arm64Compiler) compileV128Const(o *wazeroir.UnionOperation) error {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}

	lo, hi := o.U1, o.U2

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	// Moves the lower 64-bits as a scalar float.
	intReg := arm64ReservedRegisterForTemporary
	if lo == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(lo), arm64ReservedRegisterForTemporary)
	}
	c.assembler.CompileRegisterToRegister(arm64.FMOVD, intReg, result)

	// Then, insert the higher bits with INS(vector,general).
	intReg = arm64ReservedRegisterForTemporary
	if hi == 0 {
		intReg = arm64.RegRZR
	} else {
		c.assembler.CompileConstToRegister(arm64.MOVD, int64(hi), arm64ReservedRegisterForTemporary)
	}
	// "ins Vn.D[1], intReg"
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, intReg, result, arm64.VectorArrangementD, 1)

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Add implements compiler.compileV128Add for arm64.
func (c *arm64Compiler) compileV128Add(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VADD
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VADD
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VADD
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VADD
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFADDS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFADDD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x1r, x2r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x2r)
	c.markRegisterUnused(x1r)
	return nil
}

// compileV128Sub implements compiler.compileV128Sub for arm64.
func (c *arm64Compiler) compileV128Sub(o *wazeroir.UnionOperation) (err error) {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		inst = arm64.VSUB
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		inst = arm64.VFSUBS
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		inst = arm64.VFSUBD
		arr = arm64.VectorArrangement2D
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2r, x1r, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	c.markRegisterUnused(x2r)
	return
}

// compileV128Load implements compiler.compileV128Load for arm64.
func (c *arm64Compiler) compileV128Load(o *wazeroir.UnionOperation) (err error) {
	if err := c.maybeCompileMoveTopConditionalToGeneralPurposeRegister(); err != nil {
		return err
	}
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	offset := uint32(o.U2)
	loadType := wazeroir.V128LoadType(o.B1)

	switch loadType {
	case wazeroir.V128LoadType128:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 16)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementQ,
		)
	case wazeroir.V128LoadType8x8s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8x8u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement8B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType16x4u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
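		// Load 64 bits, then zero-extend the four unsigned 16-bit lanes into 32-bit lanes via USHLL below.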
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement4H, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2s:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SSHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType32x2u:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.USHLL, result, result,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128LoadType8Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 1)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement16B)
	case wazeroir.V128LoadType16Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 2)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement8H)
	case wazeroir.V128LoadType32Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement4S)
	case wazeroir.V128LoadType64Splat:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileRegisterToRegister(arm64.ADD, arm64ReservedRegisterForMemory, offset)
		c.assembler.CompileMemoryToVectorRegister(arm64.LD1R, offset, 0, result, arm64.VectorArrangement2D)
	case wazeroir.V128LoadType32zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 4)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementS,
		)
	case wazeroir.V128LoadType64zero:
		offset, err := c.compileMemoryAccessOffsetSetup(offset, 8)
		if err != nil {
			return err
		}
		c.assembler.CompileMemoryWithRegisterOffsetToVectorRegister(arm64.VMOV,
			arm64ReservedRegisterForMemory, offset, result, arm64.VectorArrangementD,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// compileV128LoadLane implements compiler.compileV128LoadLane for arm64.
func (c *arm64Compiler) compileV128LoadLane(o *wazeroir.UnionOperation) (err error) {
	targetVector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(targetVector); err != nil {
		return
	}

	laneSize, laneIndex := o.B1, o.B2
	offset := uint32(o.U2)

	targetSizeInBytes := int64(laneSize / 8)
	source, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	var loadInst asm.Instruction
	var arr arm64.VectorArrangement
	switch laneSize {
	case 8:
		arr = arm64.VectorArrangementB
		loadInst = arm64.LDRB
	case 16:
		arr = arm64.VectorArrangementH
		loadInst = arm64.LDRH
	case 32:
		loadInst = arm64.LDRW
		arr = arm64.VectorArrangementS
	case 64:
		loadInst = arm64.LDRD
		arr = arm64.VectorArrangementD
	}

	c.assembler.CompileMemoryWithRegisterOffsetToRegister(loadInst, arm64ReservedRegisterForMemory, source, source)
	c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, source, targetVector.register, arr, arm64.VectorIndex(laneIndex))

	c.pushVectorRuntimeValueLocationOnRegister(targetVector.register)
	c.locationStack.markRegisterUnused(source)
	return
}

// compileV128Store implements compiler.compileV128Store for arm64.
func (c *arm64Compiler) compileV128Store(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	const targetSizeInBytes = 16
	offset := uint32(o.U2)
	offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToMemoryWithRegisterOffset(arm64.VMOV,
		v.register, arm64ReservedRegisterForMemory, offsetReg, arm64.VectorArrangementQ)

	c.markRegisterUnused(v.register)
	return
}

// compileV128StoreLane implements compiler.compileV128StoreLane for arm64.
func (c *arm64Compiler) compileV128StoreLane(o *wazeroir.UnionOperation) (err error) {
	var arr arm64.VectorArrangement
	var storeInst asm.Instruction
	laneSize := o.B1
	laneIndex := o.B2
	offset := uint32(o.U2)
	switch laneSize {
	case 8:
		storeInst = arm64.STRB
		arr = arm64.VectorArrangementB
	case 16:
		storeInst = arm64.STRH
		arr = arm64.VectorArrangementH
	case 32:
		storeInst = arm64.STRW
		arr = arm64.VectorArrangementS
	case 64:
		storeInst = arm64.STRD
		arr = arm64.VectorArrangementD
	}

	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	targetSizeInBytes := int64(laneSize / 8)
	offsetReg, err := c.compileMemoryAccessOffsetSetup(offset, targetSizeInBytes)
	if err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, arm64ReservedRegisterForTemporary, arr,
		arm64.VectorIndex(laneIndex))

	c.assembler.CompileRegisterToMemoryWithRegisterOffset(storeInst,
		arm64ReservedRegisterForTemporary, arm64ReservedRegisterForMemory, offsetReg)

	c.locationStack.markRegisterUnused(v.register)
	return
}

// compileV128ExtractLane implements compiler.compileV128ExtractLane for arm64.
func (c *arm64Compiler) compileV128ExtractLane(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return
	}

	shape := o.B1
	laneIndex := o.B2
	signed := o.B3
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI16x8:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		var inst asm.Instruction
		if signed {
			inst = arm64.SMOV32
		} else {
			inst = arm64.UMOV
		}
		c.assembler.CompileVectorRegisterToRegister(inst, v.register, result,
			arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI32x4:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	case wazeroir.ShapeI64x2:
		result, err := c.allocateRegister(registerTypeGeneralPurpose)
		if err != nil {
			return err
		}
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v.register, result,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))

		c.locationStack.markRegisterUnused(v.register)
		c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI64)
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF32)
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, v.register, v.register,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex), 0)
		c.pushRuntimeValueLocationOnRegister(v.register, runtimeValueTypeF64)
	}
	return
}

// compileV128ReplaceLane implements compiler.compileV128ReplaceLane for arm64.
func (c *arm64Compiler) compileV128ReplaceLane(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	shape := o.B1
	laneIndex := o.B2
	switch shape {
	case wazeroir.ShapeI8x16:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementB, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI16x8:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementH, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI32x4:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementS, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeI64x2:
		c.assembler.CompileRegisterToVectorRegister(arm64.INSGEN, origin.register, vector.register,
			arm64.VectorArrangementD, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeF32x4:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementS, 0, arm64.VectorIndex(laneIndex))
	case wazeroir.ShapeF64x2:
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.INSELEM, origin.register, vector.register,
			arm64.VectorArrangementD, 0, arm64.VectorIndex(laneIndex))
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(vector.register)
	return
}

// compileV128Splat implements compiler.compileV128Splat for arm64.
func (c *arm64Compiler) compileV128Splat(o *wazeroir.UnionOperation) (err error) {
	origin := c.locationStack.pop()
	if err = c.compileEnsureOnRegister(origin); err != nil {
		return
	}

	var result asm.Register
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone)
	case wazeroir.ShapeI16x8:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement8H, arm64.VectorIndexNone)
	case wazeroir.ShapeI32x4:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement4S, arm64.VectorIndexNone)
	case wazeroir.ShapeI64x2:
		result, err = c.allocateRegister(registerTypeVector)
		if err != nil {
			return
		}
		c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, origin.register, result,
			arm64.VectorArrangement2D, arm64.VectorIndexNone)
	case wazeroir.ShapeF32x4:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementS, 0, arm64.VectorIndexNone)
	case wazeroir.ShapeF64x2:
		result = origin.register
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.DUPELEM, origin.register, result,
			arm64.VectorArrangementD, 0, arm64.VectorIndexNone)
	}

	c.locationStack.markRegisterUnused(origin.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

func (c *arm64Compiler) onValueReleaseRegisterToStack(reg asm.Register) {
	for i := uint64(0); i < c.locationStack.sp; i++ {
		prevValue := &c.locationStack.stack[i]
		if prevValue.register == reg {
			c.compileReleaseRegisterToStack(prevValue)
			break
		}
	}
}

// compileV128Shuffle implements compiler.compileV128Shuffle for arm64.
func (c *arm64Compiler) compileV128Shuffle(o *wazeroir.UnionOperation) (err error) {
	// Shuffle needs its two operands (v, w) to be next to each other.
	// For simplicity, we use V29 for v and V30 for w values respectively.
	const vReg, wReg = arm64.RegV29, arm64.RegV30

	// Ensure that the w value is placed on wReg.
	w := c.locationStack.popV128()
	if w.register != wReg {
		// If wReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(wReg)

		if w.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				w.register, w.register, wReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(w.register)
		} else { // on stack
			w.setRegister(wReg)
			c.compileLoadValueOnStackToRegister(w)
		}
	}

	// Ensure that the v value is placed on vReg.
	v := c.locationStack.popV128()
	if v.register != vReg {
		// If vReg is already in use, save the value onto the stack.
		c.onValueReleaseRegisterToStack(vReg)

		if v.onRegister() {
			c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VORR,
				v.register, v.register, vReg, arm64.VectorArrangement16B)
			// We no longer use the old register.
			c.markRegisterUnused(v.register)
		} else { // on stack
			v.setRegister(vReg)
			c.compileLoadValueOnStackToRegister(v)
		}
	}

	c.locationStack.markRegisterUsed(vReg, wReg)
	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	lanes := make([]byte, len(o.Us))
	for i, lane := range o.Us {
		lanes[i] = byte(lane)
	}
	c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(lanes), result, arm64.VectorArrangementQ)
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL2, vReg, result, arm64.VectorArrangement16B,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.locationStack.markRegisterUnused(vReg, wReg)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return
}

// compileV128Swizzle implements compiler.compileV128Swizzle for arm64.
func (c *arm64Compiler) compileV128Swizzle(*wazeroir.UnionOperation) (err error) {
	indexVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(indexVec); err != nil {
		return
	}
	baseVec := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(baseVec); err != nil {
		return
	}

	c.assembler.CompileVectorRegisterToVectorRegister(arm64.TBL1, baseVec.register, indexVec.register,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(baseVec.register)
	c.pushVectorRuntimeValueLocationOnRegister(indexVec.register)
	return
}

// compileV128AnyTrue implements compiler.compileV128AnyTrue for arm64.
func (c *arm64Compiler) compileV128AnyTrue(*wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMAXP, v, v,
		arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
		arm64.VectorArrangementD, 0)
	c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
	c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)

	c.locationStack.markRegisterUnused(v)
	return
}

// compileV128AllTrue implements compiler.compileV128AllTrue for arm64.
func (c *arm64Compiler) compileV128AllTrue(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register
	shape := o.B1
	if shape == wazeroir.ShapeI64x2 {
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.CMEQZERO, arm64.RegRZR, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDP, v, v,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoRegistersToNone(arm64.FCMPD, v, v)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondEQ)
	} else {
		var arr arm64.VectorArrangement
		switch shape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement16B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement8H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement4S
		}

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.UMINV, v, v,
			arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 0)
		c.assembler.CompileTwoRegistersToNone(arm64.CMP, arm64.RegRZR, arm64ReservedRegisterForTemporary)
		c.locationStack.pushRuntimeValueLocationOnConditionalRegister(arm64.CondNE)
	}
	c.markRegisterUnused(v)
	return
}

var (
	i8x16BitmaskConst = [16]byte{
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	}
	i16x8BitmaskConst = [16]byte{
		0x01, 0x00, 0x02, 0x00, 0x04, 0x00, 0x08, 0x00,
		0x10, 0x00, 0x20, 0x00, 0x40, 0x00, 0x80, 0x00,
	}
	i32x4BitmaskConst = [16]byte{
		0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
		0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
	}
)

// compileV128BitMask implements compiler.compileV128BitMask for arm64.
func (c *arm64Compiler) compileV128BitMask(o *wazeroir.UnionOperation) (err error) {
	vector := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(vector); err != nil {
		return
	}

	v := vector.register

	result, err := c.allocateRegister(registerTypeGeneralPurpose)
	if err != nil {
		return err
	}

	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Arithmetic right shift on the original vector, storing the result back into v. So we have:
		// v[i] = 0xff if vi<0, 0 otherwise.
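		// In other words, each byte lane becomes an all-ones or all-zeros mask, so the AND with
		// i8x16BitmaskConst below keeps exactly one distinct bit per negative lane.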
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement16B, 7)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i8x16BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i8x16BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise.
		//
		// Below, we use the following notation:
		// wi := (1 << i) if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Swap the lower and higher 8 byte elements, and write it into vecTmp, meaning that we have
		// vecTmp[i] = w(i+8) if i < 8, w(i-8) otherwise.
		//
		c.assembler.CompileTwoVectorRegistersToVectorRegisterWithConst(arm64.EXT, v, v, vecTmp, arm64.VectorArrangement16B, 0x8)

		// v = [w0, w8, ..., w7, w15]
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.ZIP1, vecTmp, v, v, arm64.VectorArrangement16B)

		// v.h[0] = w0 + ... + w15
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		// Extract the v.h[0] as the result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI16x8:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		// Arithmetic right shift on the original vector, storing the result back into v. So we have:
		// v[i] = 0xffff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement8H, 15)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV, asm.NewStaticConst(i16x8BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i16x8BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise, for i = 0..7.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement8H, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementH, 0)
	case wazeroir.ShapeI32x4:
		vecTmp, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Arithmetic right shift on the original vector, storing the result back into v. So we have:
		// v[i] = 0xffffffff if vi<0, 0 otherwise.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(arm64.SSHR, v, v, arm64.VectorArrangement4S, 32)

		// Load the bit mask into vecTmp.
		c.assembler.CompileStaticConstToVectorRegister(arm64.VMOV,
			asm.NewStaticConst(i32x4BitmaskConst[:]), vecTmp, arm64.VectorArrangementQ)

		// Lane-wise logical AND with i32x4BitmaskConst, meaning that we have
		// v[i] = (1 << i) if vi<0, 0 otherwise, for i = 0..3.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VAND, vecTmp, v, arm64.VectorArrangement16B,
			arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.ADDV, v, v,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result, arm64.VectorArrangementS, 0)
	case wazeroir.ShapeI64x2:
		// Move the lower 64-bit int into result.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, result,
			arm64.VectorArrangementD, 0)
		// Move the higher 64-bit int into arm64ReservedRegisterForTemporary.
		c.assembler.CompileVectorRegisterToRegister(arm64.UMOV, v, arm64ReservedRegisterForTemporary,
			arm64.VectorArrangementD, 1)

		// Move the sign bit into the least significant bit.
		c.assembler.CompileConstToRegister(arm64.LSR, 63, result)
		c.assembler.CompileConstToRegister(arm64.LSR, 63, arm64ReservedRegisterForTemporary)

		// result = (arm64ReservedRegisterForTemporary<<1) | result
		c.assembler.CompileLeftShiftedRegisterToRegister(arm64.ADD,
			arm64ReservedRegisterForTemporary, 1, result, result)
	}

	c.markRegisterUnused(v)
	c.pushRuntimeValueLocationOnRegister(result, runtimeValueTypeI32)
	return
}

// compileV128And implements compiler.compileV128And for arm64.
func (c *arm64Compiler) compileV128And(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VAND, arm64.VectorArrangement16B)
}

// compileV128Not implements compiler.compileV128Not for arm64.
func (c *arm64Compiler) compileV128Not(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.NOT, arm64.VectorArrangement16B)
}

// compileV128Or implements compiler.compileV128Or for arm64.
func (c *arm64Compiler) compileV128Or(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.VORR, arm64.VectorArrangement16B)
}

// compileV128Xor implements compiler.compileV128Xor for arm64.
func (c *arm64Compiler) compileV128Xor(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.EOR, arm64.VectorArrangement16B)
}

// compileV128Bitselect implements compiler.compileV128Bitselect for arm64.
func (c *arm64Compiler) compileV128Bitselect(*wazeroir.UnionOperation) error {
	selector := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(selector); err != nil {
		return err
	}

	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL,
		x2.register, x1.register, selector.register, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1.register, x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(selector.register)
	return nil
}

// compileV128AndNot implements compiler.compileV128AndNot for arm64.
func (c *arm64Compiler) compileV128AndNot(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.BIC, arm64.VectorArrangement16B)
}

func (c *arm64Compiler) compileV128UniOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, v.register, v.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

func (c *arm64Compiler) compileV128x2BinOp(inst asm.Instruction, arr arm64.VectorArrangement) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	c.assembler.CompileVectorRegisterToVectorRegister(inst, x2.register, x1.register, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(x1.register)
	return nil
}

// compileV128Shr implements compiler.compileV128Shr for arm64.
func (c *arm64Compiler) compileV128Shr(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.SSHL
	} else {
		inst = arm64.USHL
	}
	return c.compileV128ShiftImpl(shape, inst, true)
}

// compileV128Shl implements compiler.compileV128Shl for arm64.
func (c *arm64Compiler) compileV128Shl(o *wazeroir.UnionOperation) error {
	return c.compileV128ShiftImpl(o.B1 /*shape*/, arm64.SSHL, false)
}

func (c *arm64Compiler) compileV128ShiftImpl(shape wazeroir.Shape, ins asm.Instruction, rightShift bool) error {
	s := c.locationStack.pop()
	if s.register == arm64.RegRZR {
		// If the shift amount is the zero register, there is nothing to do here.
		return nil
	}

	var modulo asm.ConstantValue
	var arr arm64.VectorArrangement
	switch shape {
	case wazeroir.ShapeI8x16:
		modulo = 0x7 // modulo 8.
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		modulo = 0xf // modulo 16.
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		modulo = 0x1f // modulo 32.
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		modulo = 0x3f // modulo 64.
		arr = arm64.VectorArrangement2D
	}

	if err := c.compileEnsureOnRegister(s); err != nil {
		return err
	}

	v := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	c.assembler.CompileConstToRegister(arm64.ANDIMM32, modulo, s.register)

	if rightShift {
		// Negate the amount to turn this into a right shift.
		c.assembler.CompileRegisterToRegister(arm64.NEG, s.register, s.register)
	}

	// Copy the shift amount into a vector register as SSHL requires it to be there.
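	// Note: SSHL/USHL shift each lane by the per-lane amount in the second operand, and a negative
	// amount shifts right, which is why the amount is negated above for right shifts.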
	c.assembler.CompileRegisterToVectorRegister(arm64.DUPGEN, s.register, tmp,
		arr, arm64.VectorIndexNone)

	c.assembler.CompileVectorRegisterToVectorRegister(ins, tmp, v.register, arr,
		arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(s.register)
	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return nil
}

// compileV128Cmp implements compiler.compileV128Cmp for arm64.
func (c *arm64Compiler) compileV128Cmp(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	var arr arm64.VectorArrangement
	v128CmpType := o.B1
	if v128CmpType <= wazeroir.V128CmpTypeI8x16GeU {
		arr = arm64.VectorArrangement16B
	} else if v128CmpType <= wazeroir.V128CmpTypeI16x8GeU {
		arr = arm64.VectorArrangement8H
	} else if v128CmpType <= wazeroir.V128CmpTypeI32x4GeU {
		arr = arm64.VectorArrangement4S
	} else if v128CmpType <= wazeroir.V128CmpTypeI64x2GeS {
		arr = arm64.VectorArrangement2D
	} else if v128CmpType <= wazeroir.V128CmpTypeF32x4Ge {
		arr = arm64.VectorArrangement4S
	} else { // f64x2
		arr = arm64.VectorArrangement2D
	}

	result := x1.register
	switch v128CmpType {
	case wazeroir.V128CmpTypeI8x16Eq, wazeroir.V128CmpTypeI16x8Eq, wazeroir.V128CmpTypeI32x4Eq, wazeroir.V128CmpTypeI64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16Ne, wazeroir.V128CmpTypeI16x8Ne, wazeroir.V128CmpTypeI32x4Ne, wazeroir.V128CmpTypeI64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMEQ, x1.register, x2.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeI8x16LtS, wazeroir.V128CmpTypeI16x8LtS, wazeroir.V128CmpTypeI32x4LtS, wazeroir.V128CmpTypeI64x2LtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LtU, wazeroir.V128CmpTypeI16x8LtU, wazeroir.V128CmpTypeI32x4LtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtS, wazeroir.V128CmpTypeI16x8GtS, wazeroir.V128CmpTypeI32x4GtS, wazeroir.V128CmpTypeI64x2GtS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GtU, wazeroir.V128CmpTypeI16x8GtU, wazeroir.V128CmpTypeI32x4GtU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHI, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeS, wazeroir.V128CmpTypeI16x8LeS, wazeroir.V128CmpTypeI32x4LeS, wazeroir.V128CmpTypeI64x2LeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16LeU, wazeroir.V128CmpTypeI16x8LeU, wazeroir.V128CmpTypeI32x4LeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeS, wazeroir.V128CmpTypeI16x8GeS, wazeroir.V128CmpTypeI32x4GeS, wazeroir.V128CmpTypeI64x2GeS:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMGE, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeI8x16GeU, wazeroir.V128CmpTypeI16x8GeU, wazeroir.V128CmpTypeI32x4GeU:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.CMHS, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Eq, wazeroir.V128CmpTypeF64x2Eq:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ne, wazeroir.V128CmpTypeF64x2Ne:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMEQ, x2.register, x1.register, result, arr)
		// Reverse the condition by flipping all bits.
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.NOT, result, result,
			arm64.VectorArrangement16B, arm64.VectorIndexNone, arm64.VectorIndexNone)
	case wazeroir.V128CmpTypeF32x4Lt, wazeroir.V128CmpTypeF64x2Lt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Le, wazeroir.V128CmpTypeF64x2Le:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x1.register, x2.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Gt, wazeroir.V128CmpTypeF64x2Gt:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2.register, x1.register, result, arr)
	case wazeroir.V128CmpTypeF32x4Ge, wazeroir.V128CmpTypeF64x2Ge:
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGE, x2.register, x1.register, result, arr)
	}

	c.markRegisterUnused(x2.register)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128AddSat implements compiler.compileV128AddSat for arm64.
func (c *arm64Compiler) compileV128AddSat(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.VSQADD
	} else {
		inst = arm64.VUQADD
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128SubSat implements compiler.compileV128SubSat for arm64.
func (c *arm64Compiler) compileV128SubSat(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.VSQSUB
	} else {
		inst = arm64.VUQSUB
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128Mul implements compiler.compileV128Mul for arm64.
func (c *arm64Compiler) compileV128Mul(o *wazeroir.UnionOperation) (err error) {
	shape := o.B1
	switch shape {
	case wazeroir.ShapeI8x16, wazeroir.ShapeI16x8, wazeroir.ShapeI32x4:
		err = c.compileV128x2BinOp(arm64.VMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeF32x4, wazeroir.ShapeF64x2:
		err = c.compileV128x2BinOp(arm64.VFMUL, defaultArrangementForShape(shape))
	case wazeroir.ShapeI64x2:
		x2 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x2); err != nil {
			return
		}

		x1 := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(x1); err != nil {
			return
		}

		src1, src2 := x1.register, x2.register

		tmp1, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}
		c.markRegisterUsed(tmp1)

		tmp2, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		c.markRegisterUsed(tmp2)

		tmp3, err := c.allocateRegister(registerTypeVector)
		if err != nil {
			return err
		}

		// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
		c.assembler.CompileVectorRegisterToVectorRegister(arm64.REV64, src2, tmp2,
			arm64.VectorArrangement4S, arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VMUL, src1, tmp2, tmp2, arm64.VectorArrangement4S)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src1, tmp1,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.VADDP, tmp2, tmp2, arm64.VectorArrangement4S,
			arm64.VectorIndexNone, arm64.VectorIndexNone,
		)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.XTN, src2, tmp3,
			arm64.VectorArrangement2D, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileVectorRegisterToVectorRegister(arm64.SHLL, tmp2, src1,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone)

		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VUMLAL, tmp3, tmp1, src1, arm64.VectorArrangement2S)

		c.markRegisterUnused(src2, tmp1, tmp2)
		c.pushVectorRuntimeValueLocationOnRegister(src1)
	}
	return
}

// compileV128Div implements compiler.compileV128Div for arm64.
func (c *arm64Compiler) compileV128Div(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	var inst asm.Instruction
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
		inst = arm64.VFDIV
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
		inst = arm64.VFDIV
	}
	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Neg implements compiler.compileV128Neg for arm64.
func (c *arm64Compiler) compileV128Neg(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VNEG
	} else { // Floating point lanes
		inst = arm64.VFNEG
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
}

// compileV128Sqrt implements compiler.compileV128Sqrt for arm64.
func (c *arm64Compiler) compileV128Sqrt(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFSQRT, arr)
}

// compileV128Abs implements compiler.compileV128Abs for arm64.
func (c *arm64Compiler) compileV128Abs(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		inst = arm64.VABS
	} else { // Floating point lanes
		inst = arm64.VFABS
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(shape))
}

// compileV128Popcnt implements compiler.compileV128Popcnt for arm64.
func (c *arm64Compiler) compileV128Popcnt(o *wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.VCNT, defaultArrangementForShape(o.B1))
}

// compileV128Min implements compiler.compileV128Min for arm64.
func (c *arm64Compiler) compileV128Min(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if signed {
			inst = arm64.SMIN
		} else {
			inst = arm64.UMIN
		}
	} else { // Floating point lanes
		inst = arm64.VFMIN
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

func defaultArrangementForShape(s wazeroir.Shape) (arr arm64.VectorArrangement) {
	switch s {
	case wazeroir.ShapeI8x16:
		arr = arm64.VectorArrangement16B
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8H
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeI64x2:
		arr = arm64.VectorArrangement2D
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return
}

// compileV128Max implements compiler.compileV128Max for arm64.
func (c *arm64Compiler) compileV128Max(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	shape := o.B1
	signed := o.B3
	if shape <= wazeroir.ShapeI64x2 { // Integer lanes
		if signed {
			inst = arm64.SMAX
		} else {
			inst = arm64.UMAX
		}
	} else { // Floating point lanes
		inst = arm64.VFMAX
	}
	return c.compileV128x2BinOp(inst, defaultArrangementForShape(shape))
}

// compileV128AvgrU implements compiler.compileV128AvgrU for arm64.
func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.URHADD, defaultArrangementForShape(o.B1))
}

// compileV128Pmin implements compiler.compileV128Pmin for arm64.
func (c *arm64Compiler) compileV128Pmin(o *wazeroir.UnionOperation) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), false)
}

// compileV128Pmax implements compiler.compileV128Pmax for arm64.
func (c *arm64Compiler) compileV128Pmax(o *wazeroir.UnionOperation) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.B1), true)
}

// compileV128PseudoMinOrMax implements compileV128Pmax and compileV128Pmin.
func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	result, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Set all bits on each lane if x1r's lane satisfies the condition (min or max), zeros otherwise.
	if max {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
	} else {
		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
	}
	// Select each bit based on the comparison result above.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, x1r, x2r, result, arm64.VectorArrangement16B)

	c.markRegisterUnused(x1r, x2r)
	c.pushVectorRuntimeValueLocationOnRegister(result)
	return nil
}

// compileV128Ceil implements compiler.compileV128Ceil for arm64.
func (c *arm64Compiler) compileV128Ceil(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTP, arr)
}

// compileV128Floor implements compiler.compileV128Floor for arm64.
func (c *arm64Compiler) compileV128Floor(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTM, arr)
}

// compileV128Trunc implements compiler.compileV128Trunc for arm64.
func (c *arm64Compiler) compileV128Trunc(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTZ, arr)
}

// compileV128Nearest implements compiler.compileV128Nearest for arm64.
func (c *arm64Compiler) compileV128Nearest(o *wazeroir.UnionOperation) error {
	var arr arm64.VectorArrangement
	shape := o.B1
	switch shape {
	case wazeroir.ShapeF32x4:
		arr = arm64.VectorArrangement4S
	case wazeroir.ShapeF64x2:
		arr = arm64.VectorArrangement2D
	}
	return c.compileV128UniOp(arm64.VFRINTN, arr)
}

// compileV128Extend implements compiler.compileV128Extend for arm64.
func (c *arm64Compiler) compileV128Extend(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	originShape := o.B1
	signed := o.B2 == 1
	useLow := o.B3
	if useLow {
		if signed {
			inst = arm64.SSHLL
		} else {
			inst = arm64.USHLL
		}

		switch originShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if signed {
			inst = arm64.SSHLL2
		} else {
			inst = arm64.USHLL2
		}
		arr = defaultArrangementForShape(originShape)
	}

	return c.compileV128UniOp(inst, arr)
}

// compileV128ExtMul implements compiler.compileV128ExtMul for arm64.
func (c *arm64Compiler) compileV128ExtMul(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	var arr arm64.VectorArrangement
	originShape := o.B1
	signed := o.B2 == 1
	useLow := o.B3
	if useLow {
		if signed {
			inst = arm64.SMULL
		} else {
			inst = arm64.UMULL
		}

		switch originShape {
		case wazeroir.ShapeI8x16:
			arr = arm64.VectorArrangement8B
		case wazeroir.ShapeI16x8:
			arr = arm64.VectorArrangement4H
		case wazeroir.ShapeI32x4:
			arr = arm64.VectorArrangement2S
		}
	} else {
		if signed {
			inst = arm64.SMULL2
		} else {
			inst = arm64.UMULL2
		}
		arr = defaultArrangementForShape(originShape)
	}

	return c.compileV128x2BinOp(inst, arr)
}

// compileV128Q15mulrSatS implements compiler.compileV128Q15mulrSatS for arm64.
func (c *arm64Compiler) compileV128Q15mulrSatS(*wazeroir.UnionOperation) error {
	return c.compileV128x2BinOp(arm64.SQRDMULH, arm64.VectorArrangement8H)
}

// compileV128ExtAddPairwise implements compiler.compileV128ExtAddPairwise for arm64.
func (c *arm64Compiler) compileV128ExtAddPairwise(o *wazeroir.UnionOperation) error {
	var inst asm.Instruction
	originShape := o.B1
	signed := o.B3
	if signed {
		inst = arm64.SADDLP
	} else {
		inst = arm64.UADDLP
	}
	return c.compileV128UniOp(inst, defaultArrangementForShape(originShape))
}

// compileV128FloatPromote implements compiler.compileV128FloatPromote for arm64.
func (c *arm64Compiler) compileV128FloatPromote(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.FCVTL, arm64.VectorArrangement2S)
}

// compileV128FloatDemote implements compiler.compileV128FloatDemote for arm64.
func (c *arm64Compiler) compileV128FloatDemote(*wazeroir.UnionOperation) error {
	return c.compileV128UniOp(arm64.FCVTN, arm64.VectorArrangement2S)
}

// compileV128FConvertFromI implements compiler.compileV128FConvertFromI for arm64.
func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.UnionOperation) (err error) {
	destinationShape := o.B1
	signed := o.B3

	if destinationShape == wazeroir.ShapeF32x4 {
		if signed {
			err = c.compileV128UniOp(arm64.VSCVTF, defaultArrangementForShape(destinationShape))
		} else {
			err = c.compileV128UniOp(arm64.VUCVTF, defaultArrangementForShape(destinationShape))
		}
		return
	} else { // f64x2
		v := c.locationStack.popV128()
		if err = c.compileEnsureOnRegister(v); err != nil {
			return
		}
		vr := v.register

		var expand, convert asm.Instruction
		if signed {
			expand, convert = arm64.SSHLL, arm64.VSCVTF
		} else {
			expand, convert = arm64.USHLL, arm64.VUCVTF
		}

		// Expand the lower two 32-bit lanes into two 64-bit lanes.
		c.assembler.CompileVectorRegisterToVectorRegisterWithConst(expand, vr, vr, arm64.VectorArrangement2S, 0)
		// Convert the two 64-bit integer lanes into double-precision values.
		c.assembler.CompileVectorRegisterToVectorRegister(convert, vr, vr, arm64.VectorArrangement2D,
			arm64.VectorIndexNone, arm64.VectorIndexNone)
		c.pushVectorRuntimeValueLocationOnRegister(vr)
	}
	return
}

// compileV128Dot implements compiler.compileV128Dot for arm64.
func (c *arm64Compiler) compileV128Dot(*wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	tmp, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	// Multiply the lower integers and get the 32-bit results into tmp.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
	// Multiply the higher integers and get the 32-bit results into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
	// Add these two results pairwise into x1r.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)

	return nil
}

// compileV128Narrow implements compiler.compileV128Narrow for arm64.
func (c *arm64Compiler) compileV128Narrow(o *wazeroir.UnionOperation) error {
	x2 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x2); err != nil {
		return err
	}

	x1 := c.locationStack.popV128()
	if err := c.compileEnsureOnRegister(x1); err != nil {
		return err
	}

	x1r, x2r := x1.register, x2.register

	var arr, arr2 arm64.VectorArrangement
	originShape := o.B1
	signed := o.B3
	switch originShape {
	case wazeroir.ShapeI16x8:
		arr = arm64.VectorArrangement8B
		arr2 = arm64.VectorArrangement16B
	case wazeroir.ShapeI32x4:
		arr = arm64.VectorArrangement4H
		arr2 = arm64.VectorArrangement8H
	}

	var lo, hi asm.Instruction
	if signed {
		lo, hi = arm64.SQXTN, arm64.SQXTN2
	} else {
		lo, hi = arm64.SQXTUN, arm64.SQXTUN2
	}

	// Narrow lanes on x1r and write them into lower-half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(lo, x1r, x1r, arr, arm64.VectorIndexNone, arm64.VectorIndexNone)
	// Narrow lanes on x2r and write them into higher-half of x1r.
	c.assembler.CompileVectorRegisterToVectorRegister(hi, x2r, x1r, arr2, arm64.VectorIndexNone, arm64.VectorIndexNone)

	c.markRegisterUnused(x2r)
	c.pushVectorRuntimeValueLocationOnRegister(x1r)
	return nil
}

// compileV128ITruncSatFromF implements compiler.compileV128ITruncSatFromF for arm64.
func (c *arm64Compiler) compileV128ITruncSatFromF(o *wazeroir.UnionOperation) (err error) {
	v := c.locationStack.popV128()
	if err = c.compileEnsureOnRegister(v); err != nil {
		return err
	}

	originShape := o.B1
	signed := o.B3
	var cvt asm.Instruction
	if signed {
		cvt = arm64.VFCVTZS
	} else {
		cvt = arm64.VFCVTZU
	}

	c.assembler.CompileVectorRegisterToVectorRegister(cvt, v.register, v.register,
		defaultArrangementForShape(originShape), arm64.VectorIndexNone, arm64.VectorIndexNone,
	)

	if originShape == wazeroir.ShapeF64x2 {
		var narrow asm.Instruction
		if signed {
			narrow = arm64.SQXTN
		} else {
			narrow = arm64.UQXTN
		}
		c.assembler.CompileVectorRegisterToVectorRegister(narrow, v.register, v.register,
			arm64.VectorArrangement2S, arm64.VectorIndexNone, arm64.VectorIndexNone,
		)
	}

	c.pushVectorRuntimeValueLocationOnRegister(v.register)
	return
}