github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/instr.go (about) 1 package arm64 2 3 import ( 4 "fmt" 5 "math" 6 "strings" 7 8 "github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc" 9 "github.com/wasilibs/wazerox/internal/engine/wazevo/ssa" 10 ) 11 12 type ( 13 // instruction represents either a real instruction in arm64, or the meta instructions 14 // that are convenient for code generation. For example, inline constants are also treated 15 // as instructions. 16 // 17 // Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation 18 // can be considered equivalent to the sequence of such instructions. 19 // 20 // Each field is interpreted depending on the kind. 21 // 22 // TODO: optimize the layout later once the impl settles. 23 instruction struct { 24 kind instructionKind 25 prev, next *instruction 26 u1, u2, u3 uint64 27 rd, rm, rn, ra operand 28 amode addressMode 29 abi *abiImpl 30 targets []uint32 31 addedBeforeRegAlloc bool 32 } 33 34 // instructionKind represents the kind of instruction. 35 // This controls how the instruction struct is interpreted. 36 instructionKind int 37 ) 38 39 // IsCall implements regalloc.Instr IsCall. 40 func (i *instruction) IsCall() bool { 41 return i.kind == call 42 } 43 44 // IsIndirectCall implements regalloc.Instr IsIndirectCall. 45 func (i *instruction) IsIndirectCall() bool { 46 return i.kind == callInd 47 } 48 49 // IsReturn implements regalloc.Instr IsReturn. 50 func (i *instruction) IsReturn() bool { 51 return i.kind == ret 52 } 53 54 type defKind byte 55 56 const ( 57 defKindNone defKind = iota + 1 58 defKindRD 59 defKindCall 60 ) 61 62 var defKinds = [numInstructionKinds]defKind{ 63 adr: defKindRD, 64 aluRRR: defKindRD, 65 aluRRRR: defKindRD, 66 aluRRImm12: defKindRD, 67 aluRRBitmaskImm: defKindRD, 68 aluRRRShift: defKindRD, 69 aluRRImmShift: defKindRD, 70 aluRRRExtend: defKindRD, 71 bitRR: defKindRD, 72 movZ: defKindRD, 73 movK: defKindRD, 74 movN: defKindRD, 75 mov32: defKindRD, 76 mov64: defKindRD, 77 fpuMov64: defKindRD, 78 fpuMov128: defKindRD, 79 fpuRR: defKindRD, 80 fpuRRR: defKindRD, 81 nop0: defKindNone, 82 call: defKindCall, 83 callInd: defKindCall, 84 ret: defKindNone, 85 store8: defKindNone, 86 store16: defKindNone, 87 store32: defKindNone, 88 store64: defKindNone, 89 exitSequence: defKindNone, 90 condBr: defKindNone, 91 br: defKindNone, 92 brTableSequence: defKindNone, 93 cSet: defKindRD, 94 extend: defKindRD, 95 fpuCmp: defKindNone, 96 uLoad8: defKindRD, 97 uLoad16: defKindRD, 98 uLoad32: defKindRD, 99 sLoad8: defKindRD, 100 sLoad16: defKindRD, 101 sLoad32: defKindRD, 102 uLoad64: defKindRD, 103 fpuLoad32: defKindRD, 104 fpuLoad64: defKindRD, 105 fpuLoad128: defKindRD, 106 vecLoad1R: defKindRD, 107 loadFpuConst32: defKindRD, 108 loadFpuConst64: defKindRD, 109 loadFpuConst128: defKindRD, 110 fpuStore32: defKindNone, 111 fpuStore64: defKindNone, 112 fpuStore128: defKindNone, 113 udf: defKindNone, 114 cSel: defKindRD, 115 fpuCSel: defKindRD, 116 movToVec: defKindRD, 117 movFromVec: defKindRD, 118 movFromVecSigned: defKindRD, 119 vecDup: defKindRD, 120 vecDupElement: defKindRD, 121 vecExtract: defKindRD, 122 vecMisc: defKindRD, 123 vecMovElement: defKindRD, 124 vecLanes: defKindRD, 125 vecShiftImm: defKindRD, 126 vecTbl: defKindRD, 127 vecTbl2: defKindRD, 128 vecPermute: defKindRD, 129 vecRRR: defKindRD, 130 vecRRRRewrite: defKindNone, 131 fpuToInt: defKindRD, 132 intToFpu: defKindRD, 133 cCmpImm: defKindNone, 134 
movToFPSR: defKindNone, 135 movFromFPSR: defKindRD, 136 emitSourceOffsetInfo: defKindNone, 137 } 138 139 // Defs returns the list of regalloc.VReg that are defined by the instruction. 140 // In order to reduce the number of allocations, the caller can pass the slice to be used. 141 func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { 142 *regs = (*regs)[:0] 143 switch defKinds[i.kind] { 144 case defKindNone: 145 case defKindRD: 146 *regs = append(*regs, i.rd.nr()) 147 case defKindCall: 148 *regs = append(*regs, i.abi.retRealRegs...) 149 default: 150 panic(fmt.Sprintf("defKind for %v not defined", i)) 151 } 152 return *regs 153 } 154 155 // AssignDef implements regalloc.Instr AssignDef. 156 func (i *instruction) AssignDef(reg regalloc.VReg) { 157 switch defKinds[i.kind] { 158 case defKindNone: 159 case defKindRD: 160 i.rd = i.rd.assignReg(reg) 161 case defKindCall: 162 panic("BUG: call instructions shouldn't be assigned") 163 default: 164 panic(fmt.Sprintf("defKind for %v not defined", i)) 165 } 166 } 167 168 type useKind byte 169 170 const ( 171 useKindNone useKind = iota + 1 172 useKindRN 173 useKindRNRM 174 useKindRNRMRA 175 useKindRNRN1RM 176 useKindRet 177 useKindCall 178 useKindCallInd 179 useKindAMode 180 useKindRNAMode 181 useKindCond 182 useKindVecRRRRewrite 183 ) 184 185 var useKinds = [numInstructionKinds]useKind{ 186 udf: useKindNone, 187 aluRRR: useKindRNRM, 188 aluRRRR: useKindRNRMRA, 189 aluRRImm12: useKindRN, 190 aluRRBitmaskImm: useKindRN, 191 aluRRRShift: useKindRNRM, 192 aluRRImmShift: useKindRN, 193 aluRRRExtend: useKindRNRM, 194 bitRR: useKindRN, 195 movZ: useKindNone, 196 movK: useKindNone, 197 movN: useKindNone, 198 mov32: useKindRN, 199 mov64: useKindRN, 200 fpuMov64: useKindRN, 201 fpuMov128: useKindRN, 202 fpuRR: useKindRN, 203 fpuRRR: useKindRNRM, 204 nop0: useKindNone, 205 call: useKindCall, 206 callInd: useKindCallInd, 207 ret: useKindRet, 208 store8: useKindRNAMode, 209 store16: useKindRNAMode, 210 store32: useKindRNAMode, 211 store64: useKindRNAMode, 212 exitSequence: useKindRN, 213 condBr: useKindCond, 214 br: useKindNone, 215 brTableSequence: useKindRN, 216 cSet: useKindNone, 217 extend: useKindRN, 218 fpuCmp: useKindRNRM, 219 uLoad8: useKindAMode, 220 uLoad16: useKindAMode, 221 uLoad32: useKindAMode, 222 sLoad8: useKindAMode, 223 sLoad16: useKindAMode, 224 sLoad32: useKindAMode, 225 uLoad64: useKindAMode, 226 fpuLoad32: useKindAMode, 227 fpuLoad64: useKindAMode, 228 fpuLoad128: useKindAMode, 229 fpuStore32: useKindRNAMode, 230 fpuStore64: useKindRNAMode, 231 fpuStore128: useKindRNAMode, 232 loadFpuConst32: useKindNone, 233 loadFpuConst64: useKindNone, 234 loadFpuConst128: useKindNone, 235 vecLoad1R: useKindRN, 236 cSel: useKindRNRM, 237 fpuCSel: useKindRNRM, 238 movToVec: useKindRN, 239 movFromVec: useKindRN, 240 movFromVecSigned: useKindRN, 241 vecDup: useKindRN, 242 vecDupElement: useKindRN, 243 vecExtract: useKindRNRM, 244 cCmpImm: useKindRN, 245 vecMisc: useKindRN, 246 vecMovElement: useKindRN, 247 vecLanes: useKindRN, 248 vecShiftImm: useKindRN, 249 vecTbl: useKindRNRM, 250 vecTbl2: useKindRNRN1RM, 251 vecRRR: useKindRNRM, 252 vecRRRRewrite: useKindVecRRRRewrite, 253 vecPermute: useKindRNRM, 254 fpuToInt: useKindRN, 255 intToFpu: useKindRN, 256 movToFPSR: useKindRN, 257 movFromFPSR: useKindNone, 258 adr: useKindNone, 259 emitSourceOffsetInfo: useKindNone, 260 } 261 262 // Uses returns the list of regalloc.VReg that are used by the instruction. 263 // In order to reduce the number of allocations, the caller can pass the slice to be used. 
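// For example, a register allocation pass can reuse one scratch slice while walking the
// instruction list (a sketch; `head` stands for an assumed *instruction list head):
//
//	var scratch []regalloc.VReg
//	for cur := head; cur != nil; cur = cur.next {
//		for _, use := range cur.Uses(&scratch) {
//			_ = use // record a read of this register at the current program point.
//		}
//	}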
264 func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { 265 *regs = (*regs)[:0] 266 switch useKinds[i.kind] { 267 case useKindNone: 268 case useKindRN: 269 if rn := i.rn.reg(); rn.Valid() { 270 *regs = append(*regs, rn) 271 } 272 case useKindRNRM: 273 if rn := i.rn.reg(); rn.Valid() { 274 *regs = append(*regs, rn) 275 } 276 if rm := i.rm.reg(); rm.Valid() { 277 *regs = append(*regs, rm) 278 } 279 case useKindRNRMRA: 280 if rn := i.rn.reg(); rn.Valid() { 281 *regs = append(*regs, rn) 282 } 283 if rm := i.rm.reg(); rm.Valid() { 284 *regs = append(*regs, rm) 285 } 286 if ra := i.ra.reg(); ra.Valid() { 287 *regs = append(*regs, ra) 288 } 289 case useKindRNRN1RM: 290 if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() { 291 rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) 292 *regs = append(*regs, rn, rn1) 293 } 294 if rm := i.rm.reg(); rm.Valid() { 295 *regs = append(*regs, rm) 296 } 297 case useKindRet: 298 *regs = append(*regs, i.abi.retRealRegs...) 299 case useKindAMode: 300 if amodeRN := i.amode.rn; amodeRN.Valid() { 301 *regs = append(*regs, amodeRN) 302 } 303 if amodeRM := i.amode.rm; amodeRM.Valid() { 304 *regs = append(*regs, amodeRM) 305 } 306 case useKindRNAMode: 307 *regs = append(*regs, i.rn.reg()) 308 if amodeRN := i.amode.rn; amodeRN.Valid() { 309 *regs = append(*regs, amodeRN) 310 } 311 if amodeRM := i.amode.rm; amodeRM.Valid() { 312 *regs = append(*regs, amodeRM) 313 } 314 case useKindCond: 315 cnd := cond(i.u1) 316 if cnd.kind() != condKindCondFlagSet { 317 *regs = append(*regs, cnd.register()) 318 } 319 case useKindCall: 320 *regs = append(*regs, i.abi.argRealRegs...) 321 case useKindCallInd: 322 *regs = append(*regs, i.rn.nr()) 323 *regs = append(*regs, i.abi.argRealRegs...) 324 case useKindVecRRRRewrite: 325 *regs = append(*regs, i.rn.reg()) 326 *regs = append(*regs, i.rm.reg()) 327 *regs = append(*regs, i.rd.reg()) 328 default: 329 panic(fmt.Sprintf("useKind for %v not defined", i)) 330 } 331 return *regs 332 } 333 334 func (i *instruction) AssignUse(index int, reg regalloc.VReg) { 335 switch useKinds[i.kind] { 336 case useKindNone: 337 case useKindRN: 338 if rn := i.rn.reg(); rn.Valid() { 339 i.rn = i.rn.assignReg(reg) 340 } 341 case useKindRNRM: 342 if index == 0 { 343 if rn := i.rn.reg(); rn.Valid() { 344 i.rn = i.rn.assignReg(reg) 345 } 346 } else { 347 if rm := i.rm.reg(); rm.Valid() { 348 i.rm = i.rm.assignReg(reg) 349 } 350 } 351 case useKindVecRRRRewrite: 352 if index == 0 { 353 if rn := i.rn.reg(); rn.Valid() { 354 i.rn = i.rn.assignReg(reg) 355 } 356 } else if index == 1 { 357 if rm := i.rm.reg(); rm.Valid() { 358 i.rm = i.rm.assignReg(reg) 359 } 360 } else { 361 if rd := i.rd.reg(); rd.Valid() { 362 i.rd = i.rd.assignReg(reg) 363 } 364 } 365 case useKindRNRN1RM: 366 if index == 0 { 367 if rn := i.rn.reg(); rn.Valid() { 368 i.rn = i.rn.assignReg(reg) 369 } 370 if rn1 := i.rn.reg() + 1; rn1.Valid() { 371 i.rm = i.rm.assignReg(reg + 1) 372 } 373 } else { 374 if rm := i.rm.reg(); rm.Valid() { 375 i.rm = i.rm.assignReg(reg) 376 } 377 } 378 case useKindRNRMRA: 379 if index == 0 { 380 if rn := i.rn.reg(); rn.Valid() { 381 i.rn = i.rn.assignReg(reg) 382 } 383 } else if index == 1 { 384 if rm := i.rm.reg(); rm.Valid() { 385 i.rm = i.rm.assignReg(reg) 386 } 387 } else { 388 if ra := i.ra.reg(); ra.Valid() { 389 i.ra = i.ra.assignReg(reg) 390 } 391 } 392 case useKindRet: 393 panic("BUG: ret instructions shouldn't be assigned") 394 case useKindAMode: 395 if index == 0 { 396 if amodeRN := i.amode.rn; amodeRN.Valid() { 397 i.amode.rn = reg 398 } 399 } 
else { 400 if amodeRM := i.amode.rm; amodeRM.Valid() { 401 i.amode.rm = reg 402 } 403 } 404 case useKindRNAMode: 405 if index == 0 { 406 i.rn = i.rn.assignReg(reg) 407 } else if index == 1 { 408 if amodeRN := i.amode.rn; amodeRN.Valid() { 409 i.amode.rn = reg 410 } else { 411 panic("BUG") 412 } 413 } else { 414 if amodeRM := i.amode.rm; amodeRM.Valid() { 415 i.amode.rm = reg 416 } else { 417 panic("BUG") 418 } 419 } 420 case useKindCond: 421 c := cond(i.u1) 422 switch c.kind() { 423 case condKindRegisterZero: 424 i.u1 = uint64(registerAsRegZeroCond(reg)) 425 case condKindRegisterNotZero: 426 i.u1 = uint64(registerAsRegNotZeroCond(reg)) 427 } 428 case useKindCall: 429 panic("BUG: call instructions shouldn't be assigned") 430 case useKindCallInd: 431 i.rn = i.rn.assignReg(reg) 432 default: 433 panic(fmt.Sprintf("useKind for %v not defined", i)) 434 } 435 } 436 437 func (i *instruction) asCall(ref ssa.FuncRef, abi *abiImpl) { 438 i.kind = call 439 i.u1 = uint64(ref) 440 i.abi = abi 441 } 442 443 func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *abiImpl) { 444 i.kind = callInd 445 i.rn = operandNR(ptr) 446 i.abi = abi 447 } 448 449 func (i *instruction) callFuncRef() ssa.FuncRef { 450 return ssa.FuncRef(i.u1) 451 } 452 453 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 454 func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 455 i.kind = movZ 456 i.rd = operandNR(dst) 457 i.u1 = imm 458 i.u2 = shift 459 if dst64bit { 460 i.u3 = 1 461 } 462 } 463 464 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 465 func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 466 i.kind = movK 467 i.rd = operandNR(dst) 468 i.u1 = imm 469 i.u2 = shift 470 if dst64bit { 471 i.u3 = 1 472 } 473 } 474 475 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 476 func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 477 i.kind = movN 478 i.rd = operandNR(dst) 479 i.u1 = imm 480 i.u2 = shift 481 if dst64bit { 482 i.u3 = 1 483 } 484 } 485 486 func (i *instruction) asNop0() *instruction { 487 i.kind = nop0 488 return i 489 } 490 491 func (i *instruction) asNop0WithLabel(l label) { 492 i.kind = nop0 493 i.u1 = uint64(l) 494 } 495 496 func (i *instruction) nop0Label() label { 497 return label(i.u1) 498 } 499 500 func (i *instruction) asRet(abi *abiImpl) { 501 i.kind = ret 502 i.abi = abi 503 } 504 505 func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { 506 i.kind = storeP64 507 i.rn = operandNR(src1) 508 i.rm = operandNR(src2) 509 i.amode = amode 510 } 511 512 func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { 513 i.kind = loadP64 514 i.rn = operandNR(src1) 515 i.rm = operandNR(src2) 516 i.amode = amode 517 } 518 519 func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { 520 switch sizeInBits { 521 case 8: 522 i.kind = store8 523 case 16: 524 i.kind = store16 525 case 32: 526 if src.reg().RegType() == regalloc.RegTypeInt { 527 i.kind = store32 528 } else { 529 i.kind = fpuStore32 530 } 531 case 64: 532 if src.reg().RegType() == regalloc.RegTypeInt { 533 i.kind = store64 534 } else { 535 i.kind = fpuStore64 536 } 537 case 128: 538 i.kind = fpuStore128 539 } 540 i.rn = src 541 i.amode = amode 542 } 543 544 func (i *instruction) asSLoad(dst operand, 
amode addressMode, sizeInBits byte) { 545 switch sizeInBits { 546 case 8: 547 i.kind = sLoad8 548 case 16: 549 i.kind = sLoad16 550 case 32: 551 i.kind = sLoad32 552 default: 553 panic("BUG") 554 } 555 i.rd = dst 556 i.amode = amode 557 } 558 559 func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { 560 switch sizeInBits { 561 case 8: 562 i.kind = uLoad8 563 case 16: 564 i.kind = uLoad16 565 case 32: 566 i.kind = uLoad32 567 case 64: 568 i.kind = uLoad64 569 } 570 i.rd = dst 571 i.amode = amode 572 } 573 574 func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { 575 switch sizeInBits { 576 case 32: 577 i.kind = fpuLoad32 578 case 64: 579 i.kind = fpuLoad64 580 case 128: 581 i.kind = fpuLoad128 582 } 583 i.rd = dst 584 i.amode = amode 585 } 586 587 func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { 588 // NOTE: currently only no-offset loads are supported; it is doubtful that 589 // we will ever need offset loads, which are only available as post-index. 590 i.kind = vecLoad1R 591 i.rd = rd 592 i.rn = rn 593 i.u1 = uint64(arr) 594 } 595 596 func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { 597 i.kind = cSet 598 i.rd = operandNR(rd) 599 i.u1 = uint64(c) 600 if mask { 601 i.u2 = 1 602 } 603 } 604 605 func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { 606 i.kind = cSel 607 i.rd = rd 608 i.rn = rn 609 i.rm = rm 610 i.u1 = uint64(c) 611 if _64bit { 612 i.u3 = 1 613 } 614 } 615 616 func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { 617 i.kind = fpuCSel 618 i.rd = rd 619 i.rn = rn 620 i.rm = rm 621 i.u1 = uint64(c) 622 if _64bit { 623 i.u3 = 1 624 } 625 } 626 627 func (i *instruction) asBr(target label) { 628 if target == returnLabel { 629 panic("BUG: call site should special case for returnLabel") 630 } 631 i.kind = br 632 i.u1 = uint64(target) 633 } 634 635 func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) { 636 i.kind = brTableSequence 637 i.rn = operandNR(indexReg) 638 i.targets = targets 639 } 640 641 func (i *instruction) brTableSequenceOffsetsResolved() { 642 i.u3 = 1 // indicate that the offsets are resolved, for debugging. 643 } 644 645 func (i *instruction) brLabel() label { 646 return label(i.u1) 647 } 648 649 // brOffsetResolve is called when the target label is resolved. 650 func (i *instruction) brOffsetResolve(offset int64) { 651 i.u2 = uint64(offset) 652 i.u3 = 1 // indicate that the offset is resolved, for debugging. 653 } 654 655 func (i *instruction) brOffset() int64 { 656 return int64(i.u2) 657 } 658 659 // asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is a register condition rather than a flag condition. 660 func (i *instruction) asCondBr(c cond, target label, is64bit bool) { 661 i.kind = condBr 662 i.u1 = c.asUint64() 663 i.u2 = uint64(target) 664 if is64bit { 665 i.u3 = 1 666 } 667 } 668 669 func (i *instruction) setCondBrTargets(target label) { 670 i.u2 = uint64(target) 671 } 672 673 func (i *instruction) condBrLabel() label { 674 return label(i.u2) 675 } 676 677 // condBrOffsetResolve is called when the target label is resolved. 678 func (i *instruction) condBrOffsetResolve(offset int64) { 679 i.rd.data = uint64(offset) 680 i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. 681 } 682 683 // condBrOffsetResolved returns true if condBrOffsetResolve has already been called.
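// For example, a branch-relocation pass that has computed the byte distance from this
// conditional branch to its target label would typically do (a sketch; `off` is an
// assumed variable holding that distance):
//
//	if !i.condBrOffsetResolved() {
//		i.condBrOffsetResolve(off)
//	}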
684 func (i *instruction) condBrOffsetResolved() bool { 685 return i.rd.data2 == 1 686 } 687 688 func (i *instruction) condBrOffset() int64 { 689 return int64(i.rd.data) 690 } 691 692 func (i *instruction) condBrCond() cond { 693 return cond(i.u1) 694 } 695 696 func (i *instruction) condBr64bit() bool { 697 return i.u3 == 1 698 } 699 700 func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { 701 i.kind = loadFpuConst32 702 i.u1 = raw 703 i.rd = operandNR(rd) 704 } 705 706 func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { 707 i.kind = loadFpuConst64 708 i.u1 = raw 709 i.rd = operandNR(rd) 710 } 711 712 func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { 713 i.kind = loadFpuConst128 714 i.u1 = lo 715 i.u2 = hi 716 i.rd = operandNR(rd) 717 } 718 719 func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { 720 i.kind = fpuCmp 721 i.rn, i.rm = rn, rm 722 if is64bit { 723 i.u3 = 1 724 } 725 } 726 727 func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) { 728 i.kind = cCmpImm 729 i.rn = rn 730 i.rm.data = imm 731 i.u1 = uint64(c) 732 i.u2 = uint64(flag) 733 if is64bit { 734 i.u3 = 1 735 } 736 } 737 738 // asALU sets up a basic ALU instruction. 739 func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { 740 switch rm.kind { 741 case operandKindNR: 742 i.kind = aluRRR 743 case operandKindSR: 744 i.kind = aluRRRShift 745 case operandKindER: 746 i.kind = aluRRRExtend 747 case operandKindImm12: 748 i.kind = aluRRImm12 749 default: 750 panic("BUG") 751 } 752 i.u1 = uint64(aluOp) 753 i.rd, i.rn, i.rm = rd, rn, rm 754 if dst64bit { 755 i.u3 = 1 756 } 757 } 758 759 // asALURRRR sets up an ALU instruction with three register sources and a register destination (madd/msub). 760 func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { 761 i.kind = aluRRRR 762 i.u1 = uint64(aluOp) 763 i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra 764 if dst64bit { 765 i.u3 = 1 766 } 767 } 768 769 // asALUShift sets up a shift-based ALU instruction. 770 func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { 771 switch rm.kind { 772 case operandKindNR: 773 i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands.
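// For example, a register shift amount such as `lsl x0, x1, x2` keeps the plain aluRRR
// form, while an immediate amount such as `lsl x0, x1, #3` takes the aluRRImmShift form
// selected by the next case.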
774 case operandKindShiftImm: 775 i.kind = aluRRImmShift 776 default: 777 panic("BUG") 778 } 779 i.u1 = uint64(aluOp) 780 i.rd, i.rn, i.rm = rd, rn, rm 781 if dst64bit { 782 i.u3 = 1 783 } 784 } 785 786 func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { 787 i.kind = aluRRBitmaskImm 788 i.u1 = uint64(aluOp) 789 i.rn, i.rd = operandNR(rn), operandNR(rd) 790 i.u2 = imm 791 if dst64bit { 792 i.u3 = 1 793 } 794 } 795 796 func (i *instruction) asMovToFPSR(rn regalloc.VReg) { 797 i.kind = movToFPSR 798 i.rn = operandNR(rn) 799 } 800 801 func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { 802 i.kind = movFromFPSR 803 i.rd = operandNR(rd) 804 } 805 806 func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { 807 i.kind = bitRR 808 i.rn, i.rd = operandNR(rn), operandNR(rd) 809 i.u1 = uint64(bitOp) 810 if is64bit { 811 i.u2 = 1 812 } 813 } 814 815 func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { 816 i.kind = fpuRRR 817 i.u1 = uint64(op) 818 i.rd, i.rn, i.rm = rd, rn, rm 819 if dst64bit { 820 i.u3 = 1 821 } 822 } 823 824 func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { 825 i.kind = fpuRR 826 i.u1 = uint64(op) 827 i.rd, i.rn = rd, rn 828 if dst64bit { 829 i.u3 = 1 830 } 831 } 832 833 func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { 834 i.kind = extend 835 i.rn, i.rd = operandNR(rn), operandNR(rd) 836 i.u1 = uint64(fromBits) 837 i.u2 = uint64(toBits) 838 if signed { 839 i.u3 = 1 840 } 841 } 842 843 func (i *instruction) asMove32(rd, rn regalloc.VReg) { 844 i.kind = mov32 845 i.rn, i.rd = operandNR(rn), operandNR(rd) 846 } 847 848 func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { 849 i.kind = mov64 850 i.rn, i.rd = operandNR(rn), operandNR(rd) 851 return i 852 } 853 854 func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { 855 i.kind = fpuMov64 856 i.rn, i.rd = operandNR(rn), operandNR(rd) 857 } 858 859 func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { 860 i.kind = fpuMov128 861 i.rn, i.rd = operandNR(rn), operandNR(rd) 862 return i 863 } 864 865 func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { 866 i.kind = movToVec 867 i.rd = rd 868 i.rn = rn 869 i.u1, i.u2 = uint64(arr), uint64(index) 870 } 871 872 func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { 873 if signed { 874 i.kind = movFromVecSigned 875 } else { 876 i.kind = movFromVec 877 } 878 i.rd = rd 879 i.rn = rn 880 i.u1, i.u2 = uint64(arr), uint64(index) 881 } 882 883 func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { 884 i.kind = vecDup 885 i.u1 = uint64(arr) 886 i.rn, i.rd = rn, rd 887 } 888 889 func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { 890 i.kind = vecDupElement 891 i.u1 = uint64(arr) 892 i.rn, i.rd = rn, rd 893 i.u2 = uint64(index) 894 } 895 896 func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { 897 i.kind = vecExtract 898 i.u1 = uint64(arr) 899 i.rn, i.rm, i.rd = rn, rm, rd 900 i.u2 = uint64(index) 901 } 902 903 func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { 904 i.kind = vecMovElement 905 i.u1 = uint64(arr) 906 i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) 907 i.rn, i.rd = rn, rd 908 } 909 910 func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { 911 
i.kind = vecMisc 912 i.u1 = uint64(op) 913 i.rn, i.rd = rn, rd 914 i.u2 = uint64(arr) 915 } 916 917 func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { 918 i.kind = vecLanes 919 i.u1 = uint64(op) 920 i.rn, i.rd = rn, rd 921 i.u2 = uint64(arr) 922 } 923 924 func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) { 925 i.kind = vecShiftImm 926 i.u1 = uint64(op) 927 i.rn, i.rm, i.rd = rn, rm, rd 928 i.u2 = uint64(arr) 929 } 930 931 func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { 932 switch nregs { 933 case 0, 1: 934 i.kind = vecTbl 935 case 2: 936 i.kind = vecTbl2 937 if !rn.reg().IsRealReg() { 938 panic("rn is not a RealReg") 939 } 940 if rn.realReg() == v31 { 941 panic("rn cannot be v31") 942 } 943 default: 944 panic(fmt.Sprintf("unsupported number of registers %d", nregs)) 945 } 946 i.rn, i.rm, i.rd = rn, rm, rd 947 i.u2 = uint64(arr) 948 } 949 950 func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { 951 i.kind = vecPermute 952 i.u1 = uint64(op) 953 i.rn, i.rm, i.rd = rn, rm, rd 954 i.u2 = uint64(arr) 955 } 956 957 func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) { 958 i.kind = vecRRR 959 i.u1 = uint64(op) 960 i.rn, i.rd, i.rm = rn, rd, rm 961 i.u2 = uint64(arr) 962 } 963 964 // asVecRRRRewrite encodes a vector instruction that rewrites the destination register. 965 // IMPORTANT: the destination register must be already defined before this instruction. 966 func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { 967 i.kind = vecRRRRewrite 968 i.u1 = uint64(op) 969 i.rn, i.rd, i.rm = rn, rd, rm 970 i.u2 = uint64(arr) 971 } 972 973 func (i *instruction) IsCopy() bool { 974 op := i.kind 975 // mov32 is not included because it is not a copy in the usual sense: it does not preserve the upper 32 bits, 976 // and it is only used when lowering IReduce, not for actual copies. 977 return op == mov64 || op == fpuMov64 || op == fpuMov128 978 } 979 980 // String implements fmt.Stringer.
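// The result is a best-effort, assembly-like rendering for debugging and tests. For example,
// a 64-bit register-register add might print as follows (a sketch; x0VReg, x1VReg and x2VReg
// are assumed to be this package's real-register VReg values):
//
//	i := &instruction{}
//	i.asALU(aluOpAdd, operandNR(x0VReg), operandNR(x1VReg), operandNR(x2VReg), true)
//	fmt.Println(i.String()) // prints something like "add x0, x1, x2"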
981 func (i *instruction) String() (str string) { 982 is64SizeBitToSize := func(u3 uint64) byte { 983 if u3 == 0 { 984 return 32 985 } 986 return 64 987 } 988 989 switch i.kind { 990 case nop0: 991 if i.u1 != 0 { 992 l := label(i.u1) 993 str = fmt.Sprintf("%s:", l) 994 } else { 995 str = "nop0" 996 } 997 case aluRRR: 998 size := is64SizeBitToSize(i.u3) 999 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1000 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), 1001 i.rm.format(size)) 1002 case aluRRRR: 1003 size := is64SizeBitToSize(i.u3) 1004 str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), 1005 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) 1006 case aluRRImm12: 1007 size := is64SizeBitToSize(i.u3) 1008 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1009 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) 1010 case aluRRBitmaskImm: 1011 size := is64SizeBitToSize(i.u3) 1012 rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) 1013 if size == 32 { 1014 str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) 1015 } else { 1016 str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) 1017 } 1018 case aluRRImmShift: 1019 size := is64SizeBitToSize(i.u3) 1020 str = fmt.Sprintf("%s %s, %s, %#x", 1021 aluOp(i.u1).String(), 1022 formatVRegSized(i.rd.nr(), size), 1023 formatVRegSized(i.rn.nr(), size), 1024 i.rm.shiftImm(), 1025 ) 1026 case aluRRRShift: 1027 size := is64SizeBitToSize(i.u3) 1028 str = fmt.Sprintf("%s %s, %s, %s", 1029 aluOp(i.u1).String(), 1030 formatVRegSized(i.rd.nr(), size), 1031 formatVRegSized(i.rn.nr(), size), 1032 i.rm.format(size), 1033 ) 1034 case aluRRRExtend: 1035 size := is64SizeBitToSize(i.u3) 1036 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1037 formatVRegSized(i.rd.nr(), size), 1038 formatVRegSized(i.rn.nr(), size), 1039 // Regardless of the source size, the register is formatted in 32-bit. 
1040 i.rm.format(32), 1041 ) 1042 case bitRR: 1043 size := is64SizeBitToSize(i.u2) 1044 str = fmt.Sprintf("%s %s, %s", 1045 bitOp(i.u1), 1046 formatVRegSized(i.rd.nr(), size), 1047 formatVRegSized(i.rn.nr(), size), 1048 ) 1049 case uLoad8: 1050 str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1051 case sLoad8: 1052 str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1053 case uLoad16: 1054 str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1055 case sLoad16: 1056 str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1057 case uLoad32: 1058 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1059 case sLoad32: 1060 str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1061 case uLoad64: 1062 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) 1063 case store8: 1064 str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) 1065 case store16: 1066 str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) 1067 case store32: 1068 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) 1069 case store64: 1070 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) 1071 case storeP64: 1072 str = fmt.Sprintf("stp %s, %s, %s", 1073 formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) 1074 case loadP64: 1075 str = fmt.Sprintf("ldp %s, %s, %s", 1076 formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) 1077 case mov64: 1078 str = fmt.Sprintf("mov %s, %s", 1079 formatVRegSized(i.rd.nr(), 64), 1080 formatVRegSized(i.rn.nr(), 64)) 1081 case mov32: 1082 str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) 1083 case movZ: 1084 size := is64SizeBitToSize(i.u3) 1085 str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1086 case movN: 1087 size := is64SizeBitToSize(i.u3) 1088 str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1089 case movK: 1090 size := is64SizeBitToSize(i.u3) 1091 str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1092 case extend: 1093 fromBits, toBits := byte(i.u1), byte(i.u2) 1094 1095 var signedStr string 1096 if i.u3 == 1 { 1097 signedStr = "s" 1098 } else { 1099 signedStr = "u" 1100 } 1101 var fromStr string 1102 switch fromBits { 1103 case 8: 1104 fromStr = "b" 1105 case 16: 1106 fromStr = "h" 1107 case 32: 1108 fromStr = "w" 1109 } 1110 str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) 1111 case cSel: 1112 size := is64SizeBitToSize(i.u3) 1113 str = fmt.Sprintf("csel %s, %s, %s, %s", 1114 formatVRegSized(i.rd.nr(), size), 1115 formatVRegSized(i.rn.nr(), size), 1116 formatVRegSized(i.rm.nr(), size), 1117 condFlag(i.u1), 1118 ) 1119 case cSet: 1120 if i.u2 != 0 { 1121 str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) 1122 } else { 1123 str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) 1124 } 1125 case cCmpImm: 1126 size := is64SizeBitToSize(i.u3) 1127 str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", 1128 formatVRegSized(i.rn.nr(), size), i.rm.data, 1129 i.u2&0b1111, 1130 
condFlag(i.u1)) 1131 case fpuMov64: 1132 str = fmt.Sprintf("mov %s, %s", 1133 formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), 1134 formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) 1135 case fpuMov128: 1136 str = fmt.Sprintf("mov %s, %s", 1137 formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), 1138 formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) 1139 case fpuMovFromVec: 1140 panic("TODO") 1141 case fpuRR: 1142 dstSz := is64SizeBitToSize(i.u3) 1143 srcSz := dstSz 1144 op := fpuUniOp(i.u1) 1145 switch op { 1146 case fpuUniOpCvt32To64: 1147 srcSz = 32 1148 case fpuUniOpCvt64To32: 1149 srcSz = 64 1150 } 1151 str = fmt.Sprintf("%s %s, %s", op.String(), 1152 formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) 1153 case fpuRRR: 1154 size := is64SizeBitToSize(i.u3) 1155 str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), 1156 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) 1157 case fpuRRI: 1158 panic("TODO") 1159 case fpuRRRR: 1160 panic("TODO") 1161 case fpuCmp: 1162 size := is64SizeBitToSize(i.u3) 1163 str = fmt.Sprintf("fcmp %s, %s", 1164 formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) 1165 case fpuLoad32: 1166 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1167 case fpuStore32: 1168 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) 1169 case fpuLoad64: 1170 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) 1171 case fpuStore64: 1172 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) 1173 case fpuLoad128: 1174 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) 1175 case fpuStore128: 1176 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) 1177 case loadFpuConst32: 1178 str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) 1179 case loadFpuConst64: 1180 str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) 1181 case loadFpuConst128: 1182 str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", 1183 formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) 1184 case fpuToInt: 1185 var op, src, dst string 1186 if signed := i.u1 == 1; signed { 1187 op = "fcvtzs" 1188 } else { 1189 op = "fcvtzu" 1190 } 1191 if src64 := i.u2 == 1; src64 { 1192 src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) 1193 } else { 1194 src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) 1195 } 1196 if dst64 := i.u3 == 1; dst64 { 1197 dst = formatVRegSized(i.rd.nr(), 64) 1198 } else { 1199 dst = formatVRegSized(i.rd.nr(), 32) 1200 } 1201 str = fmt.Sprintf("%s %s, %s", op, dst, src) 1202 1203 case intToFpu: 1204 var op, src, dst string 1205 if signed := i.u1 == 1; signed { 1206 op = "scvtf" 1207 } else { 1208 op = "ucvtf" 1209 } 1210 if src64 := i.u2 == 1; src64 { 1211 src = formatVRegSized(i.rn.nr(), 64) 1212 } else { 1213 src = formatVRegSized(i.rn.nr(), 32) 1214 } 1215 if dst64 := i.u3 == 1; dst64 { 1216 dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) 1217 } else { 1218 dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) 1219 } 1220 str = fmt.Sprintf("%s %s, %s", op, dst, src) 1221 case fpuCSel: 1222 size := is64SizeBitToSize(i.u3) 1223 str = fmt.Sprintf("fcsel %s, %s, %s, %s", 1224 formatVRegSized(i.rd.nr(), size), 1225 formatVRegSized(i.rn.nr(), size), 1226 
formatVRegSized(i.rm.nr(), size), 1227 condFlag(i.u1), 1228 ) 1229 case movToVec: 1230 var size byte 1231 arr := vecArrangement(i.u1) 1232 switch arr { 1233 case vecArrangementB, vecArrangementH, vecArrangementS: 1234 size = 32 1235 case vecArrangementD: 1236 size = 64 1237 default: 1238 panic("unsupported arrangement " + arr.String()) 1239 } 1240 str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) 1241 case movFromVec, movFromVecSigned: 1242 var size byte 1243 var opcode string 1244 arr := vecArrangement(i.u1) 1245 signed := i.kind == movFromVecSigned 1246 switch arr { 1247 case vecArrangementB, vecArrangementH, vecArrangementS: 1248 size = 32 1249 if signed { 1250 opcode = "smov" 1251 } else { 1252 opcode = "umov" 1253 } 1254 case vecArrangementD: 1255 size = 64 1256 if signed { 1257 opcode = "smov" 1258 } else { 1259 opcode = "mov" 1260 } 1261 default: 1262 panic("unsupported arrangement " + arr.String()) 1263 } 1264 str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) 1265 case vecDup: 1266 str = fmt.Sprintf("dup %s, %s", 1267 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), 1268 formatVRegSized(i.rn.nr(), 64), 1269 ) 1270 case vecDupElement: 1271 arr := vecArrangement(i.u1) 1272 str = fmt.Sprintf("dup %s, %s", 1273 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1274 formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), 1275 ) 1276 case vecDupFromFpu: 1277 panic("TODO") 1278 case vecExtract: 1279 str = fmt.Sprintf("ext %s, %s, %s, #%d", 1280 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), 1281 formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), 1282 formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), 1283 uint32(i.u2), 1284 ) 1285 case vecExtend: 1286 panic("TODO") 1287 case vecMovElement: 1288 str = fmt.Sprintf("mov %s, %s", 1289 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), 1290 formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), 1291 ) 1292 case vecMiscNarrow: 1293 panic("TODO") 1294 case vecRRR, vecRRRRewrite: 1295 str = fmt.Sprintf("%s %s, %s, %s", 1296 vecOp(i.u1), 1297 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1298 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), 1299 formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), 1300 ) 1301 case vecMisc: 1302 vop := vecOp(i.u1) 1303 if vop == vecOpCmeq0 { 1304 str = fmt.Sprintf("cmeq %s, %s, #0", 1305 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1306 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) 1307 } else { 1308 str = fmt.Sprintf("%s %s, %s", 1309 vop, 1310 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1311 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) 1312 } 1313 case vecLanes: 1314 arr := vecArrangement(i.u2) 1315 var destArr vecArrangement 1316 switch arr { 1317 case vecArrangement8B, vecArrangement16B: 1318 destArr = vecArrangementH 1319 case vecArrangement4H, vecArrangement8H: 1320 destArr = vecArrangementS 1321 case vecArrangement4S: 1322 destArr = vecArrangementD 1323 default: 1324 panic("invalid arrangement " + arr.String()) 1325 } 1326 str = fmt.Sprintf("%s %s, %s", 1327 vecOp(i.u1), 1328 formatVRegWidthVec(i.rd.nr(), destArr), 1329 formatVRegVec(i.rn.nr(), arr, vecIndexNone)) 1330 case vecShiftImm: 1331 arr := vecArrangement(i.u2) 1332 str = fmt.Sprintf("%s %s, %s, #%d", 1333 vecOp(i.u1), 1334 formatVRegVec(i.rd.nr(), arr, 
vecIndexNone), 1335 formatVRegVec(i.rn.nr(), arr, vecIndexNone), 1336 i.rm.shiftImm()) 1337 case vecTbl: 1338 arr := vecArrangement(i.u2) 1339 str = fmt.Sprintf("tbl %s, { %s }, %s", 1340 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1341 formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), 1342 formatVRegVec(i.rm.nr(), arr, vecIndexNone)) 1343 case vecTbl2: 1344 arr := vecArrangement(i.u2) 1345 rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() 1346 rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) 1347 str = fmt.Sprintf("tbl %s, { %s, %s }, %s", 1348 formatVRegVec(rd, arr, vecIndexNone), 1349 formatVRegVec(rn, vecArrangement16B, vecIndexNone), 1350 formatVRegVec(rn1, vecArrangement16B, vecIndexNone), 1351 formatVRegVec(rm, arr, vecIndexNone)) 1352 case vecPermute: 1353 arr := vecArrangement(i.u2) 1354 str = fmt.Sprintf("%s %s, %s, %s", 1355 vecOp(i.u1), 1356 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1357 formatVRegVec(i.rn.nr(), arr, vecIndexNone), 1358 formatVRegVec(i.rm.nr(), arr, vecIndexNone)) 1359 case movToFPSR: 1360 str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) 1361 case movFromFPSR: 1362 str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) 1363 case call: 1364 if i.u2 > 0 { 1365 str = fmt.Sprintf("bl #%#x", i.u2) 1366 } else { 1367 str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) 1368 } 1369 case callInd: 1370 str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64)) 1371 case ret: 1372 str = "ret" 1373 case br: 1374 target := label(i.u1) 1375 if i.u3 != 0 { 1376 str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) 1377 } else { 1378 str = fmt.Sprintf("b %s", target.String()) 1379 } 1380 case condBr: 1381 size := is64SizeBitToSize(i.u3) 1382 c := cond(i.u1) 1383 target := label(i.u2) 1384 switch c.kind() { 1385 case condKindRegisterZero: 1386 if !i.condBrOffsetResolved() { 1387 str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String()) 1388 } else { 1389 str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String()) 1390 } 1391 case condKindRegisterNotZero: 1392 if offset := i.condBrOffset(); offset != 0 { 1393 str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String()) 1394 } else { 1395 str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String()) 1396 } 1397 case condKindCondFlagSet: 1398 if offset := i.condBrOffset(); offset != 0 { 1399 if target == invalidLabel { 1400 str = fmt.Sprintf("b.%s #%#x", c.flag(), offset) 1401 } else { 1402 str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String()) 1403 } 1404 } else { 1405 str = fmt.Sprintf("b.%s %s", c.flag(), target.String()) 1406 } 1407 } 1408 case adr: 1409 str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) 1410 case brTableSequence: 1411 if i.u3 == 0 { // The offsets haven't been resolved yet. 1412 labels := make([]string, len(i.targets)) 1413 for index, l := range i.targets { 1414 labels[index] = label(l).String() 1415 } 1416 str = fmt.Sprintf("br_table_sequence %s, [%s]", 1417 formatVRegSized(i.rn.nr(), 64), 1418 strings.Join(labels, ", "), 1419 ) 1420 } else { 1421 // See encodeBrTableSequence for the encoding. 
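// Each resolved target below is a signed 32-bit offset relative to the start of the offset
// table; the printed sequence mirrors the emitted one: adr materializes the table address,
// ldrsw loads the selected offset (the index register is scaled by 4 via UXTW 2), add forms
// the absolute target, and br jumps to it.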
1422 offsets := make([]string, len(i.targets)) 1423 for index, offset := range i.targets { 1424 offsets[index] = fmt.Sprintf("%#x", int32(offset)) 1425 } 1426 str = fmt.Sprintf( 1427 `adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`, 1428 formatVRegSized(i.rn.nr(), 64), 1429 formatVRegSized(tmpRegVReg, 64), 1430 offsets, 1431 ) 1432 } 1433 case exitSequence: 1434 str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64)) 1435 case udf: 1436 str = "udf" 1437 case emitSourceOffsetInfo: 1438 str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) 1439 case vecLoad1R: 1440 str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) 1441 default: 1442 panic(i.kind) 1443 } 1444 return 1445 } 1446 1447 func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { 1448 i.kind = adr 1449 i.rd = operandNR(rd) 1450 i.u1 = uint64(offset) 1451 } 1452 1453 // TODO: delete unnecessary things. 1454 const ( 1455 // nop0 represents a no-op of zero size. 1456 nop0 instructionKind = iota + 1 1457 // aluRRR represents an ALU operation with two register sources and a register destination. 1458 aluRRR 1459 // aluRRRR represents an ALU operation with three register sources and a register destination. 1460 aluRRRR 1461 // aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination. 1462 aluRRImm12 1463 // aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination. 1464 aluRRBitmaskImm 1465 // aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination. 1466 aluRRImmShift 1467 // aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination. 1468 aluRRRShift 1469 // aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination. 1470 aluRRRExtend 1471 // bitRR represents a bit op instruction with a single register source. 1472 bitRR 1473 // uLoad8 represents an unsigned 8-bit load. 1474 uLoad8 1475 // sLoad8 represents a signed 8-bit load into 64-bit register. 1476 sLoad8 1477 // uLoad16 represents an unsigned 16-bit load into 64-bit register. 1478 uLoad16 1479 // sLoad16 represents a signed 16-bit load into 64-bit register. 1480 sLoad16 1481 // uLoad32 represents an unsigned 32-bit load into 64-bit register. 1482 uLoad32 1483 // sLoad32 represents a signed 32-bit load into 64-bit register. 1484 sLoad32 1485 // uLoad64 represents a 64-bit load. 1486 uLoad64 1487 // store8 represents an 8-bit store. 1488 store8 1489 // store16 represents a 16-bit store. 1490 store16 1491 // store32 represents a 32-bit store. 1492 store32 1493 // store64 represents a 64-bit store. 1494 store64 1495 // storeP64 represents a store of a pair of registers. 1496 storeP64 1497 // loadP64 represents a load of a pair of registers. 1498 loadP64 1499 // mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling. 1500 mov64 1501 // mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination. 1502 mov32 1503 // movZ represents a MOVZ with a 16-bit immediate. 1504 movZ 1505 // movN represents a MOVN with a 16-bit immediate. 1506 movN 1507 // movK represents a MOVK with a 16-bit immediate. 
1508 movK 1509 // extend represents a sign- or zero-extend operation. 1510 extend 1511 // cSel represents a conditional-select operation. 1512 cSel 1513 // cSet represents a conditional-set operation. 1514 cSet 1515 // cCmpImm represents a conditional comparison with an immediate. 1516 cCmpImm 1517 // fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster. 1518 fpuMov64 1519 // fpuMov128 represents a vector register move. 1520 fpuMov128 1521 // fpuMovFromVec represents a move to scalar from a vector element. 1522 fpuMovFromVec 1523 // fpuRR represents a 1-op FPU instruction. 1524 fpuRR 1525 // fpuRRR represents a 2-op FPU instruction. 1526 fpuRRR 1527 // fpuRRI represents a 2-op FPU instruction with immediate value. 1528 fpuRRI 1529 // fpuRRRR represents a 3-op FPU instruction. 1530 fpuRRRR 1531 // fpuCmp represents a FPU comparison, either 32 or 64 bit. 1532 fpuCmp 1533 // fpuLoad32 represents a floating-point load, single-precision (32 bit). 1534 fpuLoad32 1535 // fpuStore32 represents a floating-point store, single-precision (32 bit). 1536 fpuStore32 1537 // fpuLoad64 represents a floating-point load, double-precision (64 bit). 1538 fpuLoad64 1539 // fpuStore64 represents a floating-point store, double-precision (64 bit). 1540 fpuStore64 1541 // fpuLoad128 represents a floating-point/vector load, 128 bit. 1542 fpuLoad128 1543 // fpuStore128 represents a floating-point/vector store, 128 bit. 1544 fpuStore128 1545 // loadFpuConst32 represents a load of a 32-bit floating-point constant. 1546 loadFpuConst32 1547 // loadFpuConst64 represents a load of a 64-bit floating-point constant. 1548 loadFpuConst64 1549 // loadFpuConst128 represents a load of a 128-bit floating-point constant. 1550 loadFpuConst128 1551 // vecLoad1R represents a load of a one single-element structure that replicates to all lanes of a vector. 1552 vecLoad1R 1553 // fpuToInt represents a conversion from FP to integer. 1554 fpuToInt 1555 // intToFpu represents a conversion from integer to FP. 1556 intToFpu 1557 // fpuCSel represents a 32/64-bit FP conditional select. 1558 fpuCSel 1559 // movToVec represents a move to a vector element from a GPR. 1560 movToVec 1561 // movFromVec represents an unsigned move from a vector element to a GPR. 1562 movFromVec 1563 // movFromVecSigned represents a signed move from a vector element to a GPR. 1564 movFromVecSigned 1565 // vecDup represents a duplication of general-purpose register to vector. 1566 vecDup 1567 // vecDupElement represents a duplication of a vector element to vector or scalar. 1568 vecDupElement 1569 // vecDupFromFpu represents a duplication of scalar to vector. 1570 vecDupFromFpu 1571 // vecExtract represents a vector extraction operation. 1572 vecExtract 1573 // vecExtend represents a vector extension operation. 1574 vecExtend 1575 // vecMovElement represents a move vector element to another vector element operation. 1576 vecMovElement 1577 // vecMiscNarrow represents a vector narrowing operation. 1578 vecMiscNarrow 1579 // vecRRR represents a vector ALU operation. 1580 vecRRR 1581 // vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register. 1582 // For example, BSL instruction rewrites the destination register, and the existing value influences the result. 
1583 // Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive 1584 // the instruction while this instruction doesn't have "def" in the context of register allocation. 1585 vecRRRRewrite 1586 // vecMisc represents a vector two register miscellaneous instruction. 1587 vecMisc 1588 // vecLanes represents a vector instruction across lanes. 1589 vecLanes 1590 // vecShiftImm represents a SIMD scalar shift by immediate instruction. 1591 vecShiftImm 1592 // vecTbl represents a table vector lookup - single register table. 1593 vecTbl 1594 // vecTbl2 represents a table vector lookup - two register table. 1595 vecTbl2 1596 // vecPermute represents a vector permute instruction. 1597 vecPermute 1598 // movToFPSR represents a move to the FPSR. 1599 movToFPSR 1600 // movFromFPSR represents a move from the FPSR. 1601 movFromFPSR 1602 // call represents a machine call instruction. 1603 call 1604 // callInd represents a machine indirect-call instruction. 1605 callInd 1606 // ret represents a machine return instruction. 1607 ret 1608 // br represents an unconditional branch. 1609 br 1610 // condBr represents a conditional branch. 1611 condBr 1612 // adr represents computing the address of a memory location with a PC-relative offset. 1613 adr 1614 // brTableSequence represents a jump-table sequence. 1615 brTableSequence 1616 // exitSequence consists of multiple instructions, and exits the execution immediately. 1617 // See encodeExitSequence. 1618 exitSequence 1619 // UDF is the undefined instruction. For debugging only. 1620 udf 1621 1622 // emitSourceOffsetInfo is a dummy instruction to emit source offset info. 1623 // The existence of this instruction does not affect the execution. 1624 emitSourceOffsetInfo 1625 1626 // ------------------- do not define below this line ------------------- 1627 numInstructionKinds 1628 ) 1629 1630 func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { 1631 i.kind = emitSourceOffsetInfo 1632 i.u1 = uint64(l) 1633 return i 1634 } 1635 1636 func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { 1637 return ssa.SourceOffset(i.u1) 1638 } 1639 1640 func (i *instruction) asUDF() *instruction { 1641 i.kind = udf 1642 return i 1643 } 1644 1645 func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { 1646 i.kind = fpuToInt 1647 i.rn = rn 1648 i.rd = rd 1649 if rdSigned { 1650 i.u1 = 1 1651 } 1652 if src64bit { 1653 i.u2 = 1 1654 } 1655 if dst64bit { 1656 i.u3 = 1 1657 } 1658 } 1659 1660 func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { 1661 i.kind = intToFpu 1662 i.rn = rn 1663 i.rd = rd 1664 if rnSigned { 1665 i.u1 = 1 1666 } 1667 if src64bit { 1668 i.u2 = 1 1669 } 1670 if dst64bit { 1671 i.u3 = 1 1672 } 1673 } 1674 1675 func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { 1676 i.kind = exitSequence 1677 i.rn = operandNR(ctx) 1678 return i 1679 } 1680 1681 // aluOp determines the type of ALU operation. Instructions whose kind is one of 1682 // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend 1683 // would use this type.
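// For example, lowering `x0 = x1 + (x2 << 3)` passes a shifted-register operand to asALU,
// which therefore selects aluRRRShift, whereas adding a small constant that fits in an
// imm12 operand yields aluRRImm12 (a sketch of the mapping implemented in asALU above).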
1684 type aluOp int 1685 1686 func (a aluOp) String() string { 1687 switch a { 1688 case aluOpAdd: 1689 return "add" 1690 case aluOpSub: 1691 return "sub" 1692 case aluOpOrr: 1693 return "orr" 1694 case aluOpAnd: 1695 return "and" 1696 case aluOpBic: 1697 return "bic" 1698 case aluOpEor: 1699 return "eor" 1700 case aluOpAddS: 1701 return "adds" 1702 case aluOpSubS: 1703 return "subs" 1704 case aluOpSMulH: 1705 return "sMulH" 1706 case aluOpUMulH: 1707 return "uMulH" 1708 case aluOpSDiv: 1709 return "sdiv" 1710 case aluOpUDiv: 1711 return "udiv" 1712 case aluOpRotR: 1713 return "ror" 1714 case aluOpLsr: 1715 return "lsr" 1716 case aluOpAsr: 1717 return "asr" 1718 case aluOpLsl: 1719 return "lsl" 1720 case aluOpMAdd: 1721 return "madd" 1722 case aluOpMSub: 1723 return "msub" 1724 } 1725 panic(int(a)) 1726 } 1727 1728 const ( 1729 // 32/64-bit Add. 1730 aluOpAdd aluOp = iota 1731 // 32/64-bit Subtract. 1732 aluOpSub 1733 // 32/64-bit Bitwise OR. 1734 aluOpOrr 1735 // 32/64-bit Bitwise AND. 1736 aluOpAnd 1737 // 32/64-bit Bitwise AND NOT. 1738 aluOpBic 1739 // 32/64-bit Bitwise XOR (Exclusive OR). 1740 aluOpEor 1741 // 32/64-bit Add setting flags. 1742 aluOpAddS 1743 // 32/64-bit Subtract setting flags. 1744 aluOpSubS 1745 // Signed multiply, high-word result. 1746 aluOpSMulH 1747 // Unsigned multiply, high-word result. 1748 aluOpUMulH 1749 // 64-bit Signed divide. 1750 aluOpSDiv 1751 // 64-bit Unsigned divide. 1752 aluOpUDiv 1753 // 32/64-bit Rotate right. 1754 aluOpRotR 1755 // 32/64-bit Logical shift right. 1756 aluOpLsr 1757 // 32/64-bit Arithmetic shift right. 1758 aluOpAsr 1759 // 32/64-bit Logical shift left. 1760 aluOpLsl /// Multiply-add 1761 1762 // MAdd and MSub are only applicable for aluRRRR. 1763 aluOpMAdd 1764 aluOpMSub 1765 ) 1766 1767 // vecOp determines the type of vector operation. Instructions whose kind is one of 1768 // vecOpCnt would use this type. 1769 type vecOp int 1770 1771 // String implements fmt.Stringer. 
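// For example, vecOpFcmgt prints as "fcmgt"; these mnemonics are what the vecRRR, vecMisc
// and related cases of instruction.String splice into their operand formatting.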
1772 func (b vecOp) String() string { 1773 switch b { 1774 case vecOpCnt: 1775 return "cnt" 1776 case vecOpCmeq: 1777 return "cmeq" 1778 case vecOpCmgt: 1779 return "cmgt" 1780 case vecOpCmhi: 1781 return "cmhi" 1782 case vecOpCmge: 1783 return "cmge" 1784 case vecOpCmhs: 1785 return "cmhs" 1786 case vecOpFcmeq: 1787 return "fcmeq" 1788 case vecOpFcmgt: 1789 return "fcmgt" 1790 case vecOpFcmge: 1791 return "fcmge" 1792 case vecOpCmeq0: 1793 return "cmeq0" 1794 case vecOpUaddlv: 1795 return "uaddlv" 1796 case vecOpBit: 1797 return "bit" 1798 case vecOpBic: 1799 return "bic" 1800 case vecOpBsl: 1801 return "bsl" 1802 case vecOpNot: 1803 return "not" 1804 case vecOpAnd: 1805 return "and" 1806 case vecOpOrr: 1807 return "orr" 1808 case vecOpEOR: 1809 return "eor" 1810 case vecOpFadd: 1811 return "fadd" 1812 case vecOpAdd: 1813 return "add" 1814 case vecOpAddp: 1815 return "addp" 1816 case vecOpAddv: 1817 return "addv" 1818 case vecOpSub: 1819 return "sub" 1820 case vecOpFsub: 1821 return "fsub" 1822 case vecOpSmin: 1823 return "smin" 1824 case vecOpUmin: 1825 return "umin" 1826 case vecOpUminv: 1827 return "uminv" 1828 case vecOpSmax: 1829 return "smax" 1830 case vecOpUmax: 1831 return "umax" 1832 case vecOpUmaxp: 1833 return "umaxp" 1834 case vecOpUrhadd: 1835 return "urhadd" 1836 case vecOpFmul: 1837 return "fmul" 1838 case vecOpSqrdmulh: 1839 return "sqrdmulh" 1840 case vecOpMul: 1841 return "mul" 1842 case vecOpUmlal: 1843 return "umlal" 1844 case vecOpFdiv: 1845 return "fdiv" 1846 case vecOpFsqrt: 1847 return "fsqrt" 1848 case vecOpAbs: 1849 return "abs" 1850 case vecOpFabs: 1851 return "fabs" 1852 case vecOpNeg: 1853 return "neg" 1854 case vecOpFneg: 1855 return "fneg" 1856 case vecOpFrintp: 1857 return "frintp" 1858 case vecOpFrintm: 1859 return "frintm" 1860 case vecOpFrintn: 1861 return "frintn" 1862 case vecOpFrintz: 1863 return "frintz" 1864 case vecOpFcvtl: 1865 return "fcvtl" 1866 case vecOpFcvtn: 1867 return "fcvtn" 1868 case vecOpFcvtzu: 1869 return "fcvtzu" 1870 case vecOpFcvtzs: 1871 return "fcvtzs" 1872 case vecOpScvtf: 1873 return "scvtf" 1874 case vecOpUcvtf: 1875 return "ucvtf" 1876 case vecOpSqxtn: 1877 return "sqxtn" 1878 case vecOpUqxtn: 1879 return "uqxtn" 1880 case vecOpSqxtun: 1881 return "sqxtun" 1882 case vecOpRev64: 1883 return "rev64" 1884 case vecOpXtn: 1885 return "xtn" 1886 case vecOpShll: 1887 return "shll" 1888 case vecOpSshl: 1889 return "sshl" 1890 case vecOpSshll: 1891 return "sshll" 1892 case vecOpUshl: 1893 return "ushl" 1894 case vecOpUshll: 1895 return "ushll" 1896 case vecOpSshr: 1897 return "sshr" 1898 case vecOpZip1: 1899 return "zip1" 1900 case vecOpFmin: 1901 return "fmin" 1902 case vecOpFmax: 1903 return "fmax" 1904 } 1905 panic(int(b)) 1906 } 1907 1908 const ( 1909 vecOpCnt vecOp = iota 1910 vecOpCmeq0 1911 vecOpCmeq 1912 vecOpCmgt 1913 vecOpCmhi 1914 vecOpCmge 1915 vecOpCmhs 1916 vecOpFcmeq 1917 vecOpFcmgt 1918 vecOpFcmge 1919 vecOpUaddlv 1920 vecOpBit 1921 vecOpBic 1922 vecOpBsl 1923 vecOpNot 1924 vecOpAnd 1925 vecOpOrr 1926 vecOpEOR 1927 vecOpAdd 1928 vecOpFadd 1929 vecOpAddv 1930 vecOpSqadd 1931 vecOpUqadd 1932 vecOpAddp 1933 vecOpSub 1934 vecOpFsub 1935 vecOpSqsub 1936 vecOpUqsub 1937 vecOpSmin 1938 vecOpUmin 1939 vecOpUminv 1940 vecOpFmin 1941 vecOpSmax 1942 vecOpUmax 1943 vecOpUmaxp 1944 vecOpFmax 1945 vecOpUrhadd 1946 vecOpMul 1947 vecOpFmul 1948 vecOpSqrdmulh 1949 vecOpUmlal 1950 vecOpFdiv 1951 vecOpFsqrt 1952 vecOpAbs 1953 vecOpFabs 1954 vecOpNeg 1955 vecOpFneg 1956 vecOpFrintm 1957 vecOpFrintn 1958 vecOpFrintp 1959 vecOpFrintz 1960 
const (
	vecOpCnt vecOp = iota
	vecOpCmeq0
	vecOpCmeq
	vecOpCmgt
	vecOpCmhi
	vecOpCmge
	vecOpCmhs
	vecOpFcmeq
	vecOpFcmgt
	vecOpFcmge
	vecOpUaddlv
	vecOpBit
	vecOpBic
	vecOpBsl
	vecOpNot
	vecOpAnd
	vecOpOrr
	vecOpEOR
	vecOpAdd
	vecOpFadd
	vecOpAddv
	vecOpSqadd
	vecOpUqadd
	vecOpAddp
	vecOpSub
	vecOpFsub
	vecOpSqsub
	vecOpUqsub
	vecOpSmin
	vecOpUmin
	vecOpUminv
	vecOpFmin
	vecOpSmax
	vecOpUmax
	vecOpUmaxp
	vecOpFmax
	vecOpUrhadd
	vecOpMul
	vecOpFmul
	vecOpSqrdmulh
	vecOpUmlal
	vecOpFdiv
	vecOpFsqrt
	vecOpAbs
	vecOpFabs
	vecOpNeg
	vecOpFneg
	vecOpFrintm
	vecOpFrintn
	vecOpFrintp
	vecOpFrintz
	vecOpFcvtl
	vecOpFcvtn
	vecOpFcvtzs
	vecOpFcvtzu
	vecOpScvtf
	vecOpUcvtf
	vecOpSqxtn
	vecOpSqxtun
	vecOpUqxtn
	vecOpRev64
	vecOpXtn
	vecOpShll
	vecOpSshl
	vecOpSshll
	vecOpUshl
	vecOpUshll
	vecOpSshr
	vecOpZip1
)

// bitOp determines the type of bitwise operation. Instructions of kind bitRR
// use this type; the operation is either bitOpRbit or bitOpClz.
type bitOp int

// String implements fmt.Stringer.
func (b bitOp) String() string {
	switch b {
	case bitOpRbit:
		return "rbit"
	case bitOpClz:
		return "clz"
	}
	panic(int(b))
}

const (
	// 32/64-bit Rbit.
	bitOpRbit bitOp = iota
	// 32/64-bit Clz.
	bitOpClz
)

// fpuUniOp represents a unary floating-point unit (FPU) operation.
type fpuUniOp byte

const (
	fpuUniOpNeg fpuUniOp = iota
	fpuUniOpCvt32To64
	fpuUniOpCvt64To32
	fpuUniOpSqrt
	fpuUniOpRoundPlus
	fpuUniOpRoundMinus
	fpuUniOpRoundZero
	fpuUniOpRoundNearest
	fpuUniOpAbs
)

// String implements fmt.Stringer.
func (f fpuUniOp) String() string {
	switch f {
	case fpuUniOpNeg:
		return "fneg"
	case fpuUniOpCvt32To64:
		return "fcvt"
	case fpuUniOpCvt64To32:
		return "fcvt"
	case fpuUniOpSqrt:
		return "fsqrt"
	case fpuUniOpRoundPlus:
		return "frintp"
	case fpuUniOpRoundMinus:
		return "frintm"
	case fpuUniOpRoundZero:
		return "frintz"
	case fpuUniOpRoundNearest:
		return "frintn"
	case fpuUniOpAbs:
		return "fabs"
	}
	panic(int(f))
}

// fpuBinOp represents a binary floating-point unit (FPU) operation.
type fpuBinOp byte

const (
	fpuBinOpAdd = iota
	fpuBinOpSub
	fpuBinOpMul
	fpuBinOpDiv
	fpuBinOpMax
	fpuBinOpMin
)

// String implements fmt.Stringer.
func (f fpuBinOp) String() string {
	switch f {
	case fpuBinOpAdd:
		return "fadd"
	case fpuBinOpSub:
		return "fsub"
	case fpuBinOpMul:
		return "fmul"
	case fpuBinOpDiv:
		return "fdiv"
	case fpuBinOpMax:
		return "fmax"
	case fpuBinOpMin:
		return "fmin"
	}
	panic(int(f))
}
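// Illustrative sketch, not part of the original file: the scalar rounding ops
// above correspond one-to-one to the vector frint* ops, which a hypothetical
// helper for lowering a lane-wise rounding could exploit. The helper name is
// made up for this example.
func exampleRoundingVecOp(f fpuUniOp) vecOp {
	switch f {
	case fpuUniOpRoundPlus:
		return vecOpFrintp // round toward +infinity.
	case fpuUniOpRoundMinus:
		return vecOpFrintm // round toward -infinity.
	case fpuUniOpRoundZero:
		return vecOpFrintz // round toward zero.
	case fpuUniOpRoundNearest:
		return vecOpFrintn // round to nearest, ties to even.
	default:
		panic("not a rounding op")
	}
}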
// extMode represents the mode of a register operand extension.
// For example, aluRRRExtend instructions need this info to determine the extensions.
type extMode byte

const (
	extModeNone extMode = iota
	// extModeZeroExtend32 represents a zero-extension to 32 bits if the original bit size is less than 32.
	extModeZeroExtend32
	// extModeSignExtend32 represents a sign-extension to 32 bits if the original bit size is less than 32.
	extModeSignExtend32
	// extModeZeroExtend64 represents a zero-extension to 64 bits if the original bit size is less than 64.
	extModeZeroExtend64
	// extModeSignExtend64 represents a sign-extension to 64 bits if the original bit size is less than 64.
	extModeSignExtend64
)

func (e extMode) bits() byte {
	switch e {
	case extModeZeroExtend32, extModeSignExtend32:
		return 32
	case extModeZeroExtend64, extModeSignExtend64:
		return 64
	default:
		return 0
	}
}

func (e extMode) signed() bool {
	switch e {
	case extModeSignExtend32, extModeSignExtend64:
		return true
	default:
		return false
	}
}

func extModeOf(t ssa.Type, signed bool) extMode {
	switch t.Bits() {
	case 32:
		if signed {
			return extModeSignExtend32
		}
		return extModeZeroExtend32
	case 64:
		if signed {
			return extModeSignExtend64
		}
		return extModeZeroExtend64
	default:
		panic("TODO? do we need narrower than 32 bits?")
	}
}

type extendOp byte

const (
	extendOpUXTB extendOp = 0b000
	extendOpUXTH extendOp = 0b001
	extendOpUXTW extendOp = 0b010
	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpUXTX extendOp = 0b011
	extendOpSXTB extendOp = 0b100
	extendOpSXTH extendOp = 0b101
	extendOpSXTW extendOp = 0b110
	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpSXTX extendOp = 0b111
	extendOpNone extendOp = 0xff
)

func (e extendOp) srcBits() byte {
	switch e {
	case extendOpUXTB, extendOpSXTB:
		return 8
	case extendOpUXTH, extendOpSXTH:
		return 16
	case extendOpUXTW, extendOpSXTW:
		return 32
	case extendOpUXTX, extendOpSXTX:
		return 64
	}
	panic(int(e))
}

func (e extendOp) String() string {
	switch e {
	case extendOpUXTB:
		return "UXTB"
	case extendOpUXTH:
		return "UXTH"
	case extendOpUXTW:
		return "UXTW"
	case extendOpUXTX:
		return "UXTX"
	case extendOpSXTB:
		return "SXTB"
	case extendOpSXTH:
		return "SXTH"
	case extendOpSXTW:
		return "SXTW"
	case extendOpSXTX:
		return "SXTX"
	}
	panic(int(e))
}

func extendOpFrom(signed bool, from byte) extendOp {
	switch from {
	case 8:
		if signed {
			return extendOpSXTB
		}
		return extendOpUXTB
	case 16:
		if signed {
			return extendOpSXTH
		}
		return extendOpUXTH
	case 32:
		if signed {
			return extendOpSXTW
		}
		return extendOpUXTW
	case 64:
		if signed {
			return extendOpSXTX
		}
		return extendOpUXTX
	}
	panic("invalid extendOpFrom")
}

type shiftOp byte

const (
	shiftOpLSL shiftOp = 0b00
	shiftOpLSR shiftOp = 0b01
	shiftOpASR shiftOp = 0b10
	shiftOpROR shiftOp = 0b11
)

func (s shiftOp) String() string {
	switch s {
	case shiftOpLSL:
		return "lsl"
	case shiftOpLSR:
		return "lsr"
	case shiftOpASR:
		return "asr"
	case shiftOpROR:
		return "ror"
	}
	panic(int(s))
}

const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.
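// Illustrative sketch, not part of the original file: extendOpFrom and
// srcBits round-trip the source width, which is how a caller widening an
// unsigned 16-bit value for an extended-register ALU form could derive the
// operand extension. The function name is made up for this example.
func exampleUnsigned16BitExtend() (extendOp, byte) {
	op := extendOpFrom(false, 16) // extendOpUXTH: unsigned extend from 16 bits.
	return op, op.srcBits()       // srcBits reports 16.
}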
// size returns the size of the instruction in encoded bytes.
func (i *instruction) size() int64 {
	switch i.kind {
	case exitSequence:
		return exitSequenceSize
	case nop0:
		return 0
	case emitSourceOffsetInfo:
		return 0
	case loadFpuConst32:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 4
	case loadFpuConst64:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 8
	case loadFpuConst128:
		if i.u1 == 0 && i.u2 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 16
	case brTableSequence:
		return 4*4 + int64(len(i.targets))*4
	default:
		return 4
	}
}

// vecArrangement is the arrangement of data within a vector register.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector)
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector)
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector)
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector)
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector)
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector)
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector)
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector)
	vecArrangement2D

	// Assign each vector size specifier to a vector arrangement ID.
	// Instructions can only have an arrangement or a size specifier, but not both, so it
	// simplifies the internal representation of vector instructions by being able to
	// store either into the same field.

	// vecArrangementB is a size specifier of byte (8-bit)
	vecArrangementB
	// vecArrangementH is a size specifier of halfword (16-bit)
	vecArrangementH
	// vecArrangementS is a size specifier of word (32-bit)
	vecArrangementS
	// vecArrangementD is a size specifier of doubleword (64-bit)
	vecArrangementD
	// vecArrangementQ is a size specifier of quadword, i.e. the entire vector (128-bit)
	vecArrangementQ
)

// String implements fmt.Stringer.
func (v vecArrangement) String() (ret string) {
	switch v {
	case vecArrangement8B:
		ret = "8B"
	case vecArrangement16B:
		ret = "16B"
	case vecArrangement4H:
		ret = "4H"
	case vecArrangement8H:
		ret = "8H"
	case vecArrangement2S:
		ret = "2S"
	case vecArrangement4S:
		ret = "4S"
	case vecArrangement1D:
		ret = "1D"
	case vecArrangement2D:
		ret = "2D"
	case vecArrangementB:
		ret = "B"
	case vecArrangementH:
		ret = "H"
	case vecArrangementS:
		ret = "S"
	case vecArrangementD:
		ret = "D"
	case vecArrangementQ:
		ret = "Q"
	case vecArrangementNone:
		ret = "none"
	default:
		panic(v)
	}
	return
}
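// Illustrative sketch, not part of the original file: the lane count implied
// by each full-width arrangement, e.g. as a debugging aid. The size
// specifiers (vecArrangementB..Q) and vecArrangementNone report zero lanes
// here. The function name is made up for this example.
func exampleLaneCount(v vecArrangement) int {
	switch v {
	case vecArrangement16B:
		return 16
	case vecArrangement8B, vecArrangement8H:
		return 8
	case vecArrangement4H, vecArrangement4S:
		return 4
	case vecArrangement2S, vecArrangement2D:
		return 2
	case vecArrangement1D:
		return 1
	default:
		return 0
	}
}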
// vecIndex is the index of an element of a vector register
type vecIndex byte

// vecIndexNone indicates no vector index specified.
const vecIndexNone = ^vecIndex(0)

func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
	switch lane {
	case ssa.VecLaneI8x16:
		return vecArrangement16B
	case ssa.VecLaneI16x8:
		return vecArrangement8H
	case ssa.VecLaneI32x4:
		return vecArrangement4S
	case ssa.VecLaneI64x2:
		return vecArrangement2D
	case ssa.VecLaneF32x4:
		return vecArrangement4S
	case ssa.VecLaneF64x2:
		return vecArrangement2D
	default:
		panic(lane)
	}
}
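// Illustrative sketch, not part of the original file: ssaLaneToArrangement
// paired with a vecOp, roughly the pieces a lowering step for an i32x4
// addition needs before it fills in a vecRRR instruction. The helper name is
// made up for this example.
func exampleI32x4Add() (vecOp, vecArrangement) {
	return vecOpAdd, ssaLaneToArrangement(ssa.VecLaneI32x4)
}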