github.com/bananabytelabs/wazero@v0.0.0-20240105073314-54b22a776da8/internal/engine/wazevo/backend/isa/arm64/instr.go

package arm64

import (
	"fmt"
	"math"
	"strings"

	"github.com/bananabytelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/bananabytelabs/wazero/internal/engine/wazevo/ssa"
)

type (
	// instruction represents either a real instruction in arm64, or the meta instructions
	// that are convenient for code generation. For example, inline constants are also treated
	// as instructions.
	//
	// Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation
	// can be considered equivalent to the sequence of such instructions.
	//
	// Each field is interpreted depending on the kind.
	//
	// TODO: optimize the layout later once the impl settles.
	instruction struct {
		kind                instructionKind
		prev, next          *instruction
		u1, u2, u3          uint64
		rd, rm, rn, ra      operand
		amode               addressMode
		abi                 *abiImpl
		targets             []uint32
		addedBeforeRegAlloc bool
	}

	// instructionKind represents the kind of instruction.
	// This controls how the instruction struct is interpreted.
	instructionKind int
)

func asNop0(i *instruction) {
	i.kind = nop0
}

func setNext(i, next *instruction) {
	i.next = next
}

func setPrev(i, prev *instruction) {
	i.prev = prev
}

// IsCall implements regalloc.Instr IsCall.
func (i *instruction) IsCall() bool {
	return i.kind == call
}

// IsIndirectCall implements regalloc.Instr IsIndirectCall.
func (i *instruction) IsIndirectCall() bool {
	return i.kind == callInd
}

// IsReturn implements regalloc.Instr IsReturn.
func (i *instruction) IsReturn() bool {
	return i.kind == ret
}
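// The linked-list helpers and predicates above are all that is needed to build
// small chains of these meta-instructions by hand. The following is an
// illustrative sketch only (not part of the backend; dst, src and abi are
// placeholders supplied by the caller):
//
//	mov := &instruction{}
//	mov.asMove64(dst, src) // mov dst, src
//	r := &instruction{}
//	r.asRet(abi) // ret
//	setNext(mov, r)
//	setPrev(r, mov)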
type defKind byte

const (
	defKindNone defKind = iota + 1
	defKindRD
	defKindCall
)

var defKinds = [numInstructionKinds]defKind{
	adr:                  defKindRD,
	aluRRR:               defKindRD,
	aluRRRR:              defKindRD,
	aluRRImm12:           defKindRD,
	aluRRBitmaskImm:      defKindRD,
	aluRRRShift:          defKindRD,
	aluRRImmShift:        defKindRD,
	aluRRRExtend:         defKindRD,
	bitRR:                defKindRD,
	movZ:                 defKindRD,
	movK:                 defKindRD,
	movN:                 defKindRD,
	mov32:                defKindRD,
	mov64:                defKindRD,
	fpuMov64:             defKindRD,
	fpuMov128:            defKindRD,
	fpuRR:                defKindRD,
	fpuRRR:               defKindRD,
	nop0:                 defKindNone,
	call:                 defKindCall,
	callInd:              defKindCall,
	ret:                  defKindNone,
	store8:               defKindNone,
	store16:              defKindNone,
	store32:              defKindNone,
	store64:              defKindNone,
	exitSequence:         defKindNone,
	condBr:               defKindNone,
	br:                   defKindNone,
	brTableSequence:      defKindNone,
	cSet:                 defKindRD,
	extend:               defKindRD,
	fpuCmp:               defKindNone,
	uLoad8:               defKindRD,
	uLoad16:              defKindRD,
	uLoad32:              defKindRD,
	sLoad8:               defKindRD,
	sLoad16:              defKindRD,
	sLoad32:              defKindRD,
	uLoad64:              defKindRD,
	fpuLoad32:            defKindRD,
	fpuLoad64:            defKindRD,
	fpuLoad128:           defKindRD,
	vecLoad1R:            defKindRD,
	loadFpuConst32:       defKindRD,
	loadFpuConst64:       defKindRD,
	loadFpuConst128:      defKindRD,
	fpuStore32:           defKindNone,
	fpuStore64:           defKindNone,
	fpuStore128:          defKindNone,
	udf:                  defKindNone,
	cSel:                 defKindRD,
	fpuCSel:              defKindRD,
	movToVec:             defKindRD,
	movFromVec:           defKindRD,
	movFromVecSigned:     defKindRD,
	vecDup:               defKindRD,
	vecDupElement:        defKindRD,
	vecExtract:           defKindRD,
	vecMisc:              defKindRD,
	vecMovElement:        defKindRD,
	vecLanes:             defKindRD,
	vecShiftImm:          defKindRD,
	vecTbl:               defKindRD,
	vecTbl2:              defKindRD,
	vecPermute:           defKindRD,
	vecRRR:               defKindRD,
	vecRRRRewrite:        defKindNone,
	fpuToInt:             defKindRD,
	intToFpu:             defKindRD,
	cCmpImm:              defKindNone,
	movToFPSR:            defKindNone,
	movFromFPSR:          defKindRD,
	emitSourceOffsetInfo: defKindNone,
}

// Defs returns the list of regalloc.VReg that are defined by the instruction.
// In order to reduce the number of allocations, the caller can pass the slice to be used.
func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {
	*regs = (*regs)[:0]
	switch defKinds[i.kind] {
	case defKindNone:
	case defKindRD:
		*regs = append(*regs, i.rd.nr())
	case defKindCall:
		*regs = append(*regs, i.abi.retRealRegs...)
	default:
		panic(fmt.Sprintf("defKind for %v not defined", i))
	}
	return *regs
}

// AssignDef implements regalloc.Instr AssignDef.
func (i *instruction) AssignDef(reg regalloc.VReg) {
	switch defKinds[i.kind] {
	case defKindNone:
	case defKindRD:
		i.rd = i.rd.assignReg(reg)
	case defKindCall:
		panic("BUG: call instructions shouldn't be assigned")
	default:
		panic(fmt.Sprintf("defKind for %v not defined", i))
	}
}

type useKind byte

const (
	useKindNone useKind = iota + 1
	useKindRN
	useKindRNRM
	useKindRNRMRA
	useKindRNRN1RM
	useKindRet
	useKindCall
	useKindCallInd
	useKindAMode
	useKindRNAMode
	useKindCond
	useKindVecRRRRewrite
)

var useKinds = [numInstructionKinds]useKind{
	udf:              useKindNone,
	aluRRR:           useKindRNRM,
	aluRRRR:          useKindRNRMRA,
	aluRRImm12:       useKindRN,
	aluRRBitmaskImm:  useKindRN,
	aluRRRShift:      useKindRNRM,
	aluRRImmShift:    useKindRN,
	aluRRRExtend:     useKindRNRM,
	bitRR:            useKindRN,
	movZ:             useKindNone,
	movK:             useKindNone,
	movN:             useKindNone,
	mov32:            useKindRN,
	mov64:            useKindRN,
	fpuMov64:         useKindRN,
	fpuMov128:        useKindRN,
	fpuRR:            useKindRN,
	fpuRRR:           useKindRNRM,
	nop0:             useKindNone,
	call:             useKindCall,
	callInd:          useKindCallInd,
	ret:              useKindRet,
	store8:           useKindRNAMode,
	store16:          useKindRNAMode,
	store32:          useKindRNAMode,
	store64:          useKindRNAMode,
	exitSequence:     useKindRN,
	condBr:           useKindCond,
	br:               useKindNone,
	brTableSequence:  useKindRN,
	cSet:             useKindNone,
	extend:           useKindRN,
	fpuCmp:           useKindRNRM,
	uLoad8:           useKindAMode,
	uLoad16:          useKindAMode,
	uLoad32:          useKindAMode,
	sLoad8:           useKindAMode,
	sLoad16:          useKindAMode,
	sLoad32:          useKindAMode,
	uLoad64:          useKindAMode,
	fpuLoad32:        useKindAMode,
	fpuLoad64:        useKindAMode,
	fpuLoad128:       useKindAMode,
	fpuStore32:       useKindRNAMode,
	fpuStore64:       useKindRNAMode,
	fpuStore128:      useKindRNAMode,
	loadFpuConst32:   useKindNone,
	loadFpuConst64:   useKindNone,
	loadFpuConst128:  useKindNone,
	vecLoad1R:        useKindRN,
	cSel:             useKindRNRM,
	fpuCSel:          useKindRNRM,
	movToVec:         useKindRN,
	movFromVec:       useKindRN,
	movFromVecSigned: useKindRN,
	vecDup:           useKindRN,
	vecDupElement:    useKindRN,
	vecExtract:       useKindRNRM,
	cCmpImm:          useKindRN,
	vecMisc:          useKindRN,
	vecMovElement:    useKindRN,
	vecLanes:         useKindRN,
	vecShiftImm:      useKindRN,
	vecTbl:           useKindRNRM,
	vecTbl2:          useKindRNRN1RM,
	vecRRR:           useKindRNRM,
	vecRRRRewrite:    useKindVecRRRRewrite,
	vecPermute:       useKindRNRM,
	fpuToInt:         useKindRN,
	intToFpu:         useKindRN,
	movToFPSR:        useKindRN,
	movFromFPSR:      useKindNone,
	adr:              useKindNone,
	emitSourceOffsetInfo: useKindNone,
}

// Uses returns the list of regalloc.VReg that are used by the instruction.
// In order to reduce the number of allocations, the caller can pass the slice to be used.
func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
	*regs = (*regs)[:0]
	switch useKinds[i.kind] {
	case useKindNone:
	case useKindRN:
		if rn := i.rn.reg(); rn.Valid() {
			*regs = append(*regs, rn)
		}
	case useKindRNRM:
		if rn := i.rn.reg(); rn.Valid() {
			*regs = append(*regs, rn)
		}
		if rm := i.rm.reg(); rm.Valid() {
			*regs = append(*regs, rm)
		}
	case useKindRNRMRA:
		if rn := i.rn.reg(); rn.Valid() {
			*regs = append(*regs, rn)
		}
		if rm := i.rm.reg(); rm.Valid() {
			*regs = append(*regs, rm)
		}
		if ra := i.ra.reg(); ra.Valid() {
			*regs = append(*regs, ra)
		}
	case useKindRNRN1RM:
		if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() {
			rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
			*regs = append(*regs, rn, rn1)
		}
		if rm := i.rm.reg(); rm.Valid() {
			*regs = append(*regs, rm)
		}
	case useKindRet:
		*regs = append(*regs, i.abi.retRealRegs...)
	case useKindAMode:
		if amodeRN := i.amode.rn; amodeRN.Valid() {
			*regs = append(*regs, amodeRN)
		}
		if amodeRM := i.amode.rm; amodeRM.Valid() {
			*regs = append(*regs, amodeRM)
		}
	case useKindRNAMode:
		*regs = append(*regs, i.rn.reg())
		if amodeRN := i.amode.rn; amodeRN.Valid() {
			*regs = append(*regs, amodeRN)
		}
		if amodeRM := i.amode.rm; amodeRM.Valid() {
			*regs = append(*regs, amodeRM)
		}
	case useKindCond:
		cnd := cond(i.u1)
		if cnd.kind() != condKindCondFlagSet {
			*regs = append(*regs, cnd.register())
		}
	case useKindCall:
		*regs = append(*regs, i.abi.argRealRegs...)
	case useKindCallInd:
		*regs = append(*regs, i.rn.nr())
		*regs = append(*regs, i.abi.argRealRegs...)
	case useKindVecRRRRewrite:
		*regs = append(*regs, i.rn.reg())
		*regs = append(*regs, i.rm.reg())
		*regs = append(*regs, i.rd.reg())
	default:
		panic(fmt.Sprintf("useKind for %v not defined", i))
	}
	return *regs
}

func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
	switch useKinds[i.kind] {
	case useKindNone:
	case useKindRN:
		if rn := i.rn.reg(); rn.Valid() {
			i.rn = i.rn.assignReg(reg)
		}
	case useKindRNRM:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
		} else {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		}
	case useKindVecRRRRewrite:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
		} else if index == 1 {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		} else {
			if rd := i.rd.reg(); rd.Valid() {
				i.rd = i.rd.assignReg(reg)
			}
		}
	case useKindRNRN1RM:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
			if rn1 := i.rn.reg() + 1; rn1.Valid() {
				i.rm = i.rm.assignReg(reg + 1)
			}
		} else {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		}
	case useKindRNRMRA:
		if index == 0 {
			if rn := i.rn.reg(); rn.Valid() {
				i.rn = i.rn.assignReg(reg)
			}
		} else if index == 1 {
			if rm := i.rm.reg(); rm.Valid() {
				i.rm = i.rm.assignReg(reg)
			}
		} else {
			if ra := i.ra.reg(); ra.Valid() {
				i.ra = i.ra.assignReg(reg)
			}
		}
	case useKindRet:
		panic("BUG: ret instructions shouldn't be assigned")
	case useKindAMode:
		if index == 0 {
			if amodeRN := i.amode.rn; amodeRN.Valid() {
				i.amode.rn = reg
			}
		} else {
			if amodeRM := i.amode.rm; amodeRM.Valid() {
				i.amode.rm = reg
			}
		}
	case useKindRNAMode:
		if index == 0 {
			i.rn = i.rn.assignReg(reg)
		} else if index == 1 {
			if amodeRN := i.amode.rn; amodeRN.Valid() {
				i.amode.rn = reg
			} else {
				panic("BUG")
			}
		} else {
			if amodeRM := i.amode.rm; amodeRM.Valid() {
				i.amode.rm = reg
			} else {
				panic("BUG")
			}
		}
	case useKindCond:
		c := cond(i.u1)
		switch c.kind() {
		case condKindRegisterZero:
			i.u1 = uint64(registerAsRegZeroCond(reg))
		case condKindRegisterNotZero:
			i.u1 = uint64(registerAsRegNotZeroCond(reg))
		}
	case useKindCall:
		panic("BUG: call instructions shouldn't be assigned")
	case useKindCallInd:
		i.rn = i.rn.assignReg(reg)
	default:
		panic(fmt.Sprintf("useKind for %v not defined", i))
	}
}
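// Defs, AssignDef, Uses and AssignUse above form the regalloc.Instr surface of
// this type: the def/use tables map each instructionKind to the operands the
// register allocator must look at. A minimal sketch of how an allocator-style
// pass might drive them (illustrative only; firstInstr is a placeholder and
// regs is just a scratch slice reused across calls):
//
//	var regs []regalloc.VReg
//	for cur := firstInstr; cur != nil; cur = cur.next {
//		for _, use := range cur.Uses(&regs) {
//			_ = use // record a use of this VReg at cur.
//		}
//		for _, def := range cur.Defs(&regs) {
//			_ = def // record a definition of this VReg at cur.
//		}
//	}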
func (i *instruction) asCall(ref ssa.FuncRef, abi *abiImpl) {
	i.kind = call
	i.u1 = uint64(ref)
	i.abi = abi
}

func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *abiImpl) {
	i.kind = callInd
	i.rn = operandNR(ptr)
	i.abi = abi
}

func (i *instruction) callFuncRef() ssa.FuncRef {
	return ssa.FuncRef(i.u1)
}

// shift is the left-shift amount divided by 16; it must be in the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
	i.kind = movZ
	i.rd = operandNR(dst)
	i.u1 = imm
	i.u2 = shift
	if dst64bit {
		i.u3 = 1
	}
}

// shift is the left-shift amount divided by 16; it must be in the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
	i.kind = movK
	i.rd = operandNR(dst)
	i.u1 = imm
	i.u2 = shift
	if dst64bit {
		i.u3 = 1
	}
}

// shift is the left-shift amount divided by 16; it must be in the range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false).
func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
	i.kind = movN
	i.rd = operandNR(dst)
	i.u1 = imm
	i.u2 = shift
	if dst64bit {
		i.u3 = 1
	}
}
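// MOVZ/MOVK/MOVN are the building blocks for materializing wide integer
// constants: MOVZ writes one 16-bit chunk and zeroes the rest, and MOVK
// patches further chunks in place. An illustrative sketch for a full 64-bit
// immediate (dst is a placeholder; a real lowering would also skip zero
// chunks and may prefer MOVN or a bitmask immediate when that is shorter):
//
//	const c = uint64(0x1234_5678_9abc_def0)
//	i0, i1, i2, i3 := &instruction{}, &instruction{}, &instruction{}, &instruction{}
//	i0.asMOVZ(dst, c&0xffff, 0, true)       // movz dst, #0xdef0
//	i1.asMOVK(dst, (c>>16)&0xffff, 1, true) // movk dst, #0x9abc, lsl 16
//	i2.asMOVK(dst, (c>>32)&0xffff, 2, true) // movk dst, #0x5678, lsl 32
//	i3.asMOVK(dst, (c>>48)&0xffff, 3, true) // movk dst, #0x1234, lsl 48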
func (i *instruction) asNop0() *instruction {
	i.kind = nop0
	return i
}

func (i *instruction) asNop0WithLabel(l label) {
	i.kind = nop0
	i.u1 = uint64(l)
}

func (i *instruction) nop0Label() label {
	return label(i.u1)
}

func (i *instruction) asRet(abi *abiImpl) {
	i.kind = ret
	i.abi = abi
}

func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) {
	i.kind = storeP64
	i.rn = operandNR(src1)
	i.rm = operandNR(src2)
	i.amode = amode
}

func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) {
	i.kind = loadP64
	i.rn = operandNR(src1)
	i.rm = operandNR(src2)
	i.amode = amode
}

func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 8:
		i.kind = store8
	case 16:
		i.kind = store16
	case 32:
		if src.reg().RegType() == regalloc.RegTypeInt {
			i.kind = store32
		} else {
			i.kind = fpuStore32
		}
	case 64:
		if src.reg().RegType() == regalloc.RegTypeInt {
			i.kind = store64
		} else {
			i.kind = fpuStore64
		}
	case 128:
		i.kind = fpuStore128
	}
	i.rn = src
	i.amode = amode
}

func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 8:
		i.kind = sLoad8
	case 16:
		i.kind = sLoad16
	case 32:
		i.kind = sLoad32
	default:
		panic("BUG")
	}
	i.rd = dst
	i.amode = amode
}

func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 8:
		i.kind = uLoad8
	case 16:
		i.kind = uLoad16
	case 32:
		i.kind = uLoad32
	case 64:
		i.kind = uLoad64
	}
	i.rd = dst
	i.amode = amode
}

func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) {
	switch sizeInBits {
	case 32:
		i.kind = fpuLoad32
	case 64:
		i.kind = fpuLoad64
	case 128:
		i.kind = fpuLoad128
	}
	i.rd = dst
	i.amode = amode
}

func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
	// NOTE: currently only no-offset loads are supported; it is doubtful that we would
	// need offset loads, which are only available with post-index addressing.
	i.kind = vecLoad1R
	i.rd = rd
	i.rn = rn
	i.u1 = uint64(arr)
}

func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) {
	i.kind = cSet
	i.rd = operandNR(rd)
	i.u1 = uint64(c)
	if mask {
		i.u2 = 1
	}
}

func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
	i.kind = cSel
	i.rd = rd
	i.rn = rn
	i.rm = rm
	i.u1 = uint64(c)
	if _64bit {
		i.u3 = 1
	}
}

func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
	i.kind = fpuCSel
	i.rd = rd
	i.rn = rn
	i.rm = rm
	i.u1 = uint64(c)
	if _64bit {
		i.u3 = 1
	}
}

func (i *instruction) asBr(target label) {
	if target == labelReturn {
		panic("BUG: call site should special case for returnLabel")
	}
	i.kind = br
	i.u1 = uint64(target)
}

func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targets []uint32) {
	i.kind = brTableSequence
	i.rn = operandNR(indexReg)
	i.targets = targets
}

func (i *instruction) brTableSequenceOffsetsResolved() {
	i.u3 = 1 // indicate that the offsets are resolved, for debugging.
}

func (i *instruction) brLabel() label {
	return label(i.u1)
}

// brOffsetResolve is called when the target label is resolved.
func (i *instruction) brOffsetResolve(offset int64) {
	i.u2 = uint64(offset)
	i.u3 = 1 // indicate that the offset is resolved, for debugging.
}

func (i *instruction) brOffset() int64 {
	return int64(i.u2)
}

// asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is not flag.
func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
	i.kind = condBr
	i.u1 = c.asUint64()
	i.u2 = uint64(target)
	if is64bit {
		i.u3 = 1
	}
}

func (i *instruction) setCondBrTargets(target label) {
	i.u2 = uint64(target)
}

func (i *instruction) condBrLabel() label {
	return label(i.u2)
}

// condBrOffsetResolve is called when the target label is resolved.
func (i *instruction) condBrOffsetResolve(offset int64) {
	i.rd.data = uint64(offset)
	i.rd.data2 = 1 // indicate that the offset is resolved, for debugging.
}

// condBrOffsetResolved returns true if condBrOffsetResolve is already called.
func (i *instruction) condBrOffsetResolved() bool {
	return i.rd.data2 == 1
}

func (i *instruction) condBrOffset() int64 {
	return int64(i.rd.data)
}

func (i *instruction) condBrCond() cond {
	return cond(i.u1)
}

func (i *instruction) condBr64bit() bool {
	return i.u3 == 1
}
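// Branches are emitted against labels first and patched once the final code
// layout is known: asBr/asCondBr record only the target label, and the
// relative byte offset is filled in later via brOffsetResolve or
// condBrOffsetResolve. A sketch of the two phases (illustrative only; the
// offset arithmetic happens elsewhere in this package once labels have
// addresses):
//
//	b := &instruction{}
//	b.asBr(target)                // phase 1: record the target label.
//	offset := targetAddr - brAddr // known once layout is fixed.
//	b.brOffsetResolve(offset)     // phase 2: patch the relative offset.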
func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) {
	i.kind = loadFpuConst32
	i.u1 = raw
	i.rd = operandNR(rd)
}

func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) {
	i.kind = loadFpuConst64
	i.u1 = raw
	i.rd = operandNR(rd)
}

func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) {
	i.kind = loadFpuConst128
	i.u1 = lo
	i.u2 = hi
	i.rd = operandNR(rd)
}

func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) {
	i.kind = fpuCmp
	i.rn, i.rm = rn, rm
	if is64bit {
		i.u3 = 1
	}
}

func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) {
	i.kind = cCmpImm
	i.rn = rn
	i.rm.data = imm
	i.u1 = uint64(c)
	i.u2 = uint64(flag)
	if is64bit {
		i.u3 = 1
	}
}

// asALU sets up a basic ALU instruction.
func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
	switch rm.kind {
	case operandKindNR:
		i.kind = aluRRR
	case operandKindSR:
		i.kind = aluRRRShift
	case operandKindER:
		i.kind = aluRRRExtend
	case operandKindImm12:
		i.kind = aluRRImm12
	default:
		panic("BUG")
	}
	i.u1 = uint64(aluOp)
	i.rd, i.rn, i.rm = rd, rn, rm
	if dst64bit {
		i.u3 = 1
	}
}

// asALURRRR sets up an ALU instruction with three register sources.
func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) {
	i.kind = aluRRRR
	i.u1 = uint64(aluOp)
	i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra
	if dst64bit {
		i.u3 = 1
	}
}
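// aluRRRR covers the four-operand forms such as MADD/MSUB. A plain multiply has
// no dedicated three-operand encoding on arm64, so it is normally expressed as
// MADD with the zero register as the addend. Illustrative sketch (rd, rn, rm
// are placeholder operands and zero stands for a VReg backed by the zero
// register, which is defined elsewhere in this package):
//
//	i := &instruction{}
//	i.asALURRRR(aluOpMAdd, rd, rn, rm, operandNR(zero), true) // mul rd, rn, rm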
// asALUShift sets up a shift-based ALU instruction.
func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
	switch rm.kind {
	case operandKindNR:
		i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands.
	case operandKindShiftImm:
		i.kind = aluRRImmShift
	default:
		panic("BUG")
	}
	i.u1 = uint64(aluOp)
	i.rd, i.rn, i.rm = rd, rn, rm
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) {
	i.kind = aluRRBitmaskImm
	i.u1 = uint64(aluOp)
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	i.u2 = imm
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asMovToFPSR(rn regalloc.VReg) {
	i.kind = movToFPSR
	i.rn = operandNR(rn)
}

func (i *instruction) asMovFromFPSR(rd regalloc.VReg) {
	i.kind = movFromFPSR
	i.rd = operandNR(rd)
}

func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) {
	i.kind = bitRR
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	i.u1 = uint64(bitOp)
	if is64bit {
		i.u2 = 1
	}
}

func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) {
	i.kind = fpuRRR
	i.u1 = uint64(op)
	i.rd, i.rn, i.rm = rd, rn, rm
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) {
	i.kind = fpuRR
	i.u1 = uint64(op)
	i.rd, i.rn = rd, rn
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) {
	i.kind = extend
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	i.u1 = uint64(fromBits)
	i.u2 = uint64(toBits)
	if signed {
		i.u3 = 1
	}
}

func (i *instruction) asMove32(rd, rn regalloc.VReg) {
	i.kind = mov32
	i.rn, i.rd = operandNR(rn), operandNR(rd)
}

func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction {
	i.kind = mov64
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	return i
}

func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {
	i.kind = fpuMov64
	i.rn, i.rd = operandNR(rn), operandNR(rd)
}

func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction {
	i.kind = fpuMov128
	i.rn, i.rd = operandNR(rn), operandNR(rd)
	return i
}

func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) {
	i.kind = movToVec
	i.rd = rd
	i.rn = rn
	i.u1, i.u2 = uint64(arr), uint64(index)
}

func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) {
	if signed {
		i.kind = movFromVecSigned
	} else {
		i.kind = movFromVec
	}
	i.rd = rd
	i.rn = rn
	i.u1, i.u2 = uint64(arr), uint64(index)
}

func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) {
	i.kind = vecDup
	i.u1 = uint64(arr)
	i.rn, i.rd = rn, rd
}

func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) {
	i.kind = vecDupElement
	i.u1 = uint64(arr)
	i.rn, i.rd = rn, rd
	i.u2 = uint64(index)
}

func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) {
	i.kind = vecExtract
	i.u1 = uint64(arr)
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(index)
}

func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
	i.kind = vecMovElement
	i.u1 = uint64(arr)
	i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex)
	i.rn, i.rd = rn, rd
}

func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) {
	i.kind = vecMisc
	i.u1 = uint64(op)
	i.rn, i.rd = rn, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) {
	i.kind = vecLanes
	i.u1 = uint64(op)
	i.rn, i.rd = rn, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecShiftImm
	i.u1 = uint64(op)
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) {
	switch nregs {
	case 0, 1:
		i.kind = vecTbl
	case 2:
		i.kind = vecTbl2
		if !rn.reg().IsRealReg() {
			panic("rn is not a RealReg")
		}
		if rn.realReg() == v31 {
			panic("rn cannot be v31")
		}
	default:
		panic(fmt.Sprintf("unsupported number of registers %d", nregs))
	}
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecPermute
	i.u1 = uint64(op)
	i.rn, i.rm, i.rd = rn, rm, rd
	i.u2 = uint64(arr)
}

func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecRRR
	i.u1 = uint64(op)
	i.rn, i.rd, i.rm = rn, rd, rm
	i.u2 = uint64(arr)
}

// asVecRRRRewrite encodes a vector instruction that rewrites the destination register.
// IMPORTANT: the destination register must be already defined before this instruction.
func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) {
	i.kind = vecRRRRewrite
	i.u1 = uint64(op)
	i.rn, i.rd, i.rm = rn, rd, rm
	i.u2 = uint64(arr)
}
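// Because vecRRRRewrite reads the destination, a lowering that uses it (e.g.
// BSL, where the destination initially holds the selection mask) must give rd
// a well-defined value first and only then emit the rewrite form. Illustrative
// sketch (rd, rn, rm and mask are placeholders):
//
//	cp := &instruction{}
//	cp.asFpuMov128(rd.nr(), mask) // define rd with the select mask first.
//	bsl := &instruction{}
//	bsl.asVecRRRRewrite(vecOpBsl, rd, rn, rm, vecArrangement16B)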
func (i *instruction) IsCopy() bool {
	op := i.kind
	// We do not include mov32 as it is not a copy instruction in the sense that it does not preserve the upper 32 bits,
	// and it is only used in the translation of IReduce, not the actual copy indeed.
	return op == mov64 || op == fpuMov64 || op == fpuMov128
}

// String implements fmt.Stringer.
func (i *instruction) String() (str string) {
	is64SizeBitToSize := func(u3 uint64) byte {
		if u3 == 0 {
			return 32
		}
		return 64
	}

	switch i.kind {
	case nop0:
		if i.u1 != 0 {
			l := label(i.u1)
			str = fmt.Sprintf("%s:", l)
		} else {
			str = "nop0"
		}
	case aluRRR:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size),
			i.rm.format(size))
	case aluRRRR:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size))
	case aluRRImm12:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
	case aluRRBitmaskImm:
		size := is64SizeBitToSize(i.u3)
		rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size)
		if size == 32 {
			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))
		} else {
			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)
		}
	case aluRRImmShift:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %#x",
			aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			i.rm.shiftImm(),
		)
	case aluRRRShift:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s",
			aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			i.rm.format(size),
		)
	case aluRRRExtend:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			// Regardless of the source size, the register is formatted in 32-bit.
			i.rm.format(32),
		)
	case bitRR:
		size := is64SizeBitToSize(i.u2)
		str = fmt.Sprintf("%s %s, %s",
			bitOp(i.u1),
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
		)
	case uLoad8:
		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case sLoad8:
		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case uLoad16:
		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case sLoad16:
		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case uLoad32:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case sLoad32:
		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case uLoad64:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
	case store8:
		str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8))
	case store16:
		str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16))
	case store32:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32))
	case store64:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
	case storeP64:
		str = fmt.Sprintf("stp %s, %s, %s",
			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
	case loadP64:
		str = fmt.Sprintf("ldp %s, %s, %s",
			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
	case mov64:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegSized(i.rd.nr(), 64),
			formatVRegSized(i.rn.nr(), 64))
	case mov32:
		str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32))
	case movZ:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
	case movN:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
	case movK:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
	case extend:
		fromBits, toBits := byte(i.u1), byte(i.u2)

		var signedStr string
		if i.u3 == 1 {
			signedStr = "s"
		} else {
			signedStr = "u"
		}
		var fromStr string
		switch fromBits {
		case 8:
			fromStr = "b"
		case 16:
			fromStr = "h"
		case 32:
			fromStr = "w"
		}
		str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32))
	case cSel:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("csel %s, %s, %s, %s",
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			formatVRegSized(i.rm.nr(), size),
			condFlag(i.u1),
		)
	case cSet:
		if i.u2 != 0 {
			str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
		} else {
			str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
		}
	case cCmpImm:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",
			formatVRegSized(i.rn.nr(), size), i.rm.data,
			i.u2&0b1111,
			condFlag(i.u1))
	case fpuMov64:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))
	case fpuMov128:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))
	case fpuMovFromVec:
		panic("TODO")
	case fpuRR:
		dstSz := is64SizeBitToSize(i.u3)
		srcSz := dstSz
		op := fpuUniOp(i.u1)
		switch op {
		case fpuUniOpCvt32To64:
			srcSz = 32
		case fpuUniOpCvt64To32:
			srcSz = 64
		}
		str = fmt.Sprintf("%s %s, %s", op.String(),
			formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz))
	case fpuRRR:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(),
			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
	case fpuRRI:
		panic("TODO")
	case fpuRRRR:
		panic("TODO")
	case fpuCmp:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("fcmp %s, %s",
			formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
	case fpuLoad32:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
	case fpuStore32:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64))
	case fpuLoad64:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
	case fpuStore64:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
	case fpuLoad128:
		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64))
	case fpuStore128:
		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64))
	case loadFpuConst32:
		str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1)))
	case loadFpuConst64:
		str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1))
	case loadFpuConst128:
		str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x",
			formatVRegSized(i.rd.nr(), 128), i.u1, i.u2)
	case fpuToInt:
		var op, src, dst string
		if signed := i.u1 == 1; signed {
			op = "fcvtzs"
		} else {
			op = "fcvtzu"
		}
		if src64 := i.u2 == 1; src64 {
			src = formatVRegWidthVec(i.rn.nr(), vecArrangementD)
		} else {
			src = formatVRegWidthVec(i.rn.nr(), vecArrangementS)
		}
		if dst64 := i.u3 == 1; dst64 {
			dst = formatVRegSized(i.rd.nr(), 64)
		} else {
			dst = formatVRegSized(i.rd.nr(), 32)
		}
		str = fmt.Sprintf("%s %s, %s", op, dst, src)

	case intToFpu:
		var op, src, dst string
		if signed := i.u1 == 1; signed {
			op = "scvtf"
		} else {
			op = "ucvtf"
		}
		if src64 := i.u2 == 1; src64 {
			src = formatVRegSized(i.rn.nr(), 64)
		} else {
			src = formatVRegSized(i.rn.nr(), 32)
		}
		if dst64 := i.u3 == 1; dst64 {
			dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD)
		} else {
			dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS)
		}
		str = fmt.Sprintf("%s %s, %s", op, dst, src)
	case fpuCSel:
		size := is64SizeBitToSize(i.u3)
		str = fmt.Sprintf("fcsel %s, %s, %s, %s",
			formatVRegSized(i.rd.nr(), size),
			formatVRegSized(i.rn.nr(), size),
			formatVRegSized(i.rm.nr(), size),
			condFlag(i.u1),
		)
	case movToVec:
		var size byte
		arr := vecArrangement(i.u1)
		switch arr {
		case vecArrangementB, vecArrangementH, vecArrangementS:
			size = 32
		case vecArrangementD:
			size = 64
		default:
			panic("unsupported arrangement " + arr.String())
		}
		str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
	case movFromVec, movFromVecSigned:
		var size byte
		var opcode string
		arr := vecArrangement(i.u1)
		signed := i.kind == movFromVecSigned
		switch arr {
		case vecArrangementB, vecArrangementH, vecArrangementS:
			size = 32
			if signed {
				opcode = "smov"
			} else {
				opcode = "umov"
			}
		case vecArrangementD:
			size = 64
			if signed {
				opcode = "smov"
			} else {
				opcode = "mov"
			}
		default:
			panic("unsupported arrangement " + arr.String())
		}
		str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
	case vecDup:
		str = fmt.Sprintf("dup %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
			formatVRegSized(i.rn.nr(), 64),
		)
	case vecDupElement:
		arr := vecArrangement(i.u1)
		str = fmt.Sprintf("dup %s, %s",
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),
		)
	case vecDupFromFpu:
		panic("TODO")
	case vecExtract:
		str = fmt.Sprintf("ext %s, %s, %s, #%d",
			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),
			formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),
			uint32(i.u2),
		)
	case vecExtend:
		panic("TODO")
	case vecMovElement:
		str = fmt.Sprintf("mov %s, %s",
			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)),
			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)),
		)
	case vecMiscNarrow:
		panic("TODO")
	case vecRRR, vecRRRRewrite:
		str = fmt.Sprintf("%s %s, %s, %s",
			vecOp(i.u1),
			formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),
			formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),
		)
	case vecMisc:
		vop := vecOp(i.u1)
		if vop == vecOpCmeq0 {
			str = fmt.Sprintf("cmeq %s, %s, #0",
				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
		} else {
			str = fmt.Sprintf("%s %s, %s",
				vop,
				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
		}
	case vecLanes:
		arr := vecArrangement(i.u2)
		var destArr vecArrangement
		switch arr {
		case vecArrangement8B, vecArrangement16B:
			destArr = vecArrangementH
		case vecArrangement4H, vecArrangement8H:
			destArr = vecArrangementS
		case vecArrangement4S:
			destArr = vecArrangementD
		default:
			panic("invalid arrangement " + arr.String())
		}
		str = fmt.Sprintf("%s %s, %s",
			vecOp(i.u1),
			formatVRegWidthVec(i.rd.nr(), destArr),
			formatVRegVec(i.rn.nr(), arr, vecIndexNone))
	case vecShiftImm:
		arr := vecArrangement(i.u2)
		str = fmt.Sprintf("%s %s, %s, #%d",
			vecOp(i.u1),
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), arr, vecIndexNone),
			i.rm.shiftImm())
	case vecTbl:
		arr := vecArrangement(i.u2)
		str = fmt.Sprintf("tbl %s, { %s }, %s",
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),
			formatVRegVec(i.rm.nr(), arr, vecIndexNone))
	case vecTbl2:
		arr := vecArrangement(i.u2)
		rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr()
		rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
		str = fmt.Sprintf("tbl %s, { %s, %s }, %s",
			formatVRegVec(rd, arr, vecIndexNone),
			formatVRegVec(rn, vecArrangement16B, vecIndexNone),
			formatVRegVec(rn1, vecArrangement16B, vecIndexNone),
			formatVRegVec(rm, arr, vecIndexNone))
	case vecPermute:
		arr := vecArrangement(i.u2)
		str = fmt.Sprintf("%s %s, %s, %s",
			vecOp(i.u1),
			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
			formatVRegVec(i.rn.nr(), arr, vecIndexNone),
			formatVRegVec(i.rm.nr(), arr, vecIndexNone))
	case movToFPSR:
		str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))
	case movFromFPSR:
		str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64))
	case call:
		if i.u2 > 0 {
			str = fmt.Sprintf("bl #%#x", i.u2)
		} else {
			str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))
		}
	case callInd:
		str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64))
	case ret:
		str = "ret"
	case br:
		target := label(i.u1)
		if i.u3 != 0 {
			str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())
		} else {
			str = fmt.Sprintf("b %s", target.String())
		}
	case condBr:
		size := is64SizeBitToSize(i.u3)
		c := cond(i.u1)
		target := label(i.u2)
		switch c.kind() {
		case condKindRegisterZero:
			if !i.condBrOffsetResolved() {
				str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String())
			} else {
				str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String())
			}
		case condKindRegisterNotZero:
			if offset := i.condBrOffset(); offset != 0 {
				str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String())
			} else {
				str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String())
			}
		case condKindCondFlagSet:
			if offset := i.condBrOffset(); offset != 0 {
				if target == labelInvalid {
					str = fmt.Sprintf("b.%s #%#x", c.flag(), offset)
				} else {
					str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String())
				}
			} else {
				str = fmt.Sprintf("b.%s %s", c.flag(), target.String())
			}
		}
	case adr:
		str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1))
	case brTableSequence:
		if i.u3 == 0 { // The offsets haven't been resolved yet.
			labels := make([]string, len(i.targets))
			for index, l := range i.targets {
				labels[index] = label(l).String()
			}
			str = fmt.Sprintf("br_table_sequence %s, [%s]",
				formatVRegSized(i.rn.nr(), 64),
				strings.Join(labels, ", "),
			)
		} else {
			// See encodeBrTableSequence for the encoding.
			offsets := make([]string, len(i.targets))
			for index, offset := range i.targets {
				offsets[index] = fmt.Sprintf("%#x", int32(offset))
			}
			str = fmt.Sprintf(
				`adr %[2]s, #16; ldrsw %[1]s, [%[2]s, %[1]s, UXTW 2]; add %[2]s, %[2]s, %[1]s; br %[2]s; %s`,
				formatVRegSized(i.rn.nr(), 64),
				formatVRegSized(tmpRegVReg, 64),
				offsets,
			)
		}
	case exitSequence:
		str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64))
	case udf:
		str = "udf"
	case emitSourceOffsetInfo:
		str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))
	case vecLoad1R:
		str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
	default:
		panic(i.kind)
	}
	return
}

func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {
	i.kind = adr
	i.rd = operandNR(rd)
	i.u1 = uint64(offset)
}

// TODO: delete unnecessary things.
const (
	// nop0 represents a no-op of zero size.
	nop0 instructionKind = iota + 1
	// aluRRR represents an ALU operation with two register sources and a register destination.
	aluRRR
	// aluRRRR represents an ALU operation with three register sources and a register destination.
	aluRRRR
	// aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination.
	aluRRImm12
	// aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination.
	aluRRBitmaskImm
	// aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination.
	aluRRImmShift
	// aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination.
	aluRRRShift
	// aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination.
	aluRRRExtend
	// bitRR represents a bit op instruction with a single register source.
	bitRR
	// uLoad8 represents an unsigned 8-bit load.
	uLoad8
	// sLoad8 represents a signed 8-bit load into 64-bit register.
	sLoad8
	// uLoad16 represents an unsigned 16-bit load into 64-bit register.
	uLoad16
	// sLoad16 represents a signed 16-bit load into 64-bit register.
	sLoad16
	// uLoad32 represents an unsigned 32-bit load into 64-bit register.
	uLoad32
	// sLoad32 represents a signed 32-bit load into 64-bit register.
	sLoad32
	// uLoad64 represents a 64-bit load.
	uLoad64
	// store8 represents an 8-bit store.
	store8
	// store16 represents a 16-bit store.
	store16
	// store32 represents a 32-bit store.
	store32
	// store64 represents a 64-bit store.
	store64
	// storeP64 represents a store of a pair of registers.
	storeP64
	// loadP64 represents a load of a pair of registers.
	loadP64
	// mov64 represents a MOV instruction. These are encoded as ORRs but we keep them separate for better handling.
	mov64
	// mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination.
	mov32
	// movZ represents a MOVZ with a 16-bit immediate.
	movZ
	// movN represents a MOVN with a 16-bit immediate.
	movN
	// movK represents a MOVK with a 16-bit immediate.
	movK
	// extend represents a sign- or zero-extend operation.
	extend
	// cSel represents a conditional-select operation.
	cSel
	// cSet represents a conditional-set operation.
	cSet
	// cCmpImm represents a conditional comparison with an immediate.
	cCmpImm
	// fpuMov64 represents an FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster.
	fpuMov64
	// fpuMov128 represents a vector register move.
	fpuMov128
	// fpuMovFromVec represents a move to scalar from a vector element.
	fpuMovFromVec
	// fpuRR represents a 1-op FPU instruction.
	fpuRR
	// fpuRRR represents a 2-op FPU instruction.
	fpuRRR
	// fpuRRI represents a 2-op FPU instruction with immediate value.
	fpuRRI
	// fpuRRRR represents a 3-op FPU instruction.
	fpuRRRR
	// fpuCmp represents an FPU comparison, either 32 or 64 bit.
	fpuCmp
	// fpuLoad32 represents a floating-point load, single-precision (32 bit).
	fpuLoad32
	// fpuStore32 represents a floating-point store, single-precision (32 bit).
	fpuStore32
	// fpuLoad64 represents a floating-point load, double-precision (64 bit).
	fpuLoad64
	// fpuStore64 represents a floating-point store, double-precision (64 bit).
	fpuStore64
	// fpuLoad128 represents a floating-point/vector load, 128 bit.
	fpuLoad128
	// fpuStore128 represents a floating-point/vector store, 128 bit.
	fpuStore128
	// loadFpuConst32 represents a load of a 32-bit floating-point constant.
	loadFpuConst32
	// loadFpuConst64 represents a load of a 64-bit floating-point constant.
	loadFpuConst64
	// loadFpuConst128 represents a load of a 128-bit floating-point constant.
	loadFpuConst128
	// vecLoad1R represents a load of a single-element structure that is replicated to all lanes of a vector.
	vecLoad1R
	// fpuToInt represents a conversion from FP to integer.
	fpuToInt
	// intToFpu represents a conversion from integer to FP.
	intToFpu
	// fpuCSel represents a 32/64-bit FP conditional select.
	fpuCSel
	// movToVec represents a move to a vector element from a GPR.
	movToVec
	// movFromVec represents an unsigned move from a vector element to a GPR.
	movFromVec
	// movFromVecSigned represents a signed move from a vector element to a GPR.
	movFromVecSigned
	// vecDup represents a duplication of general-purpose register to vector.
	vecDup
	// vecDupElement represents a duplication of a vector element to vector or scalar.
	vecDupElement
	// vecDupFromFpu represents a duplication of scalar to vector.
	vecDupFromFpu
	// vecExtract represents a vector extraction operation.
	vecExtract
	// vecExtend represents a vector extension operation.
	vecExtend
	// vecMovElement represents a move of one vector element to another vector element.
	vecMovElement
	// vecMiscNarrow represents a vector narrowing operation.
	vecMiscNarrow
	// vecRRR represents a vector ALU operation.
	vecRRR
	// vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register.
	// For example, BSL instruction rewrites the destination register, and the existing value influences the result.
	// Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive
	// the instruction while this instruction doesn't have "def" in the context of register allocation.
	vecRRRRewrite
	// vecMisc represents a vector two register miscellaneous instruction.
	vecMisc
	// vecLanes represents a vector instruction across lanes.
	vecLanes
	// vecShiftImm represents a SIMD scalar shift by immediate instruction.
	vecShiftImm
	// vecTbl represents a table vector lookup - single register table.
	vecTbl
	// vecTbl2 represents a table vector lookup - two register table.
	vecTbl2
	// vecPermute represents a vector permute instruction.
	vecPermute
	// movToFPSR represents a move to the FPSR.
	movToFPSR
	// movFromFPSR represents a move from the FPSR.
	movFromFPSR
	// call represents a machine call instruction.
	call
	// callInd represents a machine indirect-call instruction.
	callInd
	// ret represents a machine return instruction.
	ret
	// br represents an unconditional branch.
	br
	// condBr represents a conditional branch.
	condBr
	// adr represents computing the address of a memory location using a PC-relative offset.
	adr
	// brTableSequence represents a jump-table sequence.
	brTableSequence
	// exitSequence consists of multiple instructions, and exits the execution immediately.
	// See encodeExitSequence.
	exitSequence
	// UDF is the undefined instruction. For debugging only.
	udf

	// emitSourceOffsetInfo is a dummy instruction to emit source offset info.
	// The existence of this instruction does not affect the execution.
	emitSourceOffsetInfo

	// ------------------- do not define below this line -------------------
	numInstructionKinds
)

func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction {
	i.kind = emitSourceOffsetInfo
	i.u1 = uint64(l)
	return i
}

func (i *instruction) sourceOffsetInfo() ssa.SourceOffset {
	return ssa.SourceOffset(i.u1)
}

func (i *instruction) asUDF() *instruction {
	i.kind = udf
	return i
}

func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) {
	i.kind = fpuToInt
	i.rn = rn
	i.rd = rd
	if rdSigned {
		i.u1 = 1
	}
	if src64bit {
		i.u2 = 1
	}
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) {
	i.kind = intToFpu
	i.rn = rn
	i.rd = rd
	if rnSigned {
		i.u1 = 1
	}
	if src64bit {
		i.u2 = 1
	}
	if dst64bit {
		i.u3 = 1
	}
}

func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {
	i.kind = exitSequence
	i.rn = operandNR(ctx)
	return i
}

// aluOp determines the type of ALU operation. Instructions whose kind is one of
// aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend
// would use this type.
type aluOp int

func (a aluOp) String() string {
	switch a {
	case aluOpAdd:
		return "add"
	case aluOpSub:
		return "sub"
	case aluOpOrr:
		return "orr"
	case aluOpAnd:
		return "and"
	case aluOpBic:
		return "bic"
	case aluOpEor:
		return "eor"
	case aluOpAddS:
		return "adds"
	case aluOpSubS:
		return "subs"
	case aluOpSMulH:
		return "sMulH"
	case aluOpUMulH:
		return "uMulH"
	case aluOpSDiv:
		return "sdiv"
	case aluOpUDiv:
		return "udiv"
	case aluOpRotR:
		return "ror"
	case aluOpLsr:
		return "lsr"
	case aluOpAsr:
		return "asr"
	case aluOpLsl:
		return "lsl"
	case aluOpMAdd:
		return "madd"
	case aluOpMSub:
		return "msub"
	}
	panic(int(a))
}

const (
	// 32/64-bit Add.
	aluOpAdd aluOp = iota
	// 32/64-bit Subtract.
	aluOpSub
	// 32/64-bit Bitwise OR.
	aluOpOrr
	// 32/64-bit Bitwise AND.
	aluOpAnd
	// 32/64-bit Bitwise AND NOT.
	aluOpBic
	// 32/64-bit Bitwise XOR (Exclusive OR).
	aluOpEor
	// 32/64-bit Add setting flags.
	aluOpAddS
	// 32/64-bit Subtract setting flags.
	aluOpSubS
	// Signed multiply, high-word result.
	aluOpSMulH
	// Unsigned multiply, high-word result.
	aluOpUMulH
	// 64-bit Signed divide.
	aluOpSDiv
	// 64-bit Unsigned divide.
	aluOpUDiv
	// 32/64-bit Rotate right.
	aluOpRotR
	// 32/64-bit Logical shift right.
	aluOpLsr
	// 32/64-bit Arithmetic shift right.
	aluOpAsr
	// 32/64-bit Logical shift left.
	aluOpLsl

	// MAdd and MSub are only applicable for aluRRRR.
	aluOpMAdd
	aluOpMSub
)

// vecOp determines the type of vector operation. Instructions whose kind is one of
// vecOpCnt would use this type.
type vecOp int

// String implements fmt.Stringer.
func (b vecOp) String() string {
	switch b {
	case vecOpCnt:
		return "cnt"
	case vecOpCmeq:
		return "cmeq"
	case vecOpCmgt:
		return "cmgt"
	case vecOpCmhi:
		return "cmhi"
	case vecOpCmge:
		return "cmge"
	case vecOpCmhs:
		return "cmhs"
	case vecOpFcmeq:
		return "fcmeq"
	case vecOpFcmgt:
		return "fcmgt"
	case vecOpFcmge:
		return "fcmge"
	case vecOpCmeq0:
		return "cmeq0"
	case vecOpUaddlv:
		return "uaddlv"
	case vecOpBit:
		return "bit"
	case vecOpBic:
		return "bic"
	case vecOpBsl:
		return "bsl"
	case vecOpNot:
		return "not"
	case vecOpAnd:
		return "and"
	case vecOpOrr:
		return "orr"
	case vecOpEOR:
		return "eor"
	case vecOpFadd:
		return "fadd"
	case vecOpAdd:
		return "add"
	case vecOpAddp:
		return "addp"
	case vecOpAddv:
		return "addv"
	case vecOpSub:
		return "sub"
	case vecOpFsub:
		return "fsub"
	case vecOpSmin:
		return "smin"
	case vecOpUmin:
		return "umin"
	case vecOpUminv:
		return "uminv"
	case vecOpSmax:
		return "smax"
	case vecOpUmax:
		return "umax"
	case vecOpUmaxp:
		return "umaxp"
	case vecOpUrhadd:
		return "urhadd"
	case vecOpFmul:
		return "fmul"
	case vecOpSqrdmulh:
		return "sqrdmulh"
	case vecOpMul:
		return "mul"
	case vecOpUmlal:
		return "umlal"
	case vecOpFdiv:
		return "fdiv"
	case vecOpFsqrt:
		return "fsqrt"
	case vecOpAbs:
		return "abs"
	case vecOpFabs:
		return "fabs"
	case vecOpNeg:
		return "neg"
	case vecOpFneg:
		return "fneg"
	case vecOpFrintp:
		return "frintp"
	case vecOpFrintm:
		return "frintm"
	case vecOpFrintn:
		return "frintn"
	case vecOpFrintz:
		return "frintz"
	case vecOpFcvtl:
		return "fcvtl"
	case vecOpFcvtn:
		return "fcvtn"
	case vecOpFcvtzu:
		return "fcvtzu"
	case vecOpFcvtzs:
		return "fcvtzs"
	case vecOpScvtf:
		return "scvtf"
	case vecOpUcvtf:
		return "ucvtf"
	case vecOpSqxtn:
		return "sqxtn"
	case vecOpUqxtn:
		return "uqxtn"
	case vecOpSqxtun:
		return "sqxtun"
	case vecOpRev64:
		return "rev64"
	case vecOpXtn:
		return "xtn"
	case vecOpShll:
		return "shll"
	case vecOpSshl:
		return "sshl"
	case vecOpSshll:
		return "sshll"
	case vecOpUshl:
		return "ushl"
	case vecOpUshll:
		return "ushll"
	case vecOpSshr:
		return "sshr"
	case vecOpZip1:
		return "zip1"
	case vecOpFmin:
		return "fmin"
	case vecOpFmax:
		return "fmax"
	}
	panic(int(b))
}

const (
	vecOpCnt vecOp = iota
	vecOpCmeq0
	vecOpCmeq
	vecOpCmgt
	vecOpCmhi
	vecOpCmge
	vecOpCmhs
	vecOpFcmeq
	vecOpFcmgt
	vecOpFcmge
	vecOpUaddlv
	vecOpBit
	vecOpBic
	vecOpBsl
	vecOpNot
	vecOpAnd
	vecOpOrr
	vecOpEOR
	vecOpAdd
	vecOpFadd
	vecOpAddv
	vecOpSqadd
	vecOpUqadd
	vecOpAddp
	vecOpSub
	vecOpFsub
	vecOpSqsub
	vecOpUqsub
	vecOpSmin
	vecOpUmin
	vecOpUminv
	vecOpFmin
	vecOpSmax
	vecOpUmax
	vecOpUmaxp
	vecOpFmax
	vecOpUrhadd
	vecOpMul
	vecOpFmul
	vecOpSqrdmulh
	vecOpUmlal
	vecOpFdiv
	vecOpFsqrt
	vecOpAbs
	vecOpFabs
	vecOpNeg
	vecOpFneg
	vecOpFrintm
	vecOpFrintn
	vecOpFrintp
	vecOpFrintz
const (
	vecOpCnt vecOp = iota
	vecOpCmeq0
	vecOpCmeq
	vecOpCmgt
	vecOpCmhi
	vecOpCmge
	vecOpCmhs
	vecOpFcmeq
	vecOpFcmgt
	vecOpFcmge
	vecOpUaddlv
	vecOpBit
	vecOpBic
	vecOpBsl
	vecOpNot
	vecOpAnd
	vecOpOrr
	vecOpEOR
	vecOpAdd
	vecOpFadd
	vecOpAddv
	vecOpSqadd
	vecOpUqadd
	vecOpAddp
	vecOpSub
	vecOpFsub
	vecOpSqsub
	vecOpUqsub
	vecOpSmin
	vecOpUmin
	vecOpUminv
	vecOpFmin
	vecOpSmax
	vecOpUmax
	vecOpUmaxp
	vecOpFmax
	vecOpUrhadd
	vecOpMul
	vecOpFmul
	vecOpSqrdmulh
	vecOpUmlal
	vecOpFdiv
	vecOpFsqrt
	vecOpAbs
	vecOpFabs
	vecOpNeg
	vecOpFneg
	vecOpFrintm
	vecOpFrintn
	vecOpFrintp
	vecOpFrintz
	vecOpFcvtl
	vecOpFcvtn
	vecOpFcvtzs
	vecOpFcvtzu
	vecOpScvtf
	vecOpUcvtf
	vecOpSqxtn
	vecOpSqxtun
	vecOpUqxtn
	vecOpRev64
	vecOpXtn
	vecOpShll
	vecOpSshl
	vecOpSshll
	vecOpUshl
	vecOpUshll
	vecOpSshr
	vecOpZip1
)

// bitOp determines the type of bitwise operation. Instructions whose kind is bitRR use
// this type to choose between rbit and clz.
type bitOp int

// String implements fmt.Stringer.
func (b bitOp) String() string {
	switch b {
	case bitOpRbit:
		return "rbit"
	case bitOpClz:
		return "clz"
	}
	panic(int(b))
}

const (
	// 32/64-bit Rbit.
	bitOpRbit bitOp = iota
	// 32/64-bit Clz.
	bitOpClz
)

// fpuUniOp represents a unary floating-point unit (FPU) operation.
type fpuUniOp byte

const (
	fpuUniOpNeg fpuUniOp = iota
	fpuUniOpCvt32To64
	fpuUniOpCvt64To32
	fpuUniOpSqrt
	fpuUniOpRoundPlus
	fpuUniOpRoundMinus
	fpuUniOpRoundZero
	fpuUniOpRoundNearest
	fpuUniOpAbs
)

// String implements fmt.Stringer.
func (f fpuUniOp) String() string {
	switch f {
	case fpuUniOpNeg:
		return "fneg"
	case fpuUniOpCvt32To64:
		return "fcvt"
	case fpuUniOpCvt64To32:
		return "fcvt"
	case fpuUniOpSqrt:
		return "fsqrt"
	case fpuUniOpRoundPlus:
		return "frintp"
	case fpuUniOpRoundMinus:
		return "frintm"
	case fpuUniOpRoundZero:
		return "frintz"
	case fpuUniOpRoundNearest:
		return "frintn"
	case fpuUniOpAbs:
		return "fabs"
	}
	panic(int(f))
}

// fpuBinOp represents a binary floating-point unit (FPU) operation.
type fpuBinOp byte

const (
	fpuBinOpAdd fpuBinOp = iota
	fpuBinOpSub
	fpuBinOpMul
	fpuBinOpDiv
	fpuBinOpMax
	fpuBinOpMin
)

// String implements fmt.Stringer.
func (f fpuBinOp) String() string {
	switch f {
	case fpuBinOpAdd:
		return "fadd"
	case fpuBinOpSub:
		return "fsub"
	case fpuBinOpMul:
		return "fmul"
	case fpuBinOpDiv:
		return "fdiv"
	case fpuBinOpMax:
		return "fmax"
	case fpuBinOpMin:
		return "fmin"
	}
	panic(int(f))
}
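// Illustrative sketch, not part of the original file: both conversion directions stringify
// to "fcvt" because, on AArch64, the conversion direction is implied by the source and
// destination register sizes rather than by the mnemonic. The function name is hypothetical.
func exampleFcvtMnemonics() bool {
	return fpuUniOpCvt32To64.String() == fpuUniOpCvt64To32.String() // true: both are "fcvt"
}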
// extMode represents the mode of a register operand extension.
// For example, aluRRRExtend instructions need this info to determine the extensions.
type extMode byte

const (
	extModeNone extMode = iota
	// extModeZeroExtend32 suggests a zero-extension to 32 bits if the original bit size is less than 32.
	extModeZeroExtend32
	// extModeSignExtend32 stands for a sign-extension to 32 bits if the original bit size is less than 32.
	extModeSignExtend32
	// extModeZeroExtend64 suggests a zero-extension to 64 bits if the original bit size is less than 64.
	extModeZeroExtend64
	// extModeSignExtend64 stands for a sign-extension to 64 bits if the original bit size is less than 64.
	extModeSignExtend64
)

func (e extMode) bits() byte {
	switch e {
	case extModeZeroExtend32, extModeSignExtend32:
		return 32
	case extModeZeroExtend64, extModeSignExtend64:
		return 64
	default:
		return 0
	}
}

func (e extMode) signed() bool {
	switch e {
	case extModeSignExtend32, extModeSignExtend64:
		return true
	default:
		return false
	}
}

func extModeOf(t ssa.Type, signed bool) extMode {
	switch t.Bits() {
	case 32:
		if signed {
			return extModeSignExtend32
		}
		return extModeZeroExtend32
	case 64:
		if signed {
			return extModeSignExtend64
		}
		return extModeZeroExtend64
	default:
		panic("TODO? do we need narrower than 32 bits?")
	}
}

type extendOp byte

const (
	extendOpUXTB extendOp = 0b000
	extendOpUXTH extendOp = 0b001
	extendOpUXTW extendOp = 0b010
	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpUXTX extendOp = 0b011
	extendOpSXTB extendOp = 0b100
	extendOpSXTH extendOp = 0b101
	extendOpSXTW extendOp = 0b110
	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
	extendOpSXTX extendOp = 0b111
	extendOpNone extendOp = 0xff
)

func (e extendOp) srcBits() byte {
	switch e {
	case extendOpUXTB, extendOpSXTB:
		return 8
	case extendOpUXTH, extendOpSXTH:
		return 16
	case extendOpUXTW, extendOpSXTW:
		return 32
	case extendOpUXTX, extendOpSXTX:
		return 64
	}
	panic(int(e))
}

func (e extendOp) String() string {
	switch e {
	case extendOpUXTB:
		return "UXTB"
	case extendOpUXTH:
		return "UXTH"
	case extendOpUXTW:
		return "UXTW"
	case extendOpUXTX:
		return "UXTX"
	case extendOpSXTB:
		return "SXTB"
	case extendOpSXTH:
		return "SXTH"
	case extendOpSXTW:
		return "SXTW"
	case extendOpSXTX:
		return "SXTX"
	}
	panic(int(e))
}

func extendOpFrom(signed bool, from byte) extendOp {
	switch from {
	case 8:
		if signed {
			return extendOpSXTB
		}
		return extendOpUXTB
	case 16:
		if signed {
			return extendOpSXTH
		}
		return extendOpUXTH
	case 32:
		if signed {
			return extendOpSXTW
		}
		return extendOpUXTW
	case 64:
		if signed {
			return extendOpSXTX
		}
		return extendOpUXTX
	}
	panic("invalid extendOpFrom")
}
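// Illustrative sketch, not part of the original file: extendOpFrom composes with srcBits and
// String, e.g. a signed 16-bit source yields extendOpSXTH. The 3-bit constant values above
// match the AArch64 "option" field used by the extended-register ALU forms. The function
// name is hypothetical.
func exampleExtendOpRoundTrip() (byte, string) {
	op := extendOpFrom(true, 16) // signed, 16-bit source -> extendOpSXTH
	return op.srcBits(), op.String() // 16, "SXTH"
}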
type shiftOp byte

const (
	shiftOpLSL shiftOp = 0b00
	shiftOpLSR shiftOp = 0b01
	shiftOpASR shiftOp = 0b10
	shiftOpROR shiftOp = 0b11
)

func (s shiftOp) String() string {
	switch s {
	case shiftOpLSL:
		return "lsl"
	case shiftOpLSR:
		return "lsr"
	case shiftOpASR:
		return "asr"
	case shiftOpROR:
		return "ror"
	}
	panic(int(s))
}

const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.

// size returns the size of the instruction in encoded bytes.
func (i *instruction) size() int64 {
	switch i.kind {
	case exitSequence:
		return exitSequenceSize // 6 instructions as in encodeExitSequence.
	case nop0:
		return 0
	case emitSourceOffsetInfo:
		return 0
	case loadFpuConst32:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 4
	case loadFpuConst64:
		if i.u1 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 8
	case loadFpuConst128:
		if i.u1 == 0 && i.u2 == 0 {
			return 4 // zero loading can be encoded as a single instruction.
		}
		return 4 + 4 + 16
	case brTableSequence:
		return 4*4 + int64(len(i.targets))*4
	default:
		return 4
	}
}
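// Illustrative sketch, not part of the original file: a non-zero loadFpuConst64 spans 16
// bytes, presumably a 4-byte PC-relative load, a 4-byte branch over the embedded literal,
// and the 8-byte literal itself; a zero constant needs only a single 4-byte instruction.
// The exact layout is determined by the encoder, and the function name is hypothetical.
func exampleLoadFpuConst64Size() (int64, int64) {
	zero := &instruction{kind: loadFpuConst64}
	nonZero := &instruction{kind: loadFpuConst64, u1: math.Float64bits(1.0)}
	return zero.size(), nonZero.size() // 4, 16
}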
// vecArrangement is the arrangement of data within a vector register.
type vecArrangement byte

const (
	// vecArrangementNone is an arrangement indicating no data is stored.
	vecArrangementNone vecArrangement = iota
	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector)
	vecArrangement8B
	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector)
	vecArrangement16B
	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector)
	vecArrangement4H
	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector)
	vecArrangement8H
	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector)
	vecArrangement2S
	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector)
	vecArrangement4S
	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector)
	vecArrangement1D
	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector)
	vecArrangement2D

	// Assign each vector size specifier to a vector arrangement ID.
	// Instructions can only have an arrangement or a size specifier, but not both, so it
	// simplifies the internal representation of vector instructions by being able to
	// store either into the same field.

	// vecArrangementB is a size specifier of a byte (8-bit)
	vecArrangementB
	// vecArrangementH is a size specifier of a half word (16-bit)
	vecArrangementH
	// vecArrangementS is a size specifier of a word (32-bit)
	vecArrangementS
	// vecArrangementD is a size specifier of a double word (64-bit)
	vecArrangementD
	// vecArrangementQ is a size specifier of the entire vector (128-bit)
	vecArrangementQ
)

// String implements fmt.Stringer.
func (v vecArrangement) String() (ret string) {
	switch v {
	case vecArrangement8B:
		ret = "8B"
	case vecArrangement16B:
		ret = "16B"
	case vecArrangement4H:
		ret = "4H"
	case vecArrangement8H:
		ret = "8H"
	case vecArrangement2S:
		ret = "2S"
	case vecArrangement4S:
		ret = "4S"
	case vecArrangement1D:
		ret = "1D"
	case vecArrangement2D:
		ret = "2D"
	case vecArrangementB:
		ret = "B"
	case vecArrangementH:
		ret = "H"
	case vecArrangementS:
		ret = "S"
	case vecArrangementD:
		ret = "D"
	case vecArrangementQ:
		ret = "Q"
	case vecArrangementNone:
		ret = "none"
	default:
		panic(v)
	}
	return
}

// vecIndex is the index of an element of a vector register.
type vecIndex byte

// vecIndexNone indicates no vector index specified.
const vecIndexNone = ^vecIndex(0)

func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
	switch lane {
	case ssa.VecLaneI8x16:
		return vecArrangement16B
	case ssa.VecLaneI16x8:
		return vecArrangement8H
	case ssa.VecLaneI32x4:
		return vecArrangement4S
	case ssa.VecLaneI64x2:
		return vecArrangement2D
	case ssa.VecLaneF32x4:
		return vecArrangement4S
	case ssa.VecLaneF64x2:
		return vecArrangement2D
	default:
		panic(lane)
	}
}
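// Illustrative sketch, not part of the original file: integer and float lanes of the same
// width map to the same arrangement, since a vecArrangement only encodes element size and
// count, not whether the elements are interpreted as integers or floats. The function name
// is hypothetical.
func exampleLaneArrangement() bool {
	return ssaLaneToArrangement(ssa.VecLaneI32x4) == ssaLaneToArrangement(ssa.VecLaneF32x4) // true: both are 4S
}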