github.com/tetratelabs/wazero@v1.7.1/internal/engine/wazevo/backend/isa/arm64/instr.go (about) 1 package arm64 2 3 import ( 4 "fmt" 5 "math" 6 7 "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" 8 "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" 9 "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" 10 ) 11 12 type ( 13 // instruction represents either a real instruction in arm64, or the meta instructions 14 // that are convenient for code generation. For example, inline constants are also treated 15 // as instructions. 16 // 17 // Basically, each instruction knows how to get encoded in binaries. Hence, the final output of compilation 18 // can be considered equivalent to the sequence of such instructions. 19 // 20 // Each field is interpreted depending on the kind. 21 // 22 // TODO: optimize the layout later once the impl settles. 23 instruction struct { 24 prev, next *instruction 25 u1, u2, u3 uint64 26 rd, rm, rn, ra operand 27 amode addressMode 28 kind instructionKind 29 addedBeforeRegAlloc bool 30 } 31 32 // instructionKind represents the kind of instruction. 33 // This controls how the instruction struct is interpreted. 34 instructionKind byte 35 ) 36 37 func asNop0(i *instruction) { 38 i.kind = nop0 39 } 40 41 func setNext(i, next *instruction) { 42 i.next = next 43 } 44 45 func setPrev(i, prev *instruction) { 46 i.prev = prev 47 } 48 49 // IsCall implements regalloc.Instr IsCall. 50 func (i *instruction) IsCall() bool { 51 return i.kind == call 52 } 53 54 // IsIndirectCall implements regalloc.Instr IsIndirectCall. 55 func (i *instruction) IsIndirectCall() bool { 56 return i.kind == callInd 57 } 58 59 // IsReturn implements regalloc.Instr IsReturn. 60 func (i *instruction) IsReturn() bool { 61 return i.kind == ret 62 } 63 64 // Next implements regalloc.Instr Next. 65 func (i *instruction) Next() regalloc.Instr { 66 return i.next 67 } 68 69 // Prev implements regalloc.Instr Prev. 70 func (i *instruction) Prev() regalloc.Instr { 71 return i.prev 72 } 73 74 // AddedBeforeRegAlloc implements regalloc.Instr AddedBeforeRegAlloc. 
75 func (i *instruction) AddedBeforeRegAlloc() bool { 76 return i.addedBeforeRegAlloc 77 } 78 79 type defKind byte 80 81 const ( 82 defKindNone defKind = iota + 1 83 defKindRD 84 defKindCall 85 ) 86 87 var defKinds = [numInstructionKinds]defKind{ 88 adr: defKindRD, 89 aluRRR: defKindRD, 90 aluRRRR: defKindRD, 91 aluRRImm12: defKindRD, 92 aluRRBitmaskImm: defKindRD, 93 aluRRRShift: defKindRD, 94 aluRRImmShift: defKindRD, 95 aluRRRExtend: defKindRD, 96 bitRR: defKindRD, 97 movZ: defKindRD, 98 movK: defKindRD, 99 movN: defKindRD, 100 mov32: defKindRD, 101 mov64: defKindRD, 102 fpuMov64: defKindRD, 103 fpuMov128: defKindRD, 104 fpuRR: defKindRD, 105 fpuRRR: defKindRD, 106 nop0: defKindNone, 107 call: defKindCall, 108 callInd: defKindCall, 109 ret: defKindNone, 110 store8: defKindNone, 111 store16: defKindNone, 112 store32: defKindNone, 113 store64: defKindNone, 114 exitSequence: defKindNone, 115 condBr: defKindNone, 116 br: defKindNone, 117 brTableSequence: defKindNone, 118 cSet: defKindRD, 119 extend: defKindRD, 120 fpuCmp: defKindNone, 121 uLoad8: defKindRD, 122 uLoad16: defKindRD, 123 uLoad32: defKindRD, 124 sLoad8: defKindRD, 125 sLoad16: defKindRD, 126 sLoad32: defKindRD, 127 uLoad64: defKindRD, 128 fpuLoad32: defKindRD, 129 fpuLoad64: defKindRD, 130 fpuLoad128: defKindRD, 131 vecLoad1R: defKindRD, 132 loadFpuConst32: defKindRD, 133 loadFpuConst64: defKindRD, 134 loadFpuConst128: defKindRD, 135 fpuStore32: defKindNone, 136 fpuStore64: defKindNone, 137 fpuStore128: defKindNone, 138 udf: defKindNone, 139 cSel: defKindRD, 140 fpuCSel: defKindRD, 141 movToVec: defKindRD, 142 movFromVec: defKindRD, 143 movFromVecSigned: defKindRD, 144 vecDup: defKindRD, 145 vecDupElement: defKindRD, 146 vecExtract: defKindRD, 147 vecMisc: defKindRD, 148 vecMovElement: defKindRD, 149 vecLanes: defKindRD, 150 vecShiftImm: defKindRD, 151 vecTbl: defKindRD, 152 vecTbl2: defKindRD, 153 vecPermute: defKindRD, 154 vecRRR: defKindRD, 155 vecRRRRewrite: defKindNone, 156 fpuToInt: defKindRD, 157 intToFpu: defKindRD, 158 cCmpImm: defKindNone, 159 movToFPSR: defKindNone, 160 movFromFPSR: defKindRD, 161 emitSourceOffsetInfo: defKindNone, 162 atomicRmw: defKindRD, 163 atomicCas: defKindNone, 164 atomicLoad: defKindRD, 165 atomicStore: defKindNone, 166 dmb: defKindNone, 167 loadConstBlockArg: defKindRD, 168 } 169 170 // Defs returns the list of regalloc.VReg that are defined by the instruction. 171 // In order to reduce the number of allocations, the caller can pass the slice to be used. 172 func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { 173 *regs = (*regs)[:0] 174 switch defKinds[i.kind] { 175 case defKindNone: 176 case defKindRD: 177 *regs = append(*regs, i.rd.nr()) 178 case defKindCall: 179 _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) 180 for i := byte(0); i < retIntRealRegs; i++ { 181 *regs = append(*regs, regInfo.RealRegToVReg[intParamResultRegs[i]]) 182 } 183 for i := byte(0); i < retFloatRealRegs; i++ { 184 *regs = append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]]) 185 } 186 default: 187 panic(fmt.Sprintf("defKind for %v not defined", i)) 188 } 189 return *regs 190 } 191 192 // AssignDef implements regalloc.Instr AssignDef. 
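// The field that receives reg is dictated by the defKinds table: defKindRD rewrites i.rd, defKindNone is a
// no-op, and defKindCall panics because call results live in fixed ABI registers that are never reassigned.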
193 func (i *instruction) AssignDef(reg regalloc.VReg) { 194 switch defKinds[i.kind] { 195 case defKindNone: 196 case defKindRD: 197 i.rd = i.rd.assignReg(reg) 198 case defKindCall: 199 panic("BUG: call instructions shouldn't be assigned") 200 default: 201 panic(fmt.Sprintf("defKind for %v not defined", i)) 202 } 203 } 204 205 type useKind byte 206 207 const ( 208 useKindNone useKind = iota + 1 209 useKindRN 210 useKindRNRM 211 useKindRNRMRA 212 useKindRNRN1RM 213 useKindCall 214 useKindCallInd 215 useKindAMode 216 useKindRNAMode 217 useKindCond 218 // useKindRDRewrite indicates an instruction where RD is used both as a source and destination. 219 // A temporary register for RD must be allocated explicitly with the source copied to this 220 // register before the instruction and the value copied from this register to the instruction 221 // return register. 222 useKindRDRewrite 223 ) 224 225 var useKinds = [numInstructionKinds]useKind{ 226 udf: useKindNone, 227 aluRRR: useKindRNRM, 228 aluRRRR: useKindRNRMRA, 229 aluRRImm12: useKindRN, 230 aluRRBitmaskImm: useKindRN, 231 aluRRRShift: useKindRNRM, 232 aluRRImmShift: useKindRN, 233 aluRRRExtend: useKindRNRM, 234 bitRR: useKindRN, 235 movZ: useKindNone, 236 movK: useKindNone, 237 movN: useKindNone, 238 mov32: useKindRN, 239 mov64: useKindRN, 240 fpuMov64: useKindRN, 241 fpuMov128: useKindRN, 242 fpuRR: useKindRN, 243 fpuRRR: useKindRNRM, 244 nop0: useKindNone, 245 call: useKindCall, 246 callInd: useKindCallInd, 247 ret: useKindNone, 248 store8: useKindRNAMode, 249 store16: useKindRNAMode, 250 store32: useKindRNAMode, 251 store64: useKindRNAMode, 252 exitSequence: useKindRN, 253 condBr: useKindCond, 254 br: useKindNone, 255 brTableSequence: useKindRN, 256 cSet: useKindNone, 257 extend: useKindRN, 258 fpuCmp: useKindRNRM, 259 uLoad8: useKindAMode, 260 uLoad16: useKindAMode, 261 uLoad32: useKindAMode, 262 sLoad8: useKindAMode, 263 sLoad16: useKindAMode, 264 sLoad32: useKindAMode, 265 uLoad64: useKindAMode, 266 fpuLoad32: useKindAMode, 267 fpuLoad64: useKindAMode, 268 fpuLoad128: useKindAMode, 269 fpuStore32: useKindRNAMode, 270 fpuStore64: useKindRNAMode, 271 fpuStore128: useKindRNAMode, 272 loadFpuConst32: useKindNone, 273 loadFpuConst64: useKindNone, 274 loadFpuConst128: useKindNone, 275 vecLoad1R: useKindRN, 276 cSel: useKindRNRM, 277 fpuCSel: useKindRNRM, 278 movToVec: useKindRN, 279 movFromVec: useKindRN, 280 movFromVecSigned: useKindRN, 281 vecDup: useKindRN, 282 vecDupElement: useKindRN, 283 vecExtract: useKindRNRM, 284 cCmpImm: useKindRN, 285 vecMisc: useKindRN, 286 vecMovElement: useKindRN, 287 vecLanes: useKindRN, 288 vecShiftImm: useKindRN, 289 vecTbl: useKindRNRM, 290 vecTbl2: useKindRNRN1RM, 291 vecRRR: useKindRNRM, 292 vecRRRRewrite: useKindRDRewrite, 293 vecPermute: useKindRNRM, 294 fpuToInt: useKindRN, 295 intToFpu: useKindRN, 296 movToFPSR: useKindRN, 297 movFromFPSR: useKindNone, 298 adr: useKindNone, 299 emitSourceOffsetInfo: useKindNone, 300 atomicRmw: useKindRNRM, 301 atomicCas: useKindRDRewrite, 302 atomicLoad: useKindRN, 303 atomicStore: useKindRNRM, 304 loadConstBlockArg: useKindNone, 305 dmb: useKindNone, 306 } 307 308 // Uses returns the list of regalloc.VReg that are used by the instruction. 309 // In order to reduce the number of allocations, the caller can pass the slice to be used. 
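// The order of the returned registers follows the operand-index convention consumed by AssignUse:
// rn first, then rm, then any third operand (ra, or the rewritten rd for useKindRDRewrite), with the
// address-mode base/index registers reported in that order for memory operands.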
310 func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { 311 *regs = (*regs)[:0] 312 switch useKinds[i.kind] { 313 case useKindNone: 314 case useKindRN: 315 if rn := i.rn.reg(); rn.Valid() { 316 *regs = append(*regs, rn) 317 } 318 case useKindRNRM: 319 if rn := i.rn.reg(); rn.Valid() { 320 *regs = append(*regs, rn) 321 } 322 if rm := i.rm.reg(); rm.Valid() { 323 *regs = append(*regs, rm) 324 } 325 case useKindRNRMRA: 326 if rn := i.rn.reg(); rn.Valid() { 327 *regs = append(*regs, rn) 328 } 329 if rm := i.rm.reg(); rm.Valid() { 330 *regs = append(*regs, rm) 331 } 332 if ra := i.ra.reg(); ra.Valid() { 333 *regs = append(*regs, ra) 334 } 335 case useKindRNRN1RM: 336 if rn := i.rn.reg(); rn.Valid() && rn.IsRealReg() { 337 rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) 338 *regs = append(*regs, rn, rn1) 339 } 340 if rm := i.rm.reg(); rm.Valid() { 341 *regs = append(*regs, rm) 342 } 343 case useKindAMode: 344 if amodeRN := i.amode.rn; amodeRN.Valid() { 345 *regs = append(*regs, amodeRN) 346 } 347 if amodeRM := i.amode.rm; amodeRM.Valid() { 348 *regs = append(*regs, amodeRM) 349 } 350 case useKindRNAMode: 351 *regs = append(*regs, i.rn.reg()) 352 if amodeRN := i.amode.rn; amodeRN.Valid() { 353 *regs = append(*regs, amodeRN) 354 } 355 if amodeRM := i.amode.rm; amodeRM.Valid() { 356 *regs = append(*regs, amodeRM) 357 } 358 case useKindCond: 359 cnd := cond(i.u1) 360 if cnd.kind() != condKindCondFlagSet { 361 *regs = append(*regs, cnd.register()) 362 } 363 case useKindCallInd: 364 *regs = append(*regs, i.rn.nr()) 365 fallthrough 366 case useKindCall: 367 argIntRealRegs, argFloatRealRegs, _, _, _ := backend.ABIInfoFromUint64(i.u2) 368 for i := byte(0); i < argIntRealRegs; i++ { 369 *regs = append(*regs, regInfo.RealRegToVReg[intParamResultRegs[i]]) 370 } 371 for i := byte(0); i < argFloatRealRegs; i++ { 372 *regs = append(*regs, regInfo.RealRegToVReg[floatParamResultRegs[i]]) 373 } 374 case useKindRDRewrite: 375 *regs = append(*regs, i.rn.reg()) 376 *regs = append(*regs, i.rm.reg()) 377 *regs = append(*regs, i.rd.reg()) 378 default: 379 panic(fmt.Sprintf("useKind for %v not defined", i)) 380 } 381 return *regs 382 } 383 384 func (i *instruction) AssignUse(index int, reg regalloc.VReg) { 385 switch useKinds[i.kind] { 386 case useKindNone: 387 case useKindRN: 388 if rn := i.rn.reg(); rn.Valid() { 389 i.rn = i.rn.assignReg(reg) 390 } 391 case useKindRNRM: 392 if index == 0 { 393 if rn := i.rn.reg(); rn.Valid() { 394 i.rn = i.rn.assignReg(reg) 395 } 396 } else { 397 if rm := i.rm.reg(); rm.Valid() { 398 i.rm = i.rm.assignReg(reg) 399 } 400 } 401 case useKindRDRewrite: 402 if index == 0 { 403 if rn := i.rn.reg(); rn.Valid() { 404 i.rn = i.rn.assignReg(reg) 405 } 406 } else if index == 1 { 407 if rm := i.rm.reg(); rm.Valid() { 408 i.rm = i.rm.assignReg(reg) 409 } 410 } else { 411 if rd := i.rd.reg(); rd.Valid() { 412 i.rd = i.rd.assignReg(reg) 413 } 414 } 415 case useKindRNRN1RM: 416 if index == 0 { 417 if rn := i.rn.reg(); rn.Valid() { 418 i.rn = i.rn.assignReg(reg) 419 } 420 if rn1 := i.rn.reg() + 1; rn1.Valid() { 421 i.rm = i.rm.assignReg(reg + 1) 422 } 423 } else { 424 if rm := i.rm.reg(); rm.Valid() { 425 i.rm = i.rm.assignReg(reg) 426 } 427 } 428 case useKindRNRMRA: 429 if index == 0 { 430 if rn := i.rn.reg(); rn.Valid() { 431 i.rn = i.rn.assignReg(reg) 432 } 433 } else if index == 1 { 434 if rm := i.rm.reg(); rm.Valid() { 435 i.rm = i.rm.assignReg(reg) 436 } 437 } else { 438 if ra := i.ra.reg(); ra.Valid() { 439 i.ra = i.ra.assignReg(reg) 440 } 441 } 442 case useKindAMode: 
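// For pure address-mode operands, index 0 refers to amode.rn and index 1 to amode.rm, mirroring the
// order in which Uses reported them.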
443 if index == 0 { 444 if amodeRN := i.amode.rn; amodeRN.Valid() { 445 i.amode.rn = reg 446 } 447 } else { 448 if amodeRM := i.amode.rm; amodeRM.Valid() { 449 i.amode.rm = reg 450 } 451 } 452 case useKindRNAMode: 453 if index == 0 { 454 i.rn = i.rn.assignReg(reg) 455 } else if index == 1 { 456 if amodeRN := i.amode.rn; amodeRN.Valid() { 457 i.amode.rn = reg 458 } else { 459 panic("BUG") 460 } 461 } else { 462 if amodeRM := i.amode.rm; amodeRM.Valid() { 463 i.amode.rm = reg 464 } else { 465 panic("BUG") 466 } 467 } 468 case useKindCond: 469 c := cond(i.u1) 470 switch c.kind() { 471 case condKindRegisterZero: 472 i.u1 = uint64(registerAsRegZeroCond(reg)) 473 case condKindRegisterNotZero: 474 i.u1 = uint64(registerAsRegNotZeroCond(reg)) 475 } 476 case useKindCall: 477 panic("BUG: call instructions shouldn't be assigned") 478 case useKindCallInd: 479 i.rn = i.rn.assignReg(reg) 480 default: 481 panic(fmt.Sprintf("useKind for %v not defined", i)) 482 } 483 } 484 485 func (i *instruction) asCall(ref ssa.FuncRef, abi *backend.FunctionABI) { 486 i.kind = call 487 i.u1 = uint64(ref) 488 if abi != nil { 489 i.u2 = abi.ABIInfoAsUint64() 490 } 491 } 492 493 func (i *instruction) asCallIndirect(ptr regalloc.VReg, abi *backend.FunctionABI) { 494 i.kind = callInd 495 i.rn = operandNR(ptr) 496 if abi != nil { 497 i.u2 = abi.ABIInfoAsUint64() 498 } 499 } 500 501 func (i *instruction) callFuncRef() ssa.FuncRef { 502 return ssa.FuncRef(i.u1) 503 } 504 505 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 506 func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 507 i.kind = movZ 508 i.rd = operandNR(dst) 509 i.u1 = imm 510 i.u2 = shift 511 if dst64bit { 512 i.u3 = 1 513 } 514 } 515 516 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 517 func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 518 i.kind = movK 519 i.rd = operandNR(dst) 520 i.u1 = imm 521 i.u2 = shift 522 if dst64bit { 523 i.u3 = 1 524 } 525 } 526 527 // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) 528 func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { 529 i.kind = movN 530 i.rd = operandNR(dst) 531 i.u1 = imm 532 i.u2 = shift 533 if dst64bit { 534 i.u3 = 1 535 } 536 } 537 538 func (i *instruction) asNop0() *instruction { 539 i.kind = nop0 540 return i 541 } 542 543 func (i *instruction) asNop0WithLabel(l label) { 544 i.kind = nop0 545 i.u1 = uint64(l) 546 } 547 548 func (i *instruction) nop0Label() label { 549 return label(i.u1) 550 } 551 552 func (i *instruction) asRet() { 553 i.kind = ret 554 } 555 556 func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { 557 i.kind = storeP64 558 i.rn = operandNR(src1) 559 i.rm = operandNR(src2) 560 i.amode = amode 561 } 562 563 func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { 564 i.kind = loadP64 565 i.rn = operandNR(src1) 566 i.rm = operandNR(src2) 567 i.amode = amode 568 } 569 570 func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { 571 switch sizeInBits { 572 case 8: 573 i.kind = store8 574 case 16: 575 i.kind = store16 576 case 32: 577 if src.reg().RegType() == regalloc.RegTypeInt { 578 i.kind = store32 579 } else { 580 i.kind = fpuStore32 581 } 582 case 64: 583 if src.reg().RegType() == regalloc.RegTypeInt { 584 i.kind = 
store64 585 } else { 586 i.kind = fpuStore64 587 } 588 case 128: 589 i.kind = fpuStore128 590 } 591 i.rn = src 592 i.amode = amode 593 } 594 595 func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { 596 switch sizeInBits { 597 case 8: 598 i.kind = sLoad8 599 case 16: 600 i.kind = sLoad16 601 case 32: 602 i.kind = sLoad32 603 default: 604 panic("BUG") 605 } 606 i.rd = dst 607 i.amode = amode 608 } 609 610 func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { 611 switch sizeInBits { 612 case 8: 613 i.kind = uLoad8 614 case 16: 615 i.kind = uLoad16 616 case 32: 617 i.kind = uLoad32 618 case 64: 619 i.kind = uLoad64 620 } 621 i.rd = dst 622 i.amode = amode 623 } 624 625 func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { 626 switch sizeInBits { 627 case 32: 628 i.kind = fpuLoad32 629 case 64: 630 i.kind = fpuLoad64 631 case 128: 632 i.kind = fpuLoad128 633 } 634 i.rd = dst 635 i.amode = amode 636 } 637 638 func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { 639 // NOTE: currently only has support for no-offset loads, though it is suspicious that 640 // we would need to support offset load (that is only available for post-index). 641 i.kind = vecLoad1R 642 i.rd = rd 643 i.rn = rn 644 i.u1 = uint64(arr) 645 } 646 647 func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { 648 i.kind = cSet 649 i.rd = operandNR(rd) 650 i.u1 = uint64(c) 651 if mask { 652 i.u2 = 1 653 } 654 } 655 656 func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { 657 i.kind = cSel 658 i.rd = rd 659 i.rn = rn 660 i.rm = rm 661 i.u1 = uint64(c) 662 if _64bit { 663 i.u3 = 1 664 } 665 } 666 667 func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { 668 i.kind = fpuCSel 669 i.rd = rd 670 i.rn = rn 671 i.rm = rm 672 i.u1 = uint64(c) 673 if _64bit { 674 i.u3 = 1 675 } 676 } 677 678 func (i *instruction) asBr(target label) { 679 if target == labelReturn { 680 panic("BUG: call site should special case for returnLabel") 681 } 682 i.kind = br 683 i.u1 = uint64(target) 684 } 685 686 func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, targetCounts int) { 687 i.kind = brTableSequence 688 i.rn = operandNR(indexReg) 689 i.u1 = uint64(targetIndex) 690 i.u2 = uint64(targetCounts) 691 } 692 693 func (i *instruction) brTableSequenceOffsetsResolved() { 694 i.u3 = 1 // indicate that the offsets are resolved, for debugging. 695 } 696 697 func (i *instruction) brLabel() label { 698 return label(i.u1) 699 } 700 701 // brOffsetResolved is called when the target label is resolved. 702 func (i *instruction) brOffsetResolve(offset int64) { 703 i.u2 = uint64(offset) 704 i.u3 = 1 // indicate that the offset is resolved, for debugging. 705 } 706 707 func (i *instruction) brOffset() int64 { 708 return int64(i.u2) 709 } 710 711 // asCondBr encodes a conditional branch instruction. is64bit is only needed when cond is not flag. 712 func (i *instruction) asCondBr(c cond, target label, is64bit bool) { 713 i.kind = condBr 714 i.u1 = c.asUint64() 715 i.u2 = uint64(target) 716 if is64bit { 717 i.u3 = 1 718 } 719 } 720 721 func (i *instruction) setCondBrTargets(target label) { 722 i.u2 = uint64(target) 723 } 724 725 func (i *instruction) condBrLabel() label { 726 return label(i.u2) 727 } 728 729 // condBrOffsetResolve is called when the target label is resolved. 
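// condBr has no register destination, so the resolved byte offset is stashed in the otherwise unused rd
// operand (rd.data), with rd.data2 acting as a "resolved" marker for debugging.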
730 func (i *instruction) condBrOffsetResolve(offset int64) { 731 i.rd.data = uint64(offset) 732 i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. 733 } 734 735 // condBrOffsetResolved returns true if condBrOffsetResolve is already called. 736 func (i *instruction) condBrOffsetResolved() bool { 737 return i.rd.data2 == 1 738 } 739 740 func (i *instruction) condBrOffset() int64 { 741 return int64(i.rd.data) 742 } 743 744 func (i *instruction) condBrCond() cond { 745 return cond(i.u1) 746 } 747 748 func (i *instruction) condBr64bit() bool { 749 return i.u3 == 1 750 } 751 752 func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { 753 i.kind = loadFpuConst32 754 i.u1 = raw 755 i.rd = operandNR(rd) 756 } 757 758 func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { 759 i.kind = loadFpuConst64 760 i.u1 = raw 761 i.rd = operandNR(rd) 762 } 763 764 func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { 765 i.kind = loadFpuConst128 766 i.u1 = lo 767 i.u2 = hi 768 i.rd = operandNR(rd) 769 } 770 771 func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { 772 i.kind = fpuCmp 773 i.rn, i.rm = rn, rm 774 if is64bit { 775 i.u3 = 1 776 } 777 } 778 779 func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, is64bit bool) { 780 i.kind = cCmpImm 781 i.rn = rn 782 i.rm.data = imm 783 i.u1 = uint64(c) 784 i.u2 = uint64(flag) 785 if is64bit { 786 i.u3 = 1 787 } 788 } 789 790 // asALU setups a basic ALU instruction. 791 func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { 792 switch rm.kind { 793 case operandKindNR: 794 i.kind = aluRRR 795 case operandKindSR: 796 i.kind = aluRRRShift 797 case operandKindER: 798 i.kind = aluRRRExtend 799 case operandKindImm12: 800 i.kind = aluRRImm12 801 default: 802 panic("BUG") 803 } 804 i.u1 = uint64(aluOp) 805 i.rd, i.rn, i.rm = rd, rn, rm 806 if dst64bit { 807 i.u3 = 1 808 } 809 } 810 811 // asALU setups a basic ALU instruction. 812 func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { 813 i.kind = aluRRRR 814 i.u1 = uint64(aluOp) 815 i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra 816 if dst64bit { 817 i.u3 = 1 818 } 819 } 820 821 // asALUShift setups a shift based ALU instruction. 822 func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { 823 switch rm.kind { 824 case operandKindNR: 825 i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. 
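// A constant shift amount instead selects the dedicated register-plus-immediate-shift encoding.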
826 case operandKindShiftImm: 827 i.kind = aluRRImmShift 828 default: 829 panic("BUG") 830 } 831 i.u1 = uint64(aluOp) 832 i.rd, i.rn, i.rm = rd, rn, rm 833 if dst64bit { 834 i.u3 = 1 835 } 836 } 837 838 func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { 839 i.kind = aluRRBitmaskImm 840 i.u1 = uint64(aluOp) 841 i.rn, i.rd = operandNR(rn), operandNR(rd) 842 i.u2 = imm 843 if dst64bit { 844 i.u3 = 1 845 } 846 } 847 848 func (i *instruction) asMovToFPSR(rn regalloc.VReg) { 849 i.kind = movToFPSR 850 i.rn = operandNR(rn) 851 } 852 853 func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { 854 i.kind = movFromFPSR 855 i.rd = operandNR(rd) 856 } 857 858 func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { 859 i.kind = bitRR 860 i.rn, i.rd = operandNR(rn), operandNR(rd) 861 i.u1 = uint64(bitOp) 862 if is64bit { 863 i.u2 = 1 864 } 865 } 866 867 func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { 868 i.kind = fpuRRR 869 i.u1 = uint64(op) 870 i.rd, i.rn, i.rm = rd, rn, rm 871 if dst64bit { 872 i.u3 = 1 873 } 874 } 875 876 func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { 877 i.kind = fpuRR 878 i.u1 = uint64(op) 879 i.rd, i.rn = rd, rn 880 if dst64bit { 881 i.u3 = 1 882 } 883 } 884 885 func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { 886 i.kind = extend 887 i.rn, i.rd = operandNR(rn), operandNR(rd) 888 i.u1 = uint64(fromBits) 889 i.u2 = uint64(toBits) 890 if signed { 891 i.u3 = 1 892 } 893 } 894 895 func (i *instruction) asMove32(rd, rn regalloc.VReg) { 896 i.kind = mov32 897 i.rn, i.rd = operandNR(rn), operandNR(rd) 898 } 899 900 func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { 901 i.kind = mov64 902 i.rn, i.rd = operandNR(rn), operandNR(rd) 903 return i 904 } 905 906 func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { 907 i.kind = fpuMov64 908 i.rn, i.rd = operandNR(rn), operandNR(rd) 909 } 910 911 func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { 912 i.kind = fpuMov128 913 i.rn, i.rd = operandNR(rn), operandNR(rd) 914 return i 915 } 916 917 func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { 918 i.kind = movToVec 919 i.rd = rd 920 i.rn = rn 921 i.u1, i.u2 = uint64(arr), uint64(index) 922 } 923 924 func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { 925 if signed { 926 i.kind = movFromVecSigned 927 } else { 928 i.kind = movFromVec 929 } 930 i.rd = rd 931 i.rn = rn 932 i.u1, i.u2 = uint64(arr), uint64(index) 933 } 934 935 func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { 936 i.kind = vecDup 937 i.u1 = uint64(arr) 938 i.rn, i.rd = rn, rd 939 } 940 941 func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { 942 i.kind = vecDupElement 943 i.u1 = uint64(arr) 944 i.rn, i.rd = rn, rd 945 i.u2 = uint64(index) 946 } 947 948 func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { 949 i.kind = vecExtract 950 i.u1 = uint64(arr) 951 i.rn, i.rm, i.rd = rn, rm, rd 952 i.u2 = uint64(index) 953 } 954 955 func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { 956 i.kind = vecMovElement 957 i.u1 = uint64(arr) 958 i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) 959 i.rn, i.rd = rn, rd 960 } 961 962 func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { 963 
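// vecMisc is the two-register miscellaneous group (it also carries the cmeq-against-zero form);
// the op is stored in u1 and the lane arrangement in u2.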
i.kind = vecMisc 964 i.u1 = uint64(op) 965 i.rn, i.rd = rn, rd 966 i.u2 = uint64(arr) 967 } 968 969 func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { 970 i.kind = vecLanes 971 i.u1 = uint64(op) 972 i.rn, i.rd = rn, rd 973 i.u2 = uint64(arr) 974 } 975 976 func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { 977 i.kind = vecShiftImm 978 i.u1 = uint64(op) 979 i.rn, i.rm, i.rd = rn, rm, rd 980 i.u2 = uint64(arr) 981 return i 982 } 983 984 func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { 985 switch nregs { 986 case 0, 1: 987 i.kind = vecTbl 988 case 2: 989 i.kind = vecTbl2 990 if !rn.reg().IsRealReg() { 991 panic("rn is not a RealReg") 992 } 993 if rn.realReg() == v31 { 994 panic("rn cannot be v31") 995 } 996 default: 997 panic(fmt.Sprintf("unsupported number of registers %d", nregs)) 998 } 999 i.rn, i.rm, i.rd = rn, rm, rd 1000 i.u2 = uint64(arr) 1001 } 1002 1003 func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { 1004 i.kind = vecPermute 1005 i.u1 = uint64(op) 1006 i.rn, i.rm, i.rd = rn, rm, rd 1007 i.u2 = uint64(arr) 1008 } 1009 1010 func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { 1011 i.kind = vecRRR 1012 i.u1 = uint64(op) 1013 i.rn, i.rd, i.rm = rn, rd, rm 1014 i.u2 = uint64(arr) 1015 return i 1016 } 1017 1018 // asVecRRRRewrite encodes a vector instruction that rewrites the destination register. 1019 // IMPORTANT: the destination register must be already defined before this instruction. 1020 func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { 1021 i.kind = vecRRRRewrite 1022 i.u1 = uint64(op) 1023 i.rn, i.rd, i.rm = rn, rd, rm 1024 i.u2 = uint64(arr) 1025 } 1026 1027 func (i *instruction) IsCopy() bool { 1028 op := i.kind 1029 // We do not include mov32 as it is not a copy instruction in the sense that it does not preserve the upper 32 bits, 1030 // and it is only used in the translation of IReduce, not the actual copy indeed. 1031 return op == mov64 || op == fpuMov64 || op == fpuMov128 1032 } 1033 1034 // String implements fmt.Stringer. 
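// The output approximates AArch64 assembly syntax, which is handy when dumping the lowered code.
// As an illustrative sketch (dst, lhs and rhs stand for hypothetical regalloc.VReg values produced
// elsewhere by the compiler; they are not defined in this file):
//
//	add := &instruction{}
//	add.asALU(aluOpAdd, operandNR(dst), operandNR(lhs), operandNR(rhs), true)
//	fmt.Println(add) // prints something like "add x1, x2, x3" once real registers are assigned.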
1035 func (i *instruction) String() (str string) { 1036 is64SizeBitToSize := func(u3 uint64) byte { 1037 if u3 == 0 { 1038 return 32 1039 } 1040 return 64 1041 } 1042 1043 switch i.kind { 1044 case nop0: 1045 if i.u1 != 0 { 1046 l := label(i.u1) 1047 str = fmt.Sprintf("%s:", l) 1048 } else { 1049 str = "nop0" 1050 } 1051 case aluRRR: 1052 size := is64SizeBitToSize(i.u3) 1053 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1054 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), 1055 i.rm.format(size)) 1056 case aluRRRR: 1057 size := is64SizeBitToSize(i.u3) 1058 str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), 1059 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) 1060 case aluRRImm12: 1061 size := is64SizeBitToSize(i.u3) 1062 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1063 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) 1064 case aluRRBitmaskImm: 1065 size := is64SizeBitToSize(i.u3) 1066 rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) 1067 if size == 32 { 1068 str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2)) 1069 } else { 1070 str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2) 1071 } 1072 case aluRRImmShift: 1073 size := is64SizeBitToSize(i.u3) 1074 str = fmt.Sprintf("%s %s, %s, %#x", 1075 aluOp(i.u1).String(), 1076 formatVRegSized(i.rd.nr(), size), 1077 formatVRegSized(i.rn.nr(), size), 1078 i.rm.shiftImm(), 1079 ) 1080 case aluRRRShift: 1081 size := is64SizeBitToSize(i.u3) 1082 str = fmt.Sprintf("%s %s, %s, %s", 1083 aluOp(i.u1).String(), 1084 formatVRegSized(i.rd.nr(), size), 1085 formatVRegSized(i.rn.nr(), size), 1086 i.rm.format(size), 1087 ) 1088 case aluRRRExtend: 1089 size := is64SizeBitToSize(i.u3) 1090 str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), 1091 formatVRegSized(i.rd.nr(), size), 1092 formatVRegSized(i.rn.nr(), size), 1093 // Regardless of the source size, the register is formatted in 32-bit. 
1094 i.rm.format(32), 1095 ) 1096 case bitRR: 1097 size := is64SizeBitToSize(i.u2) 1098 str = fmt.Sprintf("%s %s, %s", 1099 bitOp(i.u1), 1100 formatVRegSized(i.rd.nr(), size), 1101 formatVRegSized(i.rn.nr(), size), 1102 ) 1103 case uLoad8: 1104 str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1105 case sLoad8: 1106 str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1107 case uLoad16: 1108 str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1109 case sLoad16: 1110 str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1111 case uLoad32: 1112 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1113 case sLoad32: 1114 str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1115 case uLoad64: 1116 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) 1117 case store8: 1118 str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) 1119 case store16: 1120 str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) 1121 case store32: 1122 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) 1123 case store64: 1124 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) 1125 case storeP64: 1126 str = fmt.Sprintf("stp %s, %s, %s", 1127 formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) 1128 case loadP64: 1129 str = fmt.Sprintf("ldp %s, %s, %s", 1130 formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) 1131 case mov64: 1132 str = fmt.Sprintf("mov %s, %s", 1133 formatVRegSized(i.rd.nr(), 64), 1134 formatVRegSized(i.rn.nr(), 64)) 1135 case mov32: 1136 str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) 1137 case movZ: 1138 size := is64SizeBitToSize(i.u3) 1139 str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1140 case movN: 1141 size := is64SizeBitToSize(i.u3) 1142 str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1143 case movK: 1144 size := is64SizeBitToSize(i.u3) 1145 str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) 1146 case extend: 1147 fromBits, toBits := byte(i.u1), byte(i.u2) 1148 1149 var signedStr string 1150 if i.u3 == 1 { 1151 signedStr = "s" 1152 } else { 1153 signedStr = "u" 1154 } 1155 var fromStr string 1156 switch fromBits { 1157 case 8: 1158 fromStr = "b" 1159 case 16: 1160 fromStr = "h" 1161 case 32: 1162 fromStr = "w" 1163 } 1164 str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) 1165 case cSel: 1166 size := is64SizeBitToSize(i.u3) 1167 str = fmt.Sprintf("csel %s, %s, %s, %s", 1168 formatVRegSized(i.rd.nr(), size), 1169 formatVRegSized(i.rn.nr(), size), 1170 formatVRegSized(i.rm.nr(), size), 1171 condFlag(i.u1), 1172 ) 1173 case cSet: 1174 if i.u2 != 0 { 1175 str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) 1176 } else { 1177 str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) 1178 } 1179 case cCmpImm: 1180 size := is64SizeBitToSize(i.u3) 1181 str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s", 1182 formatVRegSized(i.rn.nr(), size), i.rm.data, 1183 i.u2&0b1111, 1184 
condFlag(i.u1)) 1185 case fpuMov64: 1186 str = fmt.Sprintf("mov %s, %s", 1187 formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), 1188 formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone)) 1189 case fpuMov128: 1190 str = fmt.Sprintf("mov %s, %s", 1191 formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), 1192 formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone)) 1193 case fpuMovFromVec: 1194 panic("TODO") 1195 case fpuRR: 1196 dstSz := is64SizeBitToSize(i.u3) 1197 srcSz := dstSz 1198 op := fpuUniOp(i.u1) 1199 switch op { 1200 case fpuUniOpCvt32To64: 1201 srcSz = 32 1202 case fpuUniOpCvt64To32: 1203 srcSz = 64 1204 } 1205 str = fmt.Sprintf("%s %s, %s", op.String(), 1206 formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) 1207 case fpuRRR: 1208 size := is64SizeBitToSize(i.u3) 1209 str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), 1210 formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) 1211 case fpuRRI: 1212 panic("TODO") 1213 case fpuRRRR: 1214 panic("TODO") 1215 case fpuCmp: 1216 size := is64SizeBitToSize(i.u3) 1217 str = fmt.Sprintf("fcmp %s, %s", 1218 formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) 1219 case fpuLoad32: 1220 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) 1221 case fpuStore32: 1222 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) 1223 case fpuLoad64: 1224 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) 1225 case fpuStore64: 1226 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) 1227 case fpuLoad128: 1228 str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) 1229 case fpuStore128: 1230 str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) 1231 case loadFpuConst32: 1232 str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) 1233 case loadFpuConst64: 1234 str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) 1235 case loadFpuConst128: 1236 str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", 1237 formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) 1238 case fpuToInt: 1239 var op, src, dst string 1240 if signed := i.u1 == 1; signed { 1241 op = "fcvtzs" 1242 } else { 1243 op = "fcvtzu" 1244 } 1245 if src64 := i.u2 == 1; src64 { 1246 src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) 1247 } else { 1248 src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) 1249 } 1250 if dst64 := i.u3 == 1; dst64 { 1251 dst = formatVRegSized(i.rd.nr(), 64) 1252 } else { 1253 dst = formatVRegSized(i.rd.nr(), 32) 1254 } 1255 str = fmt.Sprintf("%s %s, %s", op, dst, src) 1256 1257 case intToFpu: 1258 var op, src, dst string 1259 if signed := i.u1 == 1; signed { 1260 op = "scvtf" 1261 } else { 1262 op = "ucvtf" 1263 } 1264 if src64 := i.u2 == 1; src64 { 1265 src = formatVRegSized(i.rn.nr(), 64) 1266 } else { 1267 src = formatVRegSized(i.rn.nr(), 32) 1268 } 1269 if dst64 := i.u3 == 1; dst64 { 1270 dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) 1271 } else { 1272 dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) 1273 } 1274 str = fmt.Sprintf("%s %s, %s", op, dst, src) 1275 case fpuCSel: 1276 size := is64SizeBitToSize(i.u3) 1277 str = fmt.Sprintf("fcsel %s, %s, %s, %s", 1278 formatVRegSized(i.rd.nr(), size), 1279 formatVRegSized(i.rn.nr(), size), 1280 
formatVRegSized(i.rm.nr(), size), 1281 condFlag(i.u1), 1282 ) 1283 case movToVec: 1284 var size byte 1285 arr := vecArrangement(i.u1) 1286 switch arr { 1287 case vecArrangementB, vecArrangementH, vecArrangementS: 1288 size = 32 1289 case vecArrangementD: 1290 size = 64 1291 default: 1292 panic("unsupported arrangement " + arr.String()) 1293 } 1294 str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) 1295 case movFromVec, movFromVecSigned: 1296 var size byte 1297 var opcode string 1298 arr := vecArrangement(i.u1) 1299 signed := i.kind == movFromVecSigned 1300 switch arr { 1301 case vecArrangementB, vecArrangementH, vecArrangementS: 1302 size = 32 1303 if signed { 1304 opcode = "smov" 1305 } else { 1306 opcode = "umov" 1307 } 1308 case vecArrangementD: 1309 size = 64 1310 if signed { 1311 opcode = "smov" 1312 } else { 1313 opcode = "mov" 1314 } 1315 default: 1316 panic("unsupported arrangement " + arr.String()) 1317 } 1318 str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) 1319 case vecDup: 1320 str = fmt.Sprintf("dup %s, %s", 1321 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), 1322 formatVRegSized(i.rn.nr(), 64), 1323 ) 1324 case vecDupElement: 1325 arr := vecArrangement(i.u1) 1326 str = fmt.Sprintf("dup %s, %s", 1327 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1328 formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)), 1329 ) 1330 case vecDupFromFpu: 1331 panic("TODO") 1332 case vecExtract: 1333 str = fmt.Sprintf("ext %s, %s, %s, #%d", 1334 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), 1335 formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone), 1336 formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone), 1337 uint32(i.u2), 1338 ) 1339 case vecExtend: 1340 panic("TODO") 1341 case vecMovElement: 1342 str = fmt.Sprintf("mov %s, %s", 1343 formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), 1344 formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), 1345 ) 1346 case vecMiscNarrow: 1347 panic("TODO") 1348 case vecRRR, vecRRRRewrite: 1349 str = fmt.Sprintf("%s %s, %s, %s", 1350 vecOp(i.u1), 1351 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1352 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone), 1353 formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone), 1354 ) 1355 case vecMisc: 1356 vop := vecOp(i.u1) 1357 if vop == vecOpCmeq0 { 1358 str = fmt.Sprintf("cmeq %s, %s, #0", 1359 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1360 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) 1361 } else { 1362 str = fmt.Sprintf("%s %s, %s", 1363 vop, 1364 formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), 1365 formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone)) 1366 } 1367 case vecLanes: 1368 arr := vecArrangement(i.u2) 1369 var destArr vecArrangement 1370 switch arr { 1371 case vecArrangement8B, vecArrangement16B: 1372 destArr = vecArrangementH 1373 case vecArrangement4H, vecArrangement8H: 1374 destArr = vecArrangementS 1375 case vecArrangement4S: 1376 destArr = vecArrangementD 1377 default: 1378 panic("invalid arrangement " + arr.String()) 1379 } 1380 str = fmt.Sprintf("%s %s, %s", 1381 vecOp(i.u1), 1382 formatVRegWidthVec(i.rd.nr(), destArr), 1383 formatVRegVec(i.rn.nr(), arr, vecIndexNone)) 1384 case vecShiftImm: 1385 arr := vecArrangement(i.u2) 1386 str = fmt.Sprintf("%s %s, %s, #%d", 1387 vecOp(i.u1), 1388 formatVRegVec(i.rd.nr(), arr, 
vecIndexNone), 1389 formatVRegVec(i.rn.nr(), arr, vecIndexNone), 1390 i.rm.shiftImm()) 1391 case vecTbl: 1392 arr := vecArrangement(i.u2) 1393 str = fmt.Sprintf("tbl %s, { %s }, %s", 1394 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1395 formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone), 1396 formatVRegVec(i.rm.nr(), arr, vecIndexNone)) 1397 case vecTbl2: 1398 arr := vecArrangement(i.u2) 1399 rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() 1400 rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType()) 1401 str = fmt.Sprintf("tbl %s, { %s, %s }, %s", 1402 formatVRegVec(rd, arr, vecIndexNone), 1403 formatVRegVec(rn, vecArrangement16B, vecIndexNone), 1404 formatVRegVec(rn1, vecArrangement16B, vecIndexNone), 1405 formatVRegVec(rm, arr, vecIndexNone)) 1406 case vecPermute: 1407 arr := vecArrangement(i.u2) 1408 str = fmt.Sprintf("%s %s, %s, %s", 1409 vecOp(i.u1), 1410 formatVRegVec(i.rd.nr(), arr, vecIndexNone), 1411 formatVRegVec(i.rn.nr(), arr, vecIndexNone), 1412 formatVRegVec(i.rm.nr(), arr, vecIndexNone)) 1413 case movToFPSR: 1414 str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64)) 1415 case movFromFPSR: 1416 str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) 1417 case call: 1418 str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1)) 1419 case callInd: 1420 str = fmt.Sprintf("bl %s", formatVRegSized(i.rn.nr(), 64)) 1421 case ret: 1422 str = "ret" 1423 case br: 1424 target := label(i.u1) 1425 if i.u3 != 0 { 1426 str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String()) 1427 } else { 1428 str = fmt.Sprintf("b %s", target.String()) 1429 } 1430 case condBr: 1431 size := is64SizeBitToSize(i.u3) 1432 c := cond(i.u1) 1433 target := label(i.u2) 1434 switch c.kind() { 1435 case condKindRegisterZero: 1436 if !i.condBrOffsetResolved() { 1437 str = fmt.Sprintf("cbz %s, (%s)", formatVRegSized(c.register(), size), target.String()) 1438 } else { 1439 str = fmt.Sprintf("cbz %s, #%#x %s", formatVRegSized(c.register(), size), i.condBrOffset(), target.String()) 1440 } 1441 case condKindRegisterNotZero: 1442 if offset := i.condBrOffset(); offset != 0 { 1443 str = fmt.Sprintf("cbnz %s, #%#x (%s)", formatVRegSized(c.register(), size), offset, target.String()) 1444 } else { 1445 str = fmt.Sprintf("cbnz %s, %s", formatVRegSized(c.register(), size), target.String()) 1446 } 1447 case condKindCondFlagSet: 1448 if offset := i.condBrOffset(); offset != 0 { 1449 if target == labelInvalid { 1450 str = fmt.Sprintf("b.%s #%#x", c.flag(), offset) 1451 } else { 1452 str = fmt.Sprintf("b.%s #%#x, (%s)", c.flag(), offset, target.String()) 1453 } 1454 } else { 1455 str = fmt.Sprintf("b.%s %s", c.flag(), target.String()) 1456 } 1457 } 1458 case adr: 1459 str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) 1460 case brTableSequence: 1461 targetIndex := i.u1 1462 str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) 1463 case exitSequence: 1464 str = fmt.Sprintf("exit_sequence %s", formatVRegSized(i.rn.nr(), 64)) 1465 case atomicRmw: 1466 m := atomicRmwOp(i.u1).String() 1467 size := byte(32) 1468 switch i.u2 { 1469 case 8: 1470 size = 64 1471 case 2: 1472 m = m + "h" 1473 case 1: 1474 m = m + "b" 1475 } 1476 str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) 1477 case atomicCas: 1478 m := "casal" 1479 size := byte(32) 1480 switch i.u2 { 1481 case 8: 1482 size = 64 1483 case 2: 1484 m = m + "h" 1485 case 1: 1486 m = m + "b" 1487 } 1488 str = 
fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) 1489 case atomicLoad: 1490 m := "ldar" 1491 size := byte(32) 1492 switch i.u2 { 1493 case 8: 1494 size = 64 1495 case 2: 1496 m = m + "h" 1497 case 1: 1498 m = m + "b" 1499 } 1500 str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) 1501 case atomicStore: 1502 m := "stlr" 1503 size := byte(32) 1504 switch i.u2 { 1505 case 8: 1506 size = 64 1507 case 2: 1508 m = m + "h" 1509 case 1: 1510 m = m + "b" 1511 } 1512 str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) 1513 case dmb: 1514 str = "dmb" 1515 case udf: 1516 str = "udf" 1517 case emitSourceOffsetInfo: 1518 str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1)) 1519 case vecLoad1R: 1520 str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) 1521 case loadConstBlockArg: 1522 str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1) 1523 default: 1524 panic(i.kind) 1525 } 1526 return 1527 } 1528 1529 func (i *instruction) asAdr(rd regalloc.VReg, offset int64) { 1530 i.kind = adr 1531 i.rd = operandNR(rd) 1532 i.u1 = uint64(offset) 1533 } 1534 1535 func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) { 1536 i.kind = atomicRmw 1537 i.rd, i.rn, i.rm = rt, rn, rs 1538 i.u1 = uint64(op) 1539 i.u2 = size 1540 } 1541 1542 func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) { 1543 i.kind = atomicCas 1544 i.rm, i.rn, i.rd = rt, rn, rs 1545 i.u2 = size 1546 } 1547 1548 func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) { 1549 i.kind = atomicLoad 1550 i.rn, i.rd = rn, rt 1551 i.u2 = size 1552 } 1553 1554 func (i *instruction) asAtomicStore(rn, rt operand, size uint64) { 1555 i.kind = atomicStore 1556 i.rn, i.rm = rn, rt 1557 i.u2 = size 1558 } 1559 1560 func (i *instruction) asDMB() { 1561 i.kind = dmb 1562 } 1563 1564 // TODO: delete unnecessary things. 1565 const ( 1566 // nop0 represents a no-op of zero size. 1567 nop0 instructionKind = iota + 1 1568 // aluRRR represents an ALU operation with two register sources and a register destination. 1569 aluRRR 1570 // aluRRRR represents an ALU operation with three register sources and a register destination. 1571 aluRRRR 1572 // aluRRImm12 represents an ALU operation with a register source and an immediate-12 source, with a register destination. 1573 aluRRImm12 1574 // aluRRBitmaskImm represents an ALU operation with a register source and a bitmask immediate, with a register destination. 1575 aluRRBitmaskImm 1576 // aluRRImmShift represents an ALU operation with a register source and an immediate-shifted source, with a register destination. 1577 aluRRImmShift 1578 // aluRRRShift represents an ALU operation with two register sources, one of which can be shifted, with a register destination. 1579 aluRRRShift 1580 // aluRRRExtend represents an ALU operation with two register sources, one of which can be extended, with a register destination. 1581 aluRRRExtend 1582 // bitRR represents a bit op instruction with a single register source. 1583 bitRR 1584 // uLoad8 represents an unsigned 8-bit load. 1585 uLoad8 1586 // sLoad8 represents a signed 8-bit load into 64-bit register. 1587 sLoad8 1588 // uLoad16 represents an unsigned 16-bit load into 64-bit register. 
1589 uLoad16 1590 // sLoad16 represents a signed 16-bit load into 64-bit register. 1591 sLoad16 1592 // uLoad32 represents an unsigned 32-bit load into 64-bit register. 1593 uLoad32 1594 // sLoad32 represents a signed 32-bit load into 64-bit register. 1595 sLoad32 1596 // uLoad64 represents a 64-bit load. 1597 uLoad64 1598 // store8 represents an 8-bit store. 1599 store8 1600 // store16 represents a 16-bit store. 1601 store16 1602 // store32 represents a 32-bit store. 1603 store32 1604 // store64 represents a 64-bit store. 1605 store64 1606 // storeP64 represents a store of a pair of registers. 1607 storeP64 1608 // loadP64 represents a load of a pair of registers. 1609 loadP64 1610 // mov64 represents a MOV instruction. These are encoded as ORR's but we keep them separate for better handling. 1611 mov64 1612 // mov32 represents a 32-bit MOV. This zeroes the top 32 bits of the destination. 1613 mov32 1614 // movZ represents a MOVZ with a 16-bit immediate. 1615 movZ 1616 // movN represents a MOVN with a 16-bit immediate. 1617 movN 1618 // movK represents a MOVK with a 16-bit immediate. 1619 movK 1620 // extend represents a sign- or zero-extend operation. 1621 extend 1622 // cSel represents a conditional-select operation. 1623 cSel 1624 // cSet represents a conditional-set operation. 1625 cSet 1626 // cCmpImm represents a conditional comparison with an immediate. 1627 cCmpImm 1628 // fpuMov64 represents a FPU move. Distinct from a vector-register move; moving just 64 bits appears to be significantly faster. 1629 fpuMov64 1630 // fpuMov128 represents a vector register move. 1631 fpuMov128 1632 // fpuMovFromVec represents a move to scalar from a vector element. 1633 fpuMovFromVec 1634 // fpuRR represents a 1-op FPU instruction. 1635 fpuRR 1636 // fpuRRR represents a 2-op FPU instruction. 1637 fpuRRR 1638 // fpuRRI represents a 2-op FPU instruction with immediate value. 1639 fpuRRI 1640 // fpuRRRR represents a 3-op FPU instruction. 1641 fpuRRRR 1642 // fpuCmp represents a FPU comparison, either 32 or 64 bit. 1643 fpuCmp 1644 // fpuLoad32 represents a floating-point load, single-precision (32 bit). 1645 fpuLoad32 1646 // fpuStore32 represents a floating-point store, single-precision (32 bit). 1647 fpuStore32 1648 // fpuLoad64 represents a floating-point load, double-precision (64 bit). 1649 fpuLoad64 1650 // fpuStore64 represents a floating-point store, double-precision (64 bit). 1651 fpuStore64 1652 // fpuLoad128 represents a floating-point/vector load, 128 bit. 1653 fpuLoad128 1654 // fpuStore128 represents a floating-point/vector store, 128 bit. 1655 fpuStore128 1656 // loadFpuConst32 represents a load of a 32-bit floating-point constant. 1657 loadFpuConst32 1658 // loadFpuConst64 represents a load of a 64-bit floating-point constant. 1659 loadFpuConst64 1660 // loadFpuConst128 represents a load of a 128-bit floating-point constant. 1661 loadFpuConst128 1662 // vecLoad1R represents a load of a one single-element structure that replicates to all lanes of a vector. 1663 vecLoad1R 1664 // fpuToInt represents a conversion from FP to integer. 1665 fpuToInt 1666 // intToFpu represents a conversion from integer to FP. 1667 intToFpu 1668 // fpuCSel represents a 32/64-bit FP conditional select. 1669 fpuCSel 1670 // movToVec represents a move to a vector element from a GPR. 1671 movToVec 1672 // movFromVec represents an unsigned move from a vector element to a GPR. 1673 movFromVec 1674 // movFromVecSigned represents a signed move from a vector element to a GPR. 
1675 movFromVecSigned 1676 // vecDup represents a duplication of a general-purpose register to a vector. 1677 vecDup 1678 // vecDupElement represents a duplication of a vector element to vector or scalar. 1679 vecDupElement 1680 // vecDupFromFpu represents a duplication of a scalar to a vector. 1681 vecDupFromFpu 1682 // vecExtract represents a vector extraction operation. 1683 vecExtract 1684 // vecExtend represents a vector extension operation. 1685 vecExtend 1686 // vecMovElement represents a move of one vector element to another vector element. 1687 vecMovElement 1688 // vecMiscNarrow represents a vector narrowing operation. 1689 vecMiscNarrow 1690 // vecRRR represents a vector ALU operation. 1691 vecRRR 1692 // vecRRRRewrite is exactly the same as vecRRR except that this rewrites the destination register. 1693 // For example, BSL instruction rewrites the destination register, and the existing value influences the result. 1694 // Therefore, the "destination" register in vecRRRRewrite will be treated as "use" which makes the register outlive 1695 // the instruction while this instruction doesn't have "def" in the context of register allocation. 1696 vecRRRRewrite 1697 // vecMisc represents a vector two register miscellaneous instruction. 1698 vecMisc 1699 // vecLanes represents a vector instruction across lanes. 1700 vecLanes 1701 // vecShiftImm represents a SIMD scalar shift by immediate instruction. 1702 vecShiftImm 1703 // vecTbl represents a table vector lookup - single register table. 1704 vecTbl 1705 // vecTbl2 represents a table vector lookup - two register table. 1706 vecTbl2 1707 // vecPermute represents a vector permute instruction. 1708 vecPermute 1709 // movToFPSR represents a move to the FPSR. 1710 movToFPSR 1711 // movFromFPSR represents a move from the FPSR. 1712 movFromFPSR 1713 // call represents a machine call instruction. 1714 call 1715 // callInd represents a machine indirect-call instruction. 1716 callInd 1717 // ret represents a machine return instruction. 1718 ret 1719 // br represents an unconditional branch. 1720 br 1721 // condBr represents a conditional branch. 1722 condBr 1723 // adr represents computing the address of a memory location using a PC-relative offset. 1724 adr 1725 // brTableSequence represents a jump-table sequence. 1726 brTableSequence 1727 // exitSequence consists of multiple instructions, and exits the execution immediately. 1728 // See encodeExitSequence. 1729 exitSequence 1730 // atomicRmw represents an atomic read-modify-write operation with two register sources and a register destination. 1731 atomicRmw 1732 // atomicCas represents an atomic compare-and-swap operation with three register sources. The value read from 1733 // memory is written back to the source register that held the comparison value. 1734 atomicCas 1735 // atomicLoad represents an atomic load with one source register and a register destination. 1736 atomicLoad 1737 // atomicStore represents an atomic store with two source registers and no destination. 1738 atomicStore 1739 // dmb represents the data memory barrier instruction in inner-shareable (ish) mode. 1740 dmb 1741 // UDF is the undefined instruction. For debugging only. 1742 udf 1743 // loadConstBlockArg represents a load of a constant block argument. 1744 loadConstBlockArg 1745 1746 // emitSourceOffsetInfo is a dummy instruction to emit source offset info. 1747 // The existence of this instruction does not affect the execution. 
1748 emitSourceOffsetInfo 1749 1750 // ------------------- do not define below this line ------------------- 1751 numInstructionKinds 1752 ) 1753 1754 func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.VReg) *instruction { 1755 i.kind = loadConstBlockArg 1756 i.u1 = v 1757 i.u2 = uint64(typ) 1758 i.rd = operandNR(dst) 1759 return i 1760 } 1761 1762 func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { 1763 return i.u1, ssa.Type(i.u2), i.rd.nr() 1764 } 1765 1766 func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { 1767 i.kind = emitSourceOffsetInfo 1768 i.u1 = uint64(l) 1769 return i 1770 } 1771 1772 func (i *instruction) sourceOffsetInfo() ssa.SourceOffset { 1773 return ssa.SourceOffset(i.u1) 1774 } 1775 1776 func (i *instruction) asUDF() *instruction { 1777 i.kind = udf 1778 return i 1779 } 1780 1781 func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { 1782 i.kind = fpuToInt 1783 i.rn = rn 1784 i.rd = rd 1785 if rdSigned { 1786 i.u1 = 1 1787 } 1788 if src64bit { 1789 i.u2 = 1 1790 } 1791 if dst64bit { 1792 i.u3 = 1 1793 } 1794 } 1795 1796 func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { 1797 i.kind = intToFpu 1798 i.rn = rn 1799 i.rd = rd 1800 if rnSigned { 1801 i.u1 = 1 1802 } 1803 if src64bit { 1804 i.u2 = 1 1805 } 1806 if dst64bit { 1807 i.u3 = 1 1808 } 1809 } 1810 1811 func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction { 1812 i.kind = exitSequence 1813 i.rn = operandNR(ctx) 1814 return i 1815 } 1816 1817 // aluOp determines the type of ALU operation. Instructions whose kind is one of 1818 // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend 1819 // would use this type. 1820 type aluOp int 1821 1822 func (a aluOp) String() string { 1823 switch a { 1824 case aluOpAdd: 1825 return "add" 1826 case aluOpSub: 1827 return "sub" 1828 case aluOpOrr: 1829 return "orr" 1830 case aluOpOrn: 1831 return "orn" 1832 case aluOpAnd: 1833 return "and" 1834 case aluOpAnds: 1835 return "ands" 1836 case aluOpBic: 1837 return "bic" 1838 case aluOpEor: 1839 return "eor" 1840 case aluOpAddS: 1841 return "adds" 1842 case aluOpSubS: 1843 return "subs" 1844 case aluOpSMulH: 1845 return "sMulH" 1846 case aluOpUMulH: 1847 return "uMulH" 1848 case aluOpSDiv: 1849 return "sdiv" 1850 case aluOpUDiv: 1851 return "udiv" 1852 case aluOpRotR: 1853 return "ror" 1854 case aluOpLsr: 1855 return "lsr" 1856 case aluOpAsr: 1857 return "asr" 1858 case aluOpLsl: 1859 return "lsl" 1860 case aluOpMAdd: 1861 return "madd" 1862 case aluOpMSub: 1863 return "msub" 1864 } 1865 panic(int(a)) 1866 } 1867 1868 const ( 1869 // 32/64-bit Add. 1870 aluOpAdd aluOp = iota 1871 // 32/64-bit Subtract. 1872 aluOpSub 1873 // 32/64-bit Bitwise OR. 1874 aluOpOrr 1875 // 32/64-bit Bitwise OR NOT. 1876 aluOpOrn 1877 // 32/64-bit Bitwise AND. 1878 aluOpAnd 1879 // 32/64-bit Bitwise ANDS. 1880 aluOpAnds 1881 // 32/64-bit Bitwise AND NOT. 1882 aluOpBic 1883 // 32/64-bit Bitwise XOR (Exclusive OR). 1884 aluOpEor 1885 // 32/64-bit Add setting flags. 1886 aluOpAddS 1887 // 32/64-bit Subtract setting flags. 1888 aluOpSubS 1889 // Signed multiply, high-word result. 1890 aluOpSMulH 1891 // Unsigned multiply, high-word result. 1892 aluOpUMulH 1893 // 64-bit Signed divide. 1894 aluOpSDiv 1895 // 64-bit Unsigned divide. 1896 aluOpUDiv 1897 // 32/64-bit Rotate right. 1898 aluOpRotR 1899 // 32/64-bit Logical shift right. 
1900 	aluOpLsr
1901 	// 32/64-bit Arithmetic shift right.
1902 	aluOpAsr
1903 	// 32/64-bit Logical shift left.
1904 	aluOpLsl
1905 
1906 	// Multiply-add (aluOpMAdd) and multiply-subtract (aluOpMSub) are only applicable for aluRRRR.
1907 	aluOpMAdd
1908 	aluOpMSub
1909 )
1910 
1911 // vecOp determines the type of vector operation. Vector instructions (e.g. those of kind
1912 // vecRRR or vecMisc) use this type to select the concrete operation.
1913 type vecOp int
1914 
1915 // String implements fmt.Stringer.
1916 func (b vecOp) String() string {
1917 	switch b {
1918 	case vecOpCnt:
1919 		return "cnt"
1920 	case vecOpCmeq:
1921 		return "cmeq"
1922 	case vecOpCmgt:
1923 		return "cmgt"
1924 	case vecOpCmhi:
1925 		return "cmhi"
1926 	case vecOpCmge:
1927 		return "cmge"
1928 	case vecOpCmhs:
1929 		return "cmhs"
1930 	case vecOpFcmeq:
1931 		return "fcmeq"
1932 	case vecOpFcmgt:
1933 		return "fcmgt"
1934 	case vecOpFcmge:
1935 		return "fcmge"
1936 	case vecOpCmeq0:
1937 		return "cmeq0"
1938 	case vecOpUaddlv:
1939 		return "uaddlv"
1940 	case vecOpBit:
1941 		return "bit"
1942 	case vecOpBic:
1943 		return "bic"
1944 	case vecOpBsl:
1945 		return "bsl"
1946 	case vecOpNot:
1947 		return "not"
1948 	case vecOpAnd:
1949 		return "and"
1950 	case vecOpOrr:
1951 		return "orr"
1952 	case vecOpEOR:
1953 		return "eor"
1954 	case vecOpFadd:
1955 		return "fadd"
1956 	case vecOpAdd:
1957 		return "add"
1958 	case vecOpAddp:
1959 		return "addp"
1960 	case vecOpAddv:
1961 		return "addv"
1962 	case vecOpSub:
1963 		return "sub"
1964 	case vecOpFsub:
1965 		return "fsub"
1966 	case vecOpSmin:
1967 		return "smin"
1968 	case vecOpUmin:
1969 		return "umin"
1970 	case vecOpUminv:
1971 		return "uminv"
1972 	case vecOpSmax:
1973 		return "smax"
1974 	case vecOpUmax:
1975 		return "umax"
1976 	case vecOpUmaxp:
1977 		return "umaxp"
1978 	case vecOpUrhadd:
1979 		return "urhadd"
1980 	case vecOpFmul:
1981 		return "fmul"
1982 	case vecOpSqrdmulh:
1983 		return "sqrdmulh"
1984 	case vecOpMul:
1985 		return "mul"
1986 	case vecOpUmlal:
1987 		return "umlal"
1988 	case vecOpFdiv:
1989 		return "fdiv"
1990 	case vecOpFsqrt:
1991 		return "fsqrt"
1992 	case vecOpAbs:
1993 		return "abs"
1994 	case vecOpFabs:
1995 		return "fabs"
1996 	case vecOpNeg:
1997 		return "neg"
1998 	case vecOpFneg:
1999 		return "fneg"
2000 	case vecOpFrintp:
2001 		return "frintp"
2002 	case vecOpFrintm:
2003 		return "frintm"
2004 	case vecOpFrintn:
2005 		return "frintn"
2006 	case vecOpFrintz:
2007 		return "frintz"
2008 	case vecOpFcvtl:
2009 		return "fcvtl"
2010 	case vecOpFcvtn:
2011 		return "fcvtn"
2012 	case vecOpFcvtzu:
2013 		return "fcvtzu"
2014 	case vecOpFcvtzs:
2015 		return "fcvtzs"
2016 	case vecOpScvtf:
2017 		return "scvtf"
2018 	case vecOpUcvtf:
2019 		return "ucvtf"
2020 	case vecOpSqxtn:
2021 		return "sqxtn"
2022 	case vecOpUqxtn:
2023 		return "uqxtn"
2024 	case vecOpSqxtun:
2025 		return "sqxtun"
2026 	case vecOpRev64:
2027 		return "rev64"
2028 	case vecOpXtn:
2029 		return "xtn"
2030 	case vecOpShll:
2031 		return "shll"
2032 	case vecOpSshl:
2033 		return "sshl"
2034 	case vecOpSshll:
2035 		return "sshll"
2036 	case vecOpUshl:
2037 		return "ushl"
2038 	case vecOpUshll:
2039 		return "ushll"
2040 	case vecOpSshr:
2041 		return "sshr"
2042 	case vecOpZip1:
2043 		return "zip1"
2044 	case vecOpFmin:
2045 		return "fmin"
2046 	case vecOpFmax:
2047 		return "fmax"
2048 	case vecOpSmull:
2049 		return "smull"
2050 	case vecOpSmull2:
2051 		return "smull2"
2052 	}
2053 	panic(int(b))
2054 }
2055 
2056 const (
2057 	vecOpCnt vecOp = iota
2058 	vecOpCmeq0
2059 	vecOpCmeq
2060 	vecOpCmgt
2061 	vecOpCmhi
2062 	vecOpCmge
2063 	vecOpCmhs
2064 	vecOpFcmeq
2065 	vecOpFcmgt
2066 	vecOpFcmge
2067 	vecOpUaddlv
2068 	vecOpBit
2069 	vecOpBic
2070 	vecOpBsl
2071 	vecOpNot
2072 	vecOpAnd
2073 	vecOpOrr
2074 	vecOpEOR
2075 	vecOpAdd
2076 	vecOpFadd
2077 	vecOpAddv
2078 	vecOpSqadd
2079 	vecOpUqadd
2080 	vecOpAddp
2081 	vecOpSub
2082 	vecOpFsub
2083 	vecOpSqsub
2084 	vecOpUqsub
2085 	vecOpSmin
2086 	vecOpUmin
2087 	vecOpUminv
2088 	vecOpFmin
2089 	vecOpSmax
2090 	vecOpUmax
2091 	vecOpUmaxp
2092 	vecOpFmax
2093 	vecOpUrhadd
2094 	vecOpMul
2095 	vecOpFmul
2096 	vecOpSqrdmulh
2097 	vecOpUmlal
2098 	vecOpFdiv
2099 	vecOpFsqrt
2100 	vecOpAbs
2101 	vecOpFabs
2102 	vecOpNeg
2103 	vecOpFneg
2104 	vecOpFrintm
2105 	vecOpFrintn
2106 	vecOpFrintp
2107 	vecOpFrintz
2108 	vecOpFcvtl
2109 	vecOpFcvtn
2110 	vecOpFcvtzs
2111 	vecOpFcvtzu
2112 	vecOpScvtf
2113 	vecOpUcvtf
2114 	vecOpSqxtn
2115 	vecOpSqxtun
2116 	vecOpUqxtn
2117 	vecOpRev64
2118 	vecOpXtn
2119 	vecOpShll
2120 	vecOpSshl
2121 	vecOpSshll
2122 	vecOpUshl
2123 	vecOpUshll
2124 	vecOpSshr
2125 	vecOpZip1
2126 	vecOpSmull
2127 	vecOpSmull2
2128 )
2129 
2130 // bitOp determines the type of bitwise operation. Instructions of kind bitRR use this type
2131 // to select between bitOpRbit and bitOpClz.
2132 type bitOp int
2133 
2134 // String implements fmt.Stringer.
2135 func (b bitOp) String() string {
2136 	switch b {
2137 	case bitOpRbit:
2138 		return "rbit"
2139 	case bitOpClz:
2140 		return "clz"
2141 	}
2142 	panic(int(b))
2143 }
2144 
2145 const (
2146 	// 32/64-bit Rbit.
2147 	bitOpRbit bitOp = iota
2148 	// 32/64-bit Clz.
2149 	bitOpClz
2150 )
2151 
2152 // fpuUniOp represents a unary floating-point unit (FPU) operation.
2153 type fpuUniOp byte
2154 
2155 const (
2156 	fpuUniOpNeg fpuUniOp = iota
2157 	fpuUniOpCvt32To64
2158 	fpuUniOpCvt64To32
2159 	fpuUniOpSqrt
2160 	fpuUniOpRoundPlus
2161 	fpuUniOpRoundMinus
2162 	fpuUniOpRoundZero
2163 	fpuUniOpRoundNearest
2164 	fpuUniOpAbs
2165 )
2166 
2167 // String implements fmt.Stringer.
2168 func (f fpuUniOp) String() string {
2169 	switch f {
2170 	case fpuUniOpNeg:
2171 		return "fneg"
2172 	case fpuUniOpCvt32To64:
2173 		return "fcvt"
2174 	case fpuUniOpCvt64To32:
2175 		return "fcvt"
2176 	case fpuUniOpSqrt:
2177 		return "fsqrt"
2178 	case fpuUniOpRoundPlus:
2179 		return "frintp"
2180 	case fpuUniOpRoundMinus:
2181 		return "frintm"
2182 	case fpuUniOpRoundZero:
2183 		return "frintz"
2184 	case fpuUniOpRoundNearest:
2185 		return "frintn"
2186 	case fpuUniOpAbs:
2187 		return "fabs"
2188 	}
2189 	panic(int(f))
2190 }
2191 
2192 // fpuBinOp represents a binary floating-point unit (FPU) operation.
2193 type fpuBinOp byte
2194 
2195 const (
2196 	fpuBinOpAdd fpuBinOp = iota
2197 	fpuBinOpSub
2198 	fpuBinOpMul
2199 	fpuBinOpDiv
2200 	fpuBinOpMax
2201 	fpuBinOpMin
2202 )
2203 
2204 // String implements fmt.Stringer.
2205 func (f fpuBinOp) String() string {
2206 	switch f {
2207 	case fpuBinOpAdd:
2208 		return "fadd"
2209 	case fpuBinOpSub:
2210 		return "fsub"
2211 	case fpuBinOpMul:
2212 		return "fmul"
2213 	case fpuBinOpDiv:
2214 		return "fdiv"
2215 	case fpuBinOpMax:
2216 		return "fmax"
2217 	case fpuBinOpMin:
2218 		return "fmin"
2219 	}
2220 	panic(int(f))
2221 }
2222 
2223 // extMode represents the mode of a register operand extension.
2224 // For example, aluRRRExtend instructions need this info to determine the extensions.
2225 type extMode byte
2226 
2227 const (
2228 	extModeNone extMode = iota
2229 	// extModeZeroExtend32 suggests a zero-extension to 32 bits if the original bit size is less than 32.
2230 	extModeZeroExtend32
2231 	// extModeSignExtend32 stands for a sign-extension to 32 bits if the original bit size is less than 32.
2232 	extModeSignExtend32
2233 	// extModeZeroExtend64 suggests a zero-extension to 64 bits if the original bit size is less than 64.
2234 	extModeZeroExtend64
2235 	// extModeSignExtend64 stands for a sign-extension to 64 bits if the original bit size is less than 64.
2236 	extModeSignExtend64
2237 )
2238 
2239 func (e extMode) bits() byte {
2240 	switch e {
2241 	case extModeZeroExtend32, extModeSignExtend32:
2242 		return 32
2243 	case extModeZeroExtend64, extModeSignExtend64:
2244 		return 64
2245 	default:
2246 		return 0
2247 	}
2248 }
2249 
2250 func (e extMode) signed() bool {
2251 	switch e {
2252 	case extModeSignExtend32, extModeSignExtend64:
2253 		return true
2254 	default:
2255 		return false
2256 	}
2257 }
2258 
2259 func extModeOf(t ssa.Type, signed bool) extMode {
2260 	switch t.Bits() {
2261 	case 32:
2262 		if signed {
2263 			return extModeSignExtend32
2264 		}
2265 		return extModeZeroExtend32
2266 	case 64:
2267 		if signed {
2268 			return extModeSignExtend64
2269 		}
2270 		return extModeZeroExtend64
2271 	default:
2272 		panic("TODO? do we need narrower than 32 bits?")
2273 	}
2274 }
2275 
2276 type extendOp byte
2277 
2278 const (
2279 	extendOpUXTB extendOp = 0b000
2280 	extendOpUXTH extendOp = 0b001
2281 	extendOpUXTW extendOp = 0b010
2282 	// extendOpUXTX does nothing, but is a convenient symbol that officially exists. See:
2283 	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
2284 	extendOpUXTX extendOp = 0b011
2285 	extendOpSXTB extendOp = 0b100
2286 	extendOpSXTH extendOp = 0b101
2287 	extendOpSXTW extendOp = 0b110
2288 	// extendOpSXTX does nothing, but is a convenient symbol that officially exists. See:
2289 	// https://stackoverflow.com/questions/72041372/what-do-the-uxtx-and-sxtx-extensions-mean-for-32-bit-aarch64-adds-instruct
2290 	extendOpSXTX extendOp = 0b111
2291 	extendOpNone extendOp = 0xff
2292 )
2293 
2294 func (e extendOp) srcBits() byte {
2295 	switch e {
2296 	case extendOpUXTB, extendOpSXTB:
2297 		return 8
2298 	case extendOpUXTH, extendOpSXTH:
2299 		return 16
2300 	case extendOpUXTW, extendOpSXTW:
2301 		return 32
2302 	case extendOpUXTX, extendOpSXTX:
2303 		return 64
2304 	}
2305 	panic(int(e))
2306 }
2307 
2308 func (e extendOp) String() string {
2309 	switch e {
2310 	case extendOpUXTB:
2311 		return "UXTB"
2312 	case extendOpUXTH:
2313 		return "UXTH"
2314 	case extendOpUXTW:
2315 		return "UXTW"
2316 	case extendOpUXTX:
2317 		return "UXTX"
2318 	case extendOpSXTB:
2319 		return "SXTB"
2320 	case extendOpSXTH:
2321 		return "SXTH"
2322 	case extendOpSXTW:
2323 		return "SXTW"
2324 	case extendOpSXTX:
2325 		return "SXTX"
2326 	}
2327 	panic(int(e))
2328 }
2329 
2330 func extendOpFrom(signed bool, from byte) extendOp {
2331 	switch from {
2332 	case 8:
2333 		if signed {
2334 			return extendOpSXTB
2335 		}
2336 		return extendOpUXTB
2337 	case 16:
2338 		if signed {
2339 			return extendOpSXTH
2340 		}
2341 		return extendOpUXTH
2342 	case 32:
2343 		if signed {
2344 			return extendOpSXTW
2345 		}
2346 		return extendOpUXTW
2347 	case 64:
2348 		if signed {
2349 			return extendOpSXTX
2350 		}
2351 		return extendOpUXTX
2352 	}
2353 	panic("invalid extendOpFrom")
2354 }
2355 
2356 type shiftOp byte
2357 
2358 const (
2359 	shiftOpLSL shiftOp = 0b00
2360 	shiftOpLSR shiftOp = 0b01
2361 	shiftOpASR shiftOp = 0b10
2362 	shiftOpROR shiftOp = 0b11
2363 )
2364 
2365 func (s shiftOp) String() string {
2366 	switch s {
2367 	case shiftOpLSL:
2368 		return "lsl"
2369 	case shiftOpLSR:
2370 		return "lsr"
2371 	case shiftOpASR:
2372 		return "asr"
2373 	case shiftOpROR:
2374 		return "ror"
2375 	}
2376 	panic(int(s))
2377 }
2378 
2379 const exitSequenceSize = 6 * 4 // 6 instructions as in encodeExitSequence.
2380 
2381 // size returns the size of the instruction in encoded bytes.
2382 func (i *instruction) size() int64 {
2383 	switch i.kind {
2384 	case exitSequence:
2385 		return exitSequenceSize // See encodeExitSequence.
2386 	case nop0, loadConstBlockArg:
2387 		return 0
2388 	case emitSourceOffsetInfo:
2389 		return 0
2390 	case loadFpuConst32:
2391 		if i.u1 == 0 {
2392 			return 4 // zero loading can be encoded as a single instruction.
2393 		}
2394 		return 4 + 4 + 4
2395 	case loadFpuConst64:
2396 		if i.u1 == 0 {
2397 			return 4 // zero loading can be encoded as a single instruction.
2398 		}
2399 		return 4 + 4 + 8
2400 	case loadFpuConst128:
2401 		if i.u1 == 0 && i.u2 == 0 {
2402 			return 4 // zero loading can be encoded as a single instruction.
2403 		}
2404 		return 4 + 4 + 16
2405 	case brTableSequence:
2406 		return 4*4 + int64(i.u2)*4
2407 	default:
2408 		return 4
2409 	}
2410 }
2411 
2412 // vecArrangement is the arrangement of data within a vector register.
2413 type vecArrangement byte
2414 
2415 const (
2416 	// vecArrangementNone is an arrangement indicating no data is stored.
2417 	vecArrangementNone vecArrangement = iota
2418 	// vecArrangement8B is an arrangement of 8 bytes (64-bit vector).
2419 	vecArrangement8B
2420 	// vecArrangement16B is an arrangement of 16 bytes (128-bit vector).
2421 	vecArrangement16B
2422 	// vecArrangement4H is an arrangement of 4 half precisions (64-bit vector).
2423 	vecArrangement4H
2424 	// vecArrangement8H is an arrangement of 8 half precisions (128-bit vector).
2425 	vecArrangement8H
2426 	// vecArrangement2S is an arrangement of 2 single precisions (64-bit vector).
2427 	vecArrangement2S
2428 	// vecArrangement4S is an arrangement of 4 single precisions (128-bit vector).
2429 	vecArrangement4S
2430 	// vecArrangement1D is an arrangement of 1 double precision (64-bit vector).
2431 	vecArrangement1D
2432 	// vecArrangement2D is an arrangement of 2 double precisions (128-bit vector).
2433 	vecArrangement2D
2434 
2435 	// Assign each vector size specifier to a vector arrangement ID.
2436 	// Instructions can have either an arrangement or a size specifier, but not both, so storing
2437 	// both kinds of value in the same field simplifies the internal representation of vector
2438 	// instructions.
2439 
2440 	// vecArrangementB is a size specifier of byte (8-bit).
2441 	vecArrangementB
2442 	// vecArrangementH is a size specifier of halfword (16-bit).
2443 	vecArrangementH
2444 	// vecArrangementS is a size specifier of word (32-bit).
2445 	vecArrangementS
2446 	// vecArrangementD is a size specifier of doubleword (64-bit).
2447 	vecArrangementD
2448 	// vecArrangementQ is a size specifier of quadword (128-bit), i.e. the entire vector.
2449 	vecArrangementQ
2450 )
2451 
2452 // String implements fmt.Stringer.
2453 func (v vecArrangement) String() (ret string) {
2454 	switch v {
2455 	case vecArrangement8B:
2456 		ret = "8B"
2457 	case vecArrangement16B:
2458 		ret = "16B"
2459 	case vecArrangement4H:
2460 		ret = "4H"
2461 	case vecArrangement8H:
2462 		ret = "8H"
2463 	case vecArrangement2S:
2464 		ret = "2S"
2465 	case vecArrangement4S:
2466 		ret = "4S"
2467 	case vecArrangement1D:
2468 		ret = "1D"
2469 	case vecArrangement2D:
2470 		ret = "2D"
2471 	case vecArrangementB:
2472 		ret = "B"
2473 	case vecArrangementH:
2474 		ret = "H"
2475 	case vecArrangementS:
2476 		ret = "S"
2477 	case vecArrangementD:
2478 		ret = "D"
2479 	case vecArrangementQ:
2480 		ret = "Q"
2481 	case vecArrangementNone:
2482 		ret = "none"
2483 	default:
2484 		panic(v)
2485 	}
2486 	return
2487 }
2488 
2489 // vecIndex is the index of an element of a vector register.
2490 type vecIndex byte
2491 
2492 // vecIndexNone indicates no vector index specified.
2493 const vecIndexNone = ^vecIndex(0)
2494 
2495 func ssaLaneToArrangement(lane ssa.VecLane) vecArrangement {
2496 	switch lane {
2497 	case ssa.VecLaneI8x16:
2498 		return vecArrangement16B
2499 	case ssa.VecLaneI16x8:
2500 		return vecArrangement8H
2501 	case ssa.VecLaneI32x4:
2502 		return vecArrangement4S
2503 	case ssa.VecLaneI64x2:
2504 		return vecArrangement2D
2505 	case ssa.VecLaneF32x4:
2506 		return vecArrangement4S
2507 	case ssa.VecLaneF64x2:
2508 		return vecArrangement2D
2509 	default:
2510 		panic(lane)
2511 	}
2512 }
2513 
2514 // atomicRmwOp is the type of atomic read-modify-write operation.
2515 type atomicRmwOp byte
2516 
2517 const (
2518 	// atomicRmwOpAdd is an atomic add operation.
2519 	atomicRmwOpAdd atomicRmwOp = iota
2520 	// atomicRmwOpClr is an atomic clear operation, i.e. AND NOT.
2521 	atomicRmwOpClr
2522 	// atomicRmwOpSet is an atomic set operation, i.e. OR.
2523 	atomicRmwOpSet
2524 	// atomicRmwOpEor is an atomic exclusive OR operation.
2525 	atomicRmwOpEor
2526 	// atomicRmwOpSwp is an atomic swap operation.
2527 	atomicRmwOpSwp
2528 )
2529 
2530 // String implements fmt.Stringer.
2531 func (a atomicRmwOp) String() string {
2532 	switch a {
2533 	case atomicRmwOpAdd:
2534 		return "ldaddal"
2535 	case atomicRmwOpClr:
2536 		return "ldclral"
2537 	case atomicRmwOpSet:
2538 		return "ldsetal"
2539 	case atomicRmwOpEor:
2540 		return "ldeoral"
2541 	case atomicRmwOpSwp:
2542 		return "swpal"
2543 	}
2544 	panic(fmt.Sprintf("unknown atomicRmwOp: %d", a))
2545 }
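
// Editor's note: the sketch below is illustrative only and is not part of the upstream
// wazero source. It shows how the helpers defined above translate SSA-level type and
// lane information into ISA-level extension and arrangement encodings, assuming
// ssa.TypeI32 and ssa.VecLaneF64x2 from the imported ssa package. The function name
// is hypothetical.
func exampleExtendAndArrangement() (extStr, arrStr string) {
	// A signed 32-bit value maps to the sign-extension-to-32 mode...
	ext := extModeOf(ssa.TypeI32, true) // extModeSignExtend32
	// ...which translates to the SXTW extend-operand encoding (0b110).
	op := extendOpFrom(ext.signed(), ext.bits())
	// An f64x2 SSA lane uses the 2D vector arrangement (two 64-bit lanes).
	arr := ssaLaneToArrangement(ssa.VecLaneF64x2)
	return op.String(), arr.String() // "SXTW", "2D"
}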