github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go (about) 1 package arm64 2 3 import ( 4 "fmt" 5 6 "github.com/wasilibs/wazerox/internal/engine/wazevo/backend" 7 "github.com/wasilibs/wazerox/internal/engine/wazevo/backend/regalloc" 8 "github.com/wasilibs/wazerox/internal/engine/wazevo/wazevoapi" 9 ) 10 11 // Encode implements backend.Machine Encode. 12 func (m *machine) Encode() { 13 m.encode(m.rootInstr) 14 } 15 16 func (m *machine) encode(root *instruction) { 17 for cur := root; cur != nil; cur = cur.next { 18 cur.encode(m.compiler) 19 } 20 } 21 22 func (i *instruction) encode(c backend.Compiler) { 23 switch kind := i.kind; kind { 24 case nop0, emitSourceOffsetInfo: 25 case exitSequence: 26 encodeExitSequence(c, i.rn.reg()) 27 case ret: 28 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en 29 c.Emit4Bytes(encodeRet()) 30 case br: 31 imm := i.brOffset() 32 c.Emit4Bytes(encodeUnconditionalBranch(false, imm)) 33 case call: 34 if i.u2 > 0 { 35 // This is a special case for EmitGoEntryPreamble which doesn't need reloc info, 36 // but instead the imm is already resolved. 37 c.Emit4Bytes(encodeUnconditionalBranch(true, int64(i.u2))) 38 } else { 39 // We still don't know the exact address of the function to call, so we emit a placeholder. 
40 c.AddRelocationInfo(i.callFuncRef()) 41 c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder 42 } 43 case callInd: 44 c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true)) 45 case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: 46 c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) 47 case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: 48 c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) 49 case vecLoad1R: 50 c.Emit4Bytes(encodeVecLoad1R( 51 regNumberInEncoding[i.rd.realReg()], 52 regNumberInEncoding[i.rn.realReg()], 53 vecArrangement(i.u1))) 54 case condBr: 55 imm19 := i.condBrOffset() 56 if imm19%4 != 0 { 57 panic("imm26 for branch must be a multiple of 4") 58 } 59 60 imm19U32 := uint32(imm19/4) & 0b111_11111111_11111111 61 brCond := i.condBrCond() 62 switch brCond.kind() { 63 case condKindRegisterZero: 64 rt := regNumberInEncoding[brCond.register().RealReg()] 65 c.Emit4Bytes(encodeCBZCBNZ(rt, false, imm19U32, i.condBr64bit())) 66 case condKindRegisterNotZero: 67 rt := regNumberInEncoding[brCond.register().RealReg()] 68 c.Emit4Bytes(encodeCBZCBNZ(rt, true, imm19U32, i.condBr64bit())) 69 case condKindCondFlagSet: 70 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally- 71 fl := brCond.flag() 72 c.Emit4Bytes(0b01010100<<24 | (imm19U32 << 5) | uint32(fl)) 73 default: 74 panic("BUG") 75 } 76 case movN: 77 c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) 78 case movZ: 79 c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) 80 case movK: 81 c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) 82 case mov32: 83 to, from := i.rd.realReg(), i.rn.realReg() 84 
c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to])) 85 case mov64: 86 to, from := i.rd.realReg(), i.rn.realReg() 87 toIsSp := to == sp 88 fromIsSp := from == sp 89 c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp)) 90 case loadP64, storeP64: 91 rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] 92 amode := i.amode 93 rn := regNumberInEncoding[amode.rn.RealReg()] 94 var pre bool 95 switch amode.kind { 96 case addressModeKindPostIndex: 97 case addressModeKindPreIndex: 98 pre = true 99 default: 100 panic("BUG") 101 } 102 c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm)) 103 case loadFpuConst32: 104 rd := regNumberInEncoding[i.rd.realReg()] 105 if i.u1 == 0 { 106 c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) 107 } else { 108 encodeLoadFpuConst32(c, rd, i.u1) 109 } 110 case loadFpuConst64: 111 rd := regNumberInEncoding[i.rd.realReg()] 112 if i.u1 == 0 { 113 c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B)) 114 } else { 115 encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1) 116 } 117 case loadFpuConst128: 118 rd := regNumberInEncoding[i.rd.realReg()] 119 lo, hi := i.u1, i.u2 120 if lo == 0 && hi == 0 { 121 c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) 122 } else { 123 encodeLoadFpuConst128(c, rd, lo, hi) 124 } 125 case aluRRRR: 126 c.Emit4Bytes(encodeAluRRRR( 127 aluOp(i.u1), 128 regNumberInEncoding[i.rd.realReg()], 129 regNumberInEncoding[i.rn.realReg()], 130 regNumberInEncoding[i.rm.realReg()], 131 regNumberInEncoding[i.ra.realReg()], 132 uint32(i.u3), 133 )) 134 case aluRRImmShift: 135 c.Emit4Bytes(encodeAluRRImm( 136 aluOp(i.u1), 137 regNumberInEncoding[i.rd.realReg()], 138 regNumberInEncoding[i.rn.realReg()], 139 uint32(i.rm.shiftImm()), 140 uint32(i.u3), 141 )) 142 case aluRRR: 143 rn := i.rn.realReg() 144 c.Emit4Bytes(encodeAluRRR( 145 
aluOp(i.u1), 146 regNumberInEncoding[i.rd.realReg()], 147 regNumberInEncoding[rn], 148 regNumberInEncoding[i.rm.realReg()], 149 i.u3 == 1, 150 rn == sp, 151 )) 152 case aluRRRExtend: 153 rm, exo, to := i.rm.er() 154 c.Emit4Bytes(encodeAluRRRExtend( 155 aluOp(i.u1), 156 regNumberInEncoding[i.rd.realReg()], 157 regNumberInEncoding[i.rn.realReg()], 158 regNumberInEncoding[rm.RealReg()], 159 exo, 160 to, 161 )) 162 case aluRRRShift: 163 r, amt, sop := i.rm.sr() 164 c.Emit4Bytes(encodeAluRRRShift( 165 aluOp(i.u1), 166 regNumberInEncoding[i.rd.realReg()], 167 regNumberInEncoding[i.rn.realReg()], 168 regNumberInEncoding[r.RealReg()], 169 uint32(amt), 170 sop, 171 i.u3 == 1, 172 )) 173 case aluRRBitmaskImm: 174 c.Emit4Bytes(encodeAluBitmaskImmediate( 175 aluOp(i.u1), 176 regNumberInEncoding[i.rd.realReg()], 177 regNumberInEncoding[i.rn.realReg()], 178 i.u2, 179 i.u3 == 1, 180 )) 181 case bitRR: 182 c.Emit4Bytes(encodeBitRR( 183 bitOp(i.u1), 184 regNumberInEncoding[i.rd.realReg()], 185 regNumberInEncoding[i.rn.realReg()], 186 uint32(i.u2)), 187 ) 188 case aluRRImm12: 189 imm12, shift := i.rm.imm12() 190 c.Emit4Bytes(encodeAluRRImm12( 191 aluOp(i.u1), 192 regNumberInEncoding[i.rd.realReg()], 193 regNumberInEncoding[i.rn.realReg()], 194 imm12, shift, 195 i.u3 == 1, 196 )) 197 case fpuRRR: 198 c.Emit4Bytes(encodeFpuRRR( 199 fpuBinOp(i.u1), 200 regNumberInEncoding[i.rd.realReg()], 201 regNumberInEncoding[i.rn.realReg()], 202 regNumberInEncoding[i.rm.realReg()], 203 i.u3 == 1, 204 )) 205 case fpuMov64, fpuMov128: 206 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- 207 rd := regNumberInEncoding[i.rd.realReg()] 208 rn := regNumberInEncoding[i.rn.realReg()] 209 var q uint32 210 if kind == fpuMov128 { 211 q = 0b1 212 } 213 c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd) 214 case cSet: 215 rd := regNumberInEncoding[i.rd.realReg()] 216 cf := condFlag(i.u1) 217 if 
i.u2 == 1 { 218 // https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- 219 // Note that we set 64bit version here. 220 c.Emit4Bytes(0b1101101010011111<<16 | uint32(cf.invert())<<12 | 0b011111<<5 | rd) 221 } else { 222 // https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC- 223 // Note that we set 64bit version here. 224 c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd) 225 } 226 case extend: 227 c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()])) 228 case fpuCmp: 229 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en 230 rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] 231 var ftype uint32 232 if i.u3 == 1 { 233 ftype = 0b01 // double precision. 
234 } 235 c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) 236 case udf: 237 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en 238 if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { 239 c.Emit4Bytes(dummyInstruction) 240 } else { 241 c.Emit4Bytes(0) 242 } 243 case adr: 244 c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) 245 case cSel: 246 c.Emit4Bytes(encodeConditionalSelect( 247 kind, 248 regNumberInEncoding[i.rd.realReg()], 249 regNumberInEncoding[i.rn.realReg()], 250 regNumberInEncoding[i.rm.realReg()], 251 condFlag(i.u1), 252 i.u3 == 1, 253 )) 254 case fpuCSel: 255 c.Emit4Bytes(encodeFpuCSel( 256 regNumberInEncoding[i.rd.realReg()], 257 regNumberInEncoding[i.rn.realReg()], 258 regNumberInEncoding[i.rm.realReg()], 259 condFlag(i.u1), 260 i.u3 == 1, 261 )) 262 case movToVec: 263 c.Emit4Bytes(encodeMoveToVec( 264 regNumberInEncoding[i.rd.realReg()], 265 regNumberInEncoding[i.rn.realReg()], 266 vecArrangement(byte(i.u1)), 267 vecIndex(i.u2), 268 )) 269 case movFromVec, movFromVecSigned: 270 c.Emit4Bytes(encodeMoveFromVec( 271 regNumberInEncoding[i.rd.realReg()], 272 regNumberInEncoding[i.rn.realReg()], 273 vecArrangement(byte(i.u1)), 274 vecIndex(i.u2), 275 i.kind == movFromVecSigned, 276 )) 277 case vecDup: 278 c.Emit4Bytes(encodeVecDup( 279 regNumberInEncoding[i.rd.realReg()], 280 regNumberInEncoding[i.rn.realReg()], 281 vecArrangement(byte(i.u1)))) 282 case vecDupElement: 283 c.Emit4Bytes(encodeVecDupElement( 284 regNumberInEncoding[i.rd.realReg()], 285 regNumberInEncoding[i.rn.realReg()], 286 vecArrangement(byte(i.u1)), 287 vecIndex(i.u2))) 288 case vecExtract: 289 c.Emit4Bytes(encodeVecExtract( 290 regNumberInEncoding[i.rd.realReg()], 291 regNumberInEncoding[i.rn.realReg()], 292 regNumberInEncoding[i.rm.realReg()], 293 vecArrangement(byte(i.u1)), 294 uint32(i.u2))) 295 case vecPermute: 296 c.Emit4Bytes(encodeVecPermute( 297 vecOp(i.u1), 298 
regNumberInEncoding[i.rd.realReg()], 299 regNumberInEncoding[i.rn.realReg()], 300 regNumberInEncoding[i.rm.realReg()], 301 vecArrangement(byte(i.u2)))) 302 case vecMovElement: 303 c.Emit4Bytes(encodeVecMovElement( 304 regNumberInEncoding[i.rd.realReg()], 305 regNumberInEncoding[i.rn.realReg()], 306 vecArrangement(i.u1), 307 uint32(i.u2), uint32(i.u3), 308 )) 309 case vecMisc: 310 c.Emit4Bytes(encodeAdvancedSIMDTwoMisc( 311 vecOp(i.u1), 312 regNumberInEncoding[i.rd.realReg()], 313 regNumberInEncoding[i.rn.realReg()], 314 vecArrangement(i.u2), 315 )) 316 case vecLanes: 317 c.Emit4Bytes(encodeVecLanes( 318 vecOp(i.u1), 319 regNumberInEncoding[i.rd.realReg()], 320 regNumberInEncoding[i.rn.realReg()], 321 vecArrangement(i.u2), 322 )) 323 case vecShiftImm: 324 c.Emit4Bytes(encodeVecShiftImm( 325 vecOp(i.u1), 326 regNumberInEncoding[i.rd.realReg()], 327 regNumberInEncoding[i.rn.realReg()], 328 uint32(i.rm.shiftImm()), 329 vecArrangement(i.u2), 330 )) 331 case vecTbl: 332 c.Emit4Bytes(encodeVecTbl( 333 1, 334 regNumberInEncoding[i.rd.realReg()], 335 regNumberInEncoding[i.rn.realReg()], 336 regNumberInEncoding[i.rm.realReg()], 337 vecArrangement(i.u2)), 338 ) 339 case vecTbl2: 340 c.Emit4Bytes(encodeVecTbl( 341 2, 342 regNumberInEncoding[i.rd.realReg()], 343 regNumberInEncoding[i.rn.realReg()], 344 regNumberInEncoding[i.rm.realReg()], 345 vecArrangement(i.u2)), 346 ) 347 case brTableSequence: 348 encodeBrTableSequence(c, i.rn.reg(), i.targets) 349 case fpuToInt, intToFpu: 350 c.Emit4Bytes(encodeCnvBetweenFloatInt(i)) 351 case fpuRR: 352 c.Emit4Bytes(encodeFloatDataOneSource( 353 fpuUniOp(i.u1), 354 regNumberInEncoding[i.rd.realReg()], 355 regNumberInEncoding[i.rn.realReg()], 356 i.u3 == 1, 357 )) 358 case vecRRR: 359 if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { 360 panic(fmt.Sprintf("vecOp %s must use vecRRRRewrite instead of vecRRR", op.String())) 361 } 362 fallthrough 363 case vecRRRRewrite: 364 c.Emit4Bytes(encodeVecRRR( 365 vecOp(i.u1), 
366 regNumberInEncoding[i.rd.realReg()], 367 regNumberInEncoding[i.rn.realReg()], 368 regNumberInEncoding[i.rm.realReg()], 369 vecArrangement(i.u2), 370 )) 371 case cCmpImm: 372 // Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en 373 sf := uint32(i.u3 & 0b1) 374 nzcv := uint32(i.u2 & 0b1111) 375 cond := uint32(condFlag(i.u1)) 376 imm := uint32(i.rm.data & 0b11111) 377 rn := regNumberInEncoding[i.rn.realReg()] 378 c.Emit4Bytes( 379 sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv, 380 ) 381 case movFromFPSR: 382 rt := regNumberInEncoding[i.rd.realReg()] 383 c.Emit4Bytes(encodeSystemRegisterMove(rt, true)) 384 case movToFPSR: 385 rt := regNumberInEncoding[i.rn.realReg()] 386 c.Emit4Bytes(encodeSystemRegisterMove(rt, false)) 387 default: 388 panic(i.String()) 389 } 390 } 391 392 func encodeMov64(rd, rn uint32, toIsSp, fromIsSp bool) uint32 { 393 if toIsSp || fromIsSp { 394 // This is an alias of ADD (immediate): 395 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate-- 396 return encodeAddSubtractImmediate(0b100, 0, 0, rn, rd) 397 } else { 398 // This is an alias of ORR (shifted register): 399 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register-- 400 return encodeLogicalShiftedRegister(0b101, 0, rn, 0, regNumberInEncoding[xzr], rd) 401 } 402 } 403 404 // encodeSystemRegisterMove encodes as "System register move" in 405 // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en 406 // 407 // Note that currently we only supports read/write of FPSR. 
func encodeSystemRegisterMove(rt uint32, fromSystem bool) uint32 {
	// Base pattern selects FPSR (op0=0b11, op1=0b011, CRn=0b0100, CRm=0b0100, op2=0b001).
	ret := 0b11010101<<24 | 0b11011<<16 | 0b01000100<<8 | 0b001<<5 | rt
	if fromSystem {
		// L bit: 1 = MRS (read system register), 0 = MSR (write).
		ret |= 0b1 << 21
	}
	return ret
}

// encodeVecRRR encodes as either "Advanced SIMD three *" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
//
// Each case maps the abstract vecOp to the (opcode, size, U) triple of the
// "three same" encoding group (umlal uses "three different" instead), after
// validating that the requested arrangement is legal for that operation.
func encodeVecRRR(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 {
	switch op {
	case vecOpBit:
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b1, q)
	case vecOpBic:
		if arr > vecArrangement16B {
			panic("unsupported arrangement: " + arr.String())
		}
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b0, q)
	case vecOpBsl:
		if arr > vecArrangement16B {
			panic("unsupported arrangement: " + arr.String())
		}
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b1, q)
	case vecOpAnd:
		if arr > vecArrangement16B {
			panic("unsupported arrangement: " + arr.String())
		}
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b00 /* always has size 0b00 */, 0b0, q)
	case vecOpOrr:
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b0, q)
	case vecOpEOR:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, size, 0b1, q)
	case vecOpCmeq:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10001, size, 0b1, q)
	case vecOpCmgt:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b0, q)
	case vecOpCmhi:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b1, q)
	case vecOpCmge:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b0, q)
	case vecOpCmhs:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b1, q)
	case vecOpFcmeq:
		// Floating-point compares use the sz:Q encoding, hence the explicit table
		// instead of arrToSizeQEncoded.
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b0, q)
	case vecOpFcmgt:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q)
	case vecOpFcmge:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q)
	case vecOpAdd:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b0, q)
	case vecOpSqadd:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b0, q)
	case vecOpUqadd:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b1, q)
	case vecOpAddp:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10111, size, 0b0, q)
	case vecOpSqsub:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b0, q)
	case vecOpUqsub:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b1, q)
	case vecOpSub:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b1, q)
	case vecOpFmin:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q)
	case vecOpSmin:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b0, q)
	case vecOpUmin:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b1, q)
	case vecOpFmax:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q)
	case vecOpFadd:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q)
	case vecOpFsub:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q)
	case vecOpFmul:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11011, size, 0b1, q)
	case vecOpSqrdmulh:
		if arr < vecArrangement4H || arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10110, size, 0b1, q)
	case vecOpFdiv:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11111, size, 0b1, q)
	case vecOpSmax:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b0, q)
	case vecOpUmax:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b1, q)
	case vecOpUmaxp:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10100, size, 0b1, q)
	case vecOpUrhadd:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00010, size, 0b1, q)
	case vecOpMul:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10011, size, 0b0, q)
	case vecOpUmlal:
		// Widening multiply-accumulate: encoded in the "three different" group.
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1000, size, 0b1, q)
	case vecOpSshl:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b0, q)
	case vecOpUshl:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b1, q)

	default:
		panic("TODO: " + op.String())
	}
}

// arrToSizeQEncoded maps a vector arrangement to the (size, Q) field pair of
// the Advanced SIMD encodings: size selects the lane width (00=B, 01=H,
// 10=S, 11=D) and Q selects the 64-bit (0) or 128-bit (1) register form.
// The fallthroughs pair each 128-bit arrangement with its 64-bit sibling.
func arrToSizeQEncoded(arr vecArrangement) (size, q uint32) {
	switch arr {
	case vecArrangement16B:
		q = 0b1
		fallthrough
	case vecArrangement8B:
		size = 0b00
	case vecArrangement8H:
		q = 0b1
		fallthrough
	case vecArrangement4H:
		size = 0b01
	case vecArrangement4S:
		q = 0b1
		fallthrough
	case vecArrangement2S:
		size = 0b10
	case vecArrangement2D:
		q = 0b1
		fallthrough
	case vecArrangement1D:
		size = 0b11
	default:
		panic("BUG")
	}
	return
}

// encodeAdvancedSIMDThreeSame encodes as "Advanced SIMD three same" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeAdvancedSIMDThreeSame(rd, rn, rm, opcode, size, U, Q uint32) uint32 {
	return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<11 | 0b1<<10 | rn<<5 | rd
}

// encodeAdvancedSIMDThreeDifferent encodes as "Advanced SIMD three different" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeAdvancedSIMDThreeDifferent(rd, rn, rm, opcode, size, U, Q uint32) uint32 {
	return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<12 | rn<<5 | rd
}

// encodeFloatDataOneSource encodes as "Floating-point data-processing (1 source)" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
//
// ptype selects the precision (0b00 single, 0b01 double); for the conversion
// ops the precision is fixed by the op itself, for the rest it follows dst64bit.
func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 {
	var opcode, ptype uint32
	switch op {
	case fpuUniOpCvt32To64:
		opcode = 0b000101
	case fpuUniOpCvt64To32:
		opcode = 0b000100
		ptype = 0b01
	case fpuUniOpNeg:
		opcode = 0b000010
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpSqrt:
		opcode = 0b000011
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundPlus:
		opcode = 0b001001
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundMinus:
		opcode = 0b001010
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundZero:
		opcode = 0b001011
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundNearest:
		opcode = 0b001000
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpAbs:
		opcode = 0b000001
		if dst64bit {
			ptype = 0b01
		}
	default:
		panic("BUG")
	}
	return 0b1111<<25 | ptype<<22 | 0b1<<21 | opcode<<15 | 0b1<<14 | rn<<5 | rd
}

// encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
//
// The instruction's u1/u2/u3 carry signedness, source width, and destination
// width respectively; sf tracks the integer side, ptype the float side.
func encodeCnvBetweenFloatInt(i *instruction) uint32 {
	rd := regNumberInEncoding[i.rd.realReg()]
	rn := regNumberInEncoding[i.rn.realReg()]

	var opcode uint32
	var rmode uint32
	var ptype uint32
	var sf uint32
	switch i.kind {
	case intToFpu: // Either UCVTF or SCVTF.
		rmode = 0b00

		signed := i.u1 == 1
		src64bit := i.u2 == 1
		dst64bit := i.u3 == 1
		if signed {
			opcode = 0b010
		} else {
			opcode = 0b011
		}
		if src64bit {
			sf = 0b1
		}
		if dst64bit {
			ptype = 0b01
		} else {
			ptype = 0b00
		}
	case fpuToInt: // Either FCVTZU or FCVTZS.
		rmode = 0b11

		signed := i.u1 == 1
		src64bit := i.u2 == 1
		dst64bit := i.u3 == 1

		if signed {
			opcode = 0b000
		} else {
			opcode = 0b001
		}
		if dst64bit {
			sf = 0b1
		}
		if src64bit {
			ptype = 0b01
		} else {
			ptype = 0b00
		}
	}
	return sf<<31 | 0b1111<<25 | ptype<<22 | 0b1<<21 | rmode<<19 | opcode<<16 | rn<<5 | rd
}

// encodeAdr encodes a PC-relative ADR instruction.
// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address-
func encodeAdr(rd uint32, offset uint32) uint32 {
	if offset >= 1<<20 {
		panic("BUG: too large adr instruction")
	}
	// In Go, `&` and `<<` share precedence and associate left-to-right, so
	// `offset&0b11<<29` is `(offset&0b11)<<29`: the low two bits form immlo
	// (bits 29-30) and the remaining bits form immhi (starting at bit 5).
	return offset&0b11<<29 | 0b1<<28 | offset&0b1111111111_1111111100<<3 | rd
}

// encodeFpuCSel encodes as "Floating-point conditional select" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
	var ftype uint32
	if _64bit {
		ftype = 0b01 // double precision.
	}
	return 0b1111<<25 | ftype<<22 | 0b1<<21 | rm<<16 | uint32(c)<<12 | 0b11<<10 | rn<<5 | rd
}

// encodeMoveToVec encodes as "Move general-purpose register to a vector element" (represented as `ins`) in
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/MOV--vector--from-general-
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--from-general---Move-general-purpose-register-to-a-vector-element--an-alias-of-INS--general--?lang=en
//
// imm5 packs both the lane-size marker bit and the lane index; the index bound
// shrinks as the lane widens (B: 0-15, H: 0-7, S: 0-3, D: 0-1).
func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 {
	var imm5 uint32
	switch arr {
	case vecArrangementB:
		imm5 |= 0b1
		imm5 |= uint32(index) << 1
		if index > 0b1111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index))
		}
	case vecArrangementH:
		imm5 |= 0b10
		imm5 |= uint32(index) << 2
		if index > 0b111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index))
		}
	case vecArrangementS:
		imm5 |= 0b100
		imm5 |= uint32(index) << 3
		if index > 0b11 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index))
		}
	case vecArrangementD:
		imm5 |= 0b1000
		imm5 |= uint32(index) << 4
		if index > 0b1 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index))
		}
	default:
		panic("Unsupported arrangement " + arr.String())
	}

	return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd
}

// encodeVecMovElement encodes as "Move vector element to another vector element, mov (element)" (represented as `ins`) in
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--element---Move-vector-element-to-another-vector-element--an-alias-of-INS--element--?lang=en
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en
//
// imm5 carries the lane-size marker and source index; imm4 carries the
// destination index, shifted by the lane-size in the same way.
func encodeVecMovElement(rd, rn uint32, arr vecArrangement, srcIndex, dstIndex uint32) uint32 {
	var imm4, imm5 uint32
	switch arr {
	case vecArrangementB:
		imm5 |= 0b1
		imm5 |= srcIndex << 1
		imm4 = dstIndex
		if srcIndex > 0b1111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", srcIndex))
		}
	case vecArrangementH:
		imm5 |= 0b10
		imm5 |= srcIndex << 2
		imm4 = dstIndex << 1
		if srcIndex > 0b111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", srcIndex))
		}
	case vecArrangementS:
		imm5 |= 0b100
		imm5 |= srcIndex << 3
		imm4 = dstIndex << 2
		if srcIndex > 0b11 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", srcIndex))
		}
	case vecArrangementD:
		imm5 |= 0b1000
		imm5 |= srcIndex << 4
		imm4 = dstIndex << 3
		if srcIndex > 0b1 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", srcIndex))
		}
	default:
		panic("Unsupported arrangement " + arr.String())
	}

	return 0b01101110000<<21 | imm5<<16 | imm4<<11 | 0b1<<10 | rn<<5 | rd
}

// encodeUnconditionalBranchReg encodes as "Unconditional branch (register)" in:
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
func encodeUnconditionalBranchReg(rn uint32, link bool) uint32 {
	// opc: 0b0000 = BR, 0b0001 = BLR (stores the return address in LR).
	var opc uint32
	if link {
		opc = 0b0001
	}
	return 0b1101011<<25 | opc<<21 | 0b11111<<16 | rn<<5
}

// encodeMoveFromVec encodes as "Move vector element to a general-purpose register"
// (UMOV when unsigned, SMOV when signed — imm4 selects 0b0111 vs 0b0101 below) in
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--to-general---Move-vector-element-to-general-purpose-register--an-alias-of-UMOV-?lang=en
func encodeMoveFromVec(rd, rn uint32, arr vecArrangement, index vecIndex, signed bool) uint32 {
	var op, imm4, q, imm5 uint32
	switch {
	case arr == vecArrangementB:
		imm5 |= 0b1
		imm5 |= uint32(index) << 1
		if index > 0b1111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index))
		}
	case arr == vecArrangementH:
		imm5 |= 0b10
		imm5 |= uint32(index) << 2
		if index > 0b111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index))
		}
	case arr == vecArrangementS && signed:
		// Signed S-lane move targets a 64-bit destination (q=1), then falls
		// through to the shared S-lane imm5 setup.
		q = 0b1
		fallthrough
	case arr == vecArrangementS:
		imm5 |= 0b100
		imm5 |= uint32(index) << 3
		if index > 0b11 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index))
		}
	case arr == vecArrangementD && !signed:
		imm5 |= 0b1000
		imm5 |= uint32(index) << 4
		q = 0b1
		if index > 0b1 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index))
		}
	default:
		panic("Unsupported arrangement " + arr.String())
	}
	if signed {
		op, imm4 = 0, 0b0101
	} else {
		op, imm4 = 0, 0b0111
	}
	return op<<29 | 0b01110000<<21 | q<<30 | imm5<<16 | imm4<<11 | 1<<10 | rn<<5 | rd
}

// encodeVecDup encodes as "Duplicate general-purpose register to vector" DUP (general)
// (represented as `dup`)
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en
func encodeVecDup(rd, rn uint32, arr vecArrangement) uint32 {
	// imm5's lowest set bit marks the lane size; q selects the 128-bit form.
	var q, imm5 uint32
	switch arr {
	case vecArrangement8B:
		q, imm5 = 0b0, 0b1
	case vecArrangement16B:
		q, imm5 = 0b1, 0b1
	case vecArrangement4H:
		q, imm5 = 0b0, 0b10
	case vecArrangement8H:
		q, imm5 = 0b1, 0b10
	case vecArrangement2S:
		q, imm5 = 0b0, 0b100
	case vecArrangement4S:
		q, imm5 = 0b1, 0b100
	case vecArrangement2D:
		q, imm5 = 0b1, 0b1000
	default:
		panic("Unsupported arrangement " + arr.String())
	}
	return q<<30 | 0b001110000<<21 | imm5<<16 | 0b000011<<10 | rn<<5 | rd
}

// encodeVecDupElement encodes as "Duplicate vector element to vector or scalar" DUP (element).
// (represented as `dup`)
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar-
func encodeVecDupElement(rd, rn uint32, arr vecArrangement, srcIndex vecIndex) uint32 {
	var q, imm5 uint32
	q = 0b1 // Always the 128-bit destination form.
	switch arr {
	case vecArrangementB:
		imm5 |= 0b1
		imm5 |= uint32(srcIndex) << 1
	case vecArrangementH:
		imm5 |= 0b10
		imm5 |= uint32(srcIndex) << 2
	case vecArrangementS:
		imm5 |= 0b100
		imm5 |= uint32(srcIndex) << 3
	case vecArrangementD:
		imm5 |= 0b1000
		imm5 |= uint32(srcIndex) << 4
	default:
		panic("unsupported arrangement" + arr.String())
	}

	return q<<30 | 0b001110000<<21 | imm5<<16 | 0b1<<10 | rn<<5 | rd
}

// encodeVecExtract encodes as "Advanced SIMD extract."
// Currently only `ext` is defined.
// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
// https://developer.arm.com/documentation/ddi0602/2023-06/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en
func encodeVecExtract(rd, rn, rm uint32, arr vecArrangement, index uint32) uint32 {
	var q, imm4 uint32
	switch arr {
	case vecArrangement8B:
		// Only the low 3 bits of the index are meaningful for a 64-bit vector.
		q, imm4 = 0, 0b0111&uint32(index)
	case vecArrangement16B:
		q, imm4 = 1, 0b1111&uint32(index)
	default:
		panic("Unsupported arrangement " + arr.String())
	}
	return q<<30 | 0b101110000<<21 | rm<<16 | imm4<<11 | rn<<5 | rd
}

// encodeVecPermute encodes as "Advanced SIMD permute."
// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
func encodeVecPermute(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 {
	var q, size, opcode uint32
	switch op {
	case vecOpZip1:
		opcode = 0b011
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	default:
		panic("TODO: " + op.String())
	}
	return q<<30 | 0b001110<<24 | size<<22 | rm<<16 | opcode<<12 | 0b10<<10 | rn<<5 | rd
}

// encodeConditionalSelect encodes as "Conditional select" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#condsel
func encodeConditionalSelect(kind instructionKind, rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
	if kind != cSel {
		panic("TODO: support other conditional select")
	}

	ret := 0b110101<<23 | rm<<16 | uint32(c)<<12 | rn<<5 | rd
	if _64bit {
		ret |= 0b1 << 31 // sf bit.
	}
	return ret
}

// dummyInstruction is an inert branch used as a placeholder for inlined data
// when disassemblable output is requested.
const dummyInstruction uint32 = 0x14000000 // "b 0"

// encodeLoadFpuConst32 encodes the following three instructions:
//
//	ldr s8, #8 ;; literal load of data.f32
//	b 8 ;; skip the data
//	data.f32 xxxxxxx
func encodeLoadFpuConst32(c backend.Compiler, rd uint32, rawF32 uint64) {
	c.Emit4Bytes(
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en
		0b111<<26 | (0x8/4)<<5 | rd,
	)
	c.Emit4Bytes(encodeUnconditionalBranch(false, 8)) // b 8
	if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
		// Inlined data.f32 cannot be disassembled, so we add a dummy instruction here.
		c.Emit4Bytes(dummyInstruction)
	} else {
		c.Emit4Bytes(uint32(rawF32)) // data.f32 xxxxxxx
	}
}

// encodeLoadFpuConst64 encodes the following three instructions:
//
//	ldr d8, #8 ;; literal load of data.f64
//	b 12 ;; skip the data
//	data.f64 xxxxxxx
func encodeLoadFpuConst64(c backend.Compiler, rd uint32, rawF64 uint64) {
	c.Emit4Bytes(
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en
		0b1<<30 | 0b111<<26 | (0x8/4)<<5 | rd,
	)
	c.Emit4Bytes(encodeUnconditionalBranch(false, 12)) // b 12
	if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
		// Inlined data.f64 cannot be disassembled, so we add dummy instructions here.
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
	} else {
		// data.f64 xxxxxxx (little-endian: low word first).
		c.Emit4Bytes(uint32(rawF64))
		c.Emit4Bytes(uint32(rawF64 >> 32))
	}
}

// encodeLoadFpuConst128 encodes the following three instructions:
//
//	ldr v8, #8 ;; literal load of data.v128
//	b 20 ;; skip the data
//	data.v128 xxxxxxx
func encodeLoadFpuConst128(c backend.Compiler, rd uint32, lo, hi uint64) {
	c.Emit4Bytes(
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en
		0b1<<31 | 0b111<<26 | (0x8/4)<<5 | rd,
	)
	c.Emit4Bytes(encodeUnconditionalBranch(false, 20)) // b 20
	if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
		// Inlined data.v128 cannot be disassembled, so we add dummy instructions here.
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
	} else {
		// data.v128 xxxxxxx (little-endian: low 64 bits first).
		c.Emit4Bytes(uint32(lo))
		c.Emit4Bytes(uint32(lo >> 32))
		c.Emit4Bytes(uint32(hi))
		c.Emit4Bytes(uint32(hi >> 32))
	}
}

// encodeAluRRRR encodes as Data-processing (3 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeAluRRRR(op aluOp, rd, rn, rm, ra, _64bit uint32) uint32 {
	var oO, op31 uint32
	switch op {
	case aluOpMAdd:
		op31, oO = 0b000, 0b0
	case aluOpMSub:
		op31, oO = 0b000, 0b1
	default:
		panic("TODO/BUG")
	}
	return _64bit<<31 | 0b11011<<24 | op31<<21 | rm<<16 | oO<<15 | ra<<10 | rn<<5 | rd
}

// encodeBitRR encodes as Data-processing (1 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeBitRR(op bitOp, rd,
rn, _64bit uint32) uint32 {
	var opcode2, opcode uint32
	switch op {
	case bitOpRbit:
		opcode2, opcode = 0b00000, 0b000000
	case bitOpClz:
		opcode2, opcode = 0b00000, 0b000100
	default:
		panic("TODO/BUG")
	}
	return _64bit<<31 | 0b1_0_11010110<<21 | opcode2<<16 | opcode<<10 | rn<<5 | rd
}

// encodeAsMov32 encodes a 32-bit register-to-register move.
func encodeAsMov32(rn, rd uint32) uint32 {
	// This is an alias of ORR (shifted register):
	// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
	return encodeLogicalShiftedRegister(0b001, 0, rn, 0, regNumberInEncoding[xzr], rd)
}

// encodeExtend encodes extension instructions.
func encodeExtend(signed bool, from, to byte, rd, rn uint32) uint32 {
	// UXTB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM-?lang=en
	// UXTH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTH--Unsigned-Extend-Halfword--an-alias-of-UBFM-?lang=en
	// SXTB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTB--Signed-Extend-Byte--an-alias-of-SBFM-
	// SXTH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTH--Sign-Extend-Halfword--an-alias-of-SBFM-
	// SXTW: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-
	var _31to10 uint32
	switch {
	case !signed && from == 8 && to == 32:
		// 32-bit UXTB
		_31to10 = 0b0101001100000000000111
	case !signed && from == 16 && to == 32:
		// 32-bit UXTH
		_31to10 = 0b0101001100000000001111
	case !signed && from == 8 && to == 64:
		// 64-bit UXTB
		_31to10 = 0b0101001100000000000111
	case !signed && from == 16 && to == 64:
		// 64-bit UXTH
		_31to10 = 0b0101001100000000001111
	case !signed && from == 32 && to == 64:
		// A 32-bit mov zeroes the upper 32 bits, which is exactly an
		// unsigned 32->64 extension.
		return encodeAsMov32(rn, rd)
	case signed && from == 8 && to == 32:
		// 32-bit SXTB
		_31to10 = 0b0001001100000000000111
	case signed && from == 16 && to == 32:
		// 32-bit SXTH
		_31to10 = 0b0001001100000000001111
	case signed && from == 8 && to == 64:
		// 64-bit SXTB
		_31to10 = 0b1001001101000000000111
	case signed && from == 16 && to == 64:
		// 64-bit SXTH
		_31to10 = 0b1001001101000000001111
	case signed && from == 32 && to == 64:
		// SXTW
		_31to10 = 0b1001001101000000011111
	default:
		panic("BUG")
	}
	return _31to10<<10 | rn<<5 | rd
}

// encodeLoadOrStore encodes the given load/store kind with the register rt and
// the address mode amode, dispatching on the address-mode kind.
func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 {
	var _22to31 uint32
	var bits int64
	switch kind {
	case uLoad8:
		_22to31 = 0b0011100001
		bits = 8
	case sLoad8:
		_22to31 = 0b0011100010
		bits = 8
	case uLoad16:
		_22to31 = 0b0111100001
		bits = 16
	case sLoad16:
		_22to31 = 0b0111100010
		bits = 16
	case uLoad32:
		_22to31 = 0b1011100001
		bits = 32
	case sLoad32:
		_22to31 = 0b1011100010
		bits = 32
	case uLoad64:
		_22to31 = 0b1111100001
		bits = 64
	case fpuLoad32:
		_22to31 = 0b1011110001
		bits = 32
	case fpuLoad64:
		_22to31 = 0b1111110001
		bits = 64
	case fpuLoad128:
		_22to31 = 0b0011110011
		bits = 128
	case store8:
		_22to31 = 0b0011100000
		bits = 8
	case store16:
		_22to31 = 0b0111100000
		bits = 16
	case store32:
		_22to31 = 0b1011100000
		bits = 32
	case store64:
		_22to31 = 0b1111100000
		bits = 64
	case fpuStore32:
		_22to31 = 0b1011110000
		bits = 32
	case fpuStore64:
		_22to31 = 0b1111110000
		bits = 64
	case fpuStore128:
		_22to31 = 0b0011110010
		bits = 128
	default:
		panic("BUG")
	}

	switch amode.kind {
	case addressModeKindRegScaledExtended:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()],
			regNumberInEncoding[amode.rm.RealReg()],
			rt, true, amode.extOp)
	case addressModeKindRegScaled:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()],
			rt, true, extendOpNone)
	case addressModeKindRegExtended:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()],
			rt, false, amode.extOp)
	case addressModeKindRegReg:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()],
			rt, false, extendOpNone)
	case addressModeKindRegSignedImm9:
		// e.g. https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--
		return encodeLoadOrStoreSIMM9(_22to31, 0b00 /* unscaled */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm)
	case addressModeKindPostIndex:
		return encodeLoadOrStoreSIMM9(_22to31, 0b01 /* post index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm)
	case addressModeKindPreIndex:
		return encodeLoadOrStoreSIMM9(_22to31, 0b11 /* pre index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm)
	case addressModeKindRegUnsignedImm12:
		// "unsigned immediate" in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
		rn := regNumberInEncoding[amode.rn.RealReg()]
		imm := amode.imm
		div := bits / 8
		if imm != 0 && !offsetFitsInAddressModeKindRegUnsignedImm12(byte(bits), imm) {
			panic("BUG")
		}
		// The immediate is scaled by the access size.
		imm /= div
		return _22to31<<22 | 0b1<<24 | uint32(imm&0b111111111111)<<10 | rn<<5 | rt
	default:
		panic("BUG")
	}
}

// encodeVecLoad1R encodes as Load one single-element structure and Replicate to all lanes (of one register) in
//
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#sa_imm
func encodeVecLoad1R(rt, rn uint32, arr vecArrangement) uint32 {
	size, q := arrToSizeQEncoded(arr)
	return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt
}

// encodeAluBitmaskImmediate encodes as Logical (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluBitmaskImmediate(op aluOp, rd, rn uint32, imm uint64, _64bit bool) uint32 {
	var _31to23 uint32
	switch op {
	case aluOpAnd:
		_31to23 = 0b00_100100
	case aluOpOrr:
		_31to23 = 0b01_100100
	case aluOpEor:
		_31to23 = 0b10_100100
	default:
		panic("BUG")
	}
	if _64bit {
		_31to23 |= 0b1 << 8 // sf bit.
	}
	immr, imms, N := bitmaskImmediate(imm, _64bit)
	return _31to23<<23 | uint32(N)<<22 | uint32(immr)<<16 | uint32(imms)<<10 | rn<<5 | rd
}

// bitmaskImmediate decomposes c into the (immr, imms, N) fields of a
// Logical (immediate) instruction. It assumes c is encodable as a bitmask
// immediate (a repeating rotated run of ones); the caller must have validated
// this beforehand.
func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
	var size uint32
	// Find the smallest repetition size of the pattern by comparing c against
	// rotations of itself, then sign-extend one repetition into c.
	switch {
	case c != c>>32|c<<32:
		size = 64
	case c != c>>16|c<<48:
		size = 32
		c = uint64(int32(c))
	case c != c>>8|c<<56:
		size = 16
		c = uint64(int16(c))
	case c != c>>4|c<<60:
		size = 8
		c = uint64(int8(c))
	case c != c>>2|c<<62:
		size = 4
		c = uint64(int64(c<<60) >> 60)
	default:
		size = 2
		c = uint64(int64(c<<62) >> 62)
	}

	// If the pattern begins with ones (negative when sign-extended), work on
	// the complement and adjust the run position/length afterwards.
	neg := false
	if int64(c) < 0 {
		c = ^c
		neg = true
	}

	onesSize, nonZeroPos := getOnesSequenceSize(c)
	if neg {
		nonZeroPos = onesSize + nonZeroPos
		onesSize = size - onesSize
	}

	var mode byte = 32
	if is64bit && size == 64 {
		N, mode = 0b1, 64
	}

	immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
	imms = byte((onesSize - 1) | 63&^(size<<1-1))
	return
}

// getOnesSequenceSize returns the length of the contiguous run of ones in x
// and the bit position at which it starts.
func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
	// Take 0b00111000 for example:
	y := getLowestBit(x)      // = 0b0000100
	nonZeroPos = setBitPos(y) // = 2
	size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b0100000) - 2 = 5 - 2 = 3
	return
}

// setBitPos returns the position of the set bit in x; it expects x to have
// exactly one bit set (e.g. the result of getLowestBit).
func setBitPos(x uint64) (ret uint32) {
	for ; ; ret++ {
		if x == 0b1 {
			break
		}
		x = x >> 1
	}
	return
}

// encodeLoadOrStoreExtended encodes store/load instruction as "extended register offset" in Load/store register (register offset):
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeLoadOrStoreExtended(_22to32 uint32, rn, rm, rt uint32, scaled bool, extOp extendOp) uint32 {
	var option uint32
	switch extOp {
	case extendOpUXTW:
		option = 0b010
	case extendOpSXTW:
		option = 0b110
	case extendOpNone:
		// option=0b111 means LSL (no extension).
		option = 0b111
	default:
		panic("BUG")
	}
	var s uint32
	if scaled {
		s = 0b1
	}
	return _22to32<<22 | 0b1<<21 | rm<<16 | option<<13 | s<<12 | 0b10<<10 | rn<<5 | rt
}

// encodeLoadOrStoreSIMM9 encodes store/load instruction as one of post-index, pre-index or unscaled immediate as in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeLoadOrStoreSIMM9(_22to32, _1011 uint32, rn, rt uint32, imm9 int64) uint32 {
	return _22to32<<22 | (uint32(imm9)&0b111111111)<<12 | _1011<<10 | rn<<5 | rt
}

// encodeFpuRRR encodes as single or double precision (depending on `_64bit`) of Floating-point data-processing (2 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeFpuRRR(op fpuBinOp, rd, rn, rm uint32, _64bit bool) (ret uint32) {
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector--Add-vectors--scalar--floating-point-and-integer-
	var opcode uint32
	switch op {
	case fpuBinOpAdd:
		opcode = 0b0010
	case fpuBinOpSub:
		opcode = 0b0011
	case fpuBinOpMul:
		opcode = 0b0000
	case fpuBinOpDiv:
		opcode = 0b0001
	case fpuBinOpMax:
		opcode = 0b0100
	case fpuBinOpMin:
		opcode = 0b0101
	default:
		panic("BUG")
	}
	var ptype uint32
	if _64bit {
		ptype = 0b01 // double precision.
	}
	return 0b1111<<25 | ptype<<22 | 0b1<<21 | rm<<16 | opcode<<12 | 0b1<<11 | rn<<5 | rd
}

// encodeAluRRImm12 encodes as Add/subtract (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluRRImm12(op aluOp, rd, rn uint32, imm12 uint16, shiftBit byte, _64bit bool) uint32 {
	var _31to24 uint32
	switch op {
	case aluOpAdd:
		_31to24 = 0b00_10001
	case aluOpAddS:
		_31to24 = 0b01_10001
	case aluOpSub:
		_31to24 = 0b10_10001
	case aluOpSubS:
		_31to24 = 0b11_10001
	default:
		panic("BUG")
	}
	if _64bit {
		_31to24 |= 0b1 << 7 // sf bit.
	}
	return _31to24<<24 | uint32(shiftBit)<<22 | uint32(imm12&0b111111111111)<<10 | rn<<5 | rd
}

// encodeAluRRRShift encodes as Data Processing (shifted register), depending on aluOp.
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
func encodeAluRRRShift(op aluOp, rd, rn, rm, amount uint32, shiftOp shiftOp, _64bit bool) uint32 {
	var _31to24 uint32
	// n stays zero here: negated logical variants (BIC/ORN/EON) are not emitted.
	var opc, n uint32
	switch op {
	case aluOpAdd:
		_31to24 = 0b00001011
	case aluOpAddS:
		_31to24 = 0b00101011
	case aluOpSub:
		_31to24 = 0b01001011
	case aluOpSubS:
		_31to24 = 0b01101011
	case aluOpAnd, aluOpOrr, aluOpEor:
		// "Logical (shifted register)".
		switch op {
		case aluOpAnd:
			// all zeros
		case aluOpOrr:
			opc = 0b01
		case aluOpEor:
			opc = 0b10
		}
		_31to24 = 0b000_01010
	default:
		panic(op.String())
	}

	if _64bit {
		_31to24 |= 0b1 << 7 // sf bit.
	}

	var shift uint32
	switch shiftOp {
	case shiftOpLSL:
		shift = 0b00
	case shiftOpLSR:
		shift = 0b01
	case shiftOpASR:
		shift = 0b10
	default:
		panic(shiftOp.String())
	}
	return opc<<29 | n<<21 | _31to24<<24 | shift<<22 | rm<<16 | (amount << 10) | (rn << 5) | rd
}

// "Add/subtract (extended register)" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_ext
func encodeAluRRRExtend(ao aluOp, rd, rn, rm uint32, extOp extendOp, to byte) uint32 {
	var s, op uint32
	switch ao {
	case aluOpAdd:
		op = 0b0
	case aluOpAddS:
		op, s = 0b0, 0b1
	case aluOpSub:
		op = 0b1
	case aluOpSubS:
		op, s = 0b1, 0b1
	default:
		panic("BUG: extended register operand can be used only for add/sub")
	}

	var sf uint32
	if to == 64 {
		sf = 0b1
	}

	// NOTE(review): there is no default case below, so an unexpected extOp
	// (e.g. extendOpNone) silently encodes option=0b000 (UXTB); callers are
	// expected to pass a valid extension op.
	var option uint32
	switch extOp {
	case extendOpUXTB:
		option = 0b000
	case extendOpUXTH:
		option = 0b001
	case extendOpUXTW:
		option = 0b010
	case extendOpSXTB:
		option = 0b100
	case extendOpSXTH:
		option = 0b101
	case extendOpSXTW:
		option = 0b110
	case extendOpSXTX, extendOpUXTX:
		panic(fmt.Sprintf("%s is essentially noop, and should be handled much earlier than encoding", extOp.String()))
	}
	return sf<<31 | op<<30 | s<<29 | 0b1011001<<21 | rm<<16 | option<<13 | rn<<5 | rd
}

// encodeAluRRR encodes as Data Processing (register), depending on aluOp.
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeAluRRR(op aluOp, rd, rn, rm uint32, _64bit, isRnSp bool) uint32 {
	var _31to21, _15to10 uint32
	switch op {
	case aluOpAdd:
		if isRnSp {
			// "Extended register" with UXTW.
			_31to21 = 0b00001011_001
			_15to10 = 0b011000
		} else {
			// "Shifted register" with shift = 0
			_31to21 = 0b00001011_000
		}
	case aluOpAddS:
		if isRnSp {
			panic("TODO")
		}
		// "Shifted register" with shift = 0
		_31to21 = 0b00101011_000
	case aluOpSub:
		if isRnSp {
			// "Extended register" with UXTW.
			_31to21 = 0b01001011_001
			_15to10 = 0b011000
		} else {
			// "Shifted register" with shift = 0
			_31to21 = 0b01001011_000
		}
	case aluOpSubS:
		if isRnSp {
			panic("TODO")
		}
		// "Shifted register" with shift = 0
		_31to21 = 0b01101011_000
	case aluOpAnd, aluOpOrr, aluOpEor:
		// "Logical (shifted register)".
		var opc, n uint32
		switch op {
		case aluOpAnd:
			// all zeros
		case aluOpOrr:
			opc = 0b01
		case aluOpEor:
			opc = 0b10
		}
		_31to21 = 0b000_01010_000 | opc<<8 | n
	case aluOpLsl, aluOpAsr, aluOpLsr, aluOpRotR:
		// "Data-processing (2 source)".
		_31to21 = 0b00011010_110
		switch op {
		case aluOpLsl:
			_15to10 = 0b001000
		case aluOpLsr:
			_15to10 = 0b001001
		case aluOpAsr:
			_15to10 = 0b001010
		case aluOpRotR:
			_15to10 = 0b001011
		}
	case aluOpSDiv:
		// "Data-processing (2 source)".
		_31to21 = 0b11010110
		_15to10 = 0b000011
	case aluOpUDiv:
		// "Data-processing (2 source)".
		_31to21 = 0b11010110
		_15to10 = 0b000010
	default:
		panic(op.String())
	}
	if _64bit {
		_31to21 |= 0b1 << 10 // sf bit (bit 31 of the instruction).
	}
	return _31to21<<21 | rm<<16 | (_15to10 << 10) | (rn << 5) | rd
}

// encodeLogicalShiftedRegister encodes as Logical (shifted register) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeLogicalShiftedRegister(sf_opc uint32, shift_N uint32, rm uint32, imm6 uint32, rn, rd uint32) (ret uint32) {
	ret = sf_opc << 29
	ret |= 0b01010 << 24
	ret |= shift_N << 21
	ret |= rm << 16
	ret |= imm6 << 10
	ret |= rn << 5
	ret |= rd
	return
}

// encodeAddSubtractImmediate encodes as Add/subtract (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAddSubtractImmediate(sf_op_s uint32, sh uint32, imm12 uint32, rn, rd uint32) (ret uint32) {
	ret = sf_op_s << 29
	ret |= 0b100010 << 23
	ret |= sh << 22
	ret |= imm12 << 10
	ret |= rn << 5
	ret |= rd
	return
}

// encodePreOrPostIndexLoadStorePair64 encodes as Load/store pair (pre/post-indexed) in
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-
//
// NOTE(review): only multiple-of-8 is validated here; callers must guarantee
// imm7/8 fits in the signed 7-bit field.
func encodePreOrPostIndexLoadStorePair64(pre bool, load bool, rn, rt, rt2 uint32, imm7 int64) (ret uint32) {
	if imm7%8 != 0 {
		panic("imm7 for pair load/store must be a multiple of 8")
	}
	imm7 /= 8
	ret = rt
	ret |= rn << 5
	ret |= rt2 << 10
	ret |= (uint32(imm7) & 0b1111111) << 15
	if load {
		ret |= 0b1 << 22
	}
	ret |= 0b101010001 << 23
	if pre {
		ret |= 0b1 << 24
	}
	return
}

// encodeUnconditionalBranch encodes as B or BL instructions:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-
func encodeUnconditionalBranch(link bool, imm26 int64) (ret uint32) {
	if imm26%4 != 0 {
		panic("imm26 for branch must be a multiple of 4")
	}
	imm26 /= 4
	ret = uint32(imm26 & 0b11_11111111_11111111_11111111)
	ret |= 0b101 << 26
	if link {
		ret |= 0b1 << 31 // BL rather than B.
	}
	return
}

// encodeCBZCBNZ encodes as either CBZ or CBNZ:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero-
func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {
	ret = rt
	ret |= imm19 << 5
	if nz {
		ret |= 1 << 24
	}
	ret |= 0b11010 << 25
	if _64bit {
		ret |= 1 << 31
	}
	return
}

// encodeMoveWideImmediate encodes as either MOVZ, MOVN or MOVK, as Move wide (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
//
// "shift" must have been divided by 16 at this point.
func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) {
	ret = rd
	ret |= uint32(imm&0xffff) << 5
	ret |= (uint32(shift)) << 21
	ret |= 0b100101 << 23
	ret |= opc << 29
	ret |= uint32(_64bit) << 31
	return
}

// encodeAluRRImm encodes as "Bitfield" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm
func encodeAluRRImm(op aluOp, rd, rn, amount, _64bit uint32) uint32 {
	var opc uint32
	var immr, imms uint32
	switch op {
	case aluOpLsl:
		// LSL (immediate) is an alias for UBFM.
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/UBFM--Unsigned-Bitfield-Move-?lang=en
		opc = 0b10
		if amount == 0 {
			// This can be encoded as NOP, but we don't do it for consistency: lsr xn, xm, #0
			immr = 0
			if _64bit == 1 {
				imms = 0b111111
			} else {
				imms = 0b11111
			}
		} else {
			if _64bit == 1 {
				immr = 64 - amount
			} else {
				immr = (32 - amount) & 0b11111
			}
			imms = immr - 1
		}
	case aluOpLsr:
		// LSR (immediate) is an alias for UBFM.
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
		opc = 0b10
		imms, immr = 0b011111|_64bit<<5, amount
	case aluOpAsr:
		// ASR (immediate) is an alias for SBFM.
		// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SBFM--Signed-Bitfield-Move-?lang=en
		opc = 0b00
		imms, immr = 0b011111|_64bit<<5, amount
	default:
		panic(op.String())
	}
	return _64bit<<31 | opc<<29 | 0b100110<<23 | _64bit<<22 | immr<<16 | imms<<10 | rn<<5 | rd
}

// encodeVecLanes encodes as Data Processing (Advanced SIMD across lanes) depending on vecOp in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeVecLanes(op vecOp, rd uint32, rn uint32, arr vecArrangement) uint32 {
	var u, q, size, opcode uint32
	switch arr {
	case vecArrangement8B:
		q, size = 0b0, 0b00
	case vecArrangement16B:
		q, size = 0b1, 0b00
	case vecArrangement4H:
		q, size = 0, 0b01
	case vecArrangement8H:
		q, size = 1, 0b01
	case vecArrangement4S:
		q, size = 1, 0b10
	default:
		panic("unsupported arrangement: " + arr.String())
	}
	switch op {
	case vecOpUaddlv:
		u, opcode = 1, 0b00011
	case vecOpUminv:
		u, opcode = 1, 0b11010
	case vecOpAddv:
		u, opcode = 0, 0b11011
	default:
		panic("unsupported or illegal vecOp: " + op.String())
	}
	return q<<30 | u<<29 | 0b1110<<24 | size<<22 | 0b11000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd
}

// encodeVecShiftImm encodes as Data Processing (Advanced SIMD scalar shift by immediate) depending on vecOp in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeVecShiftImm(op vecOp, rd uint32, rn, amount uint32, arr vecArrangement) uint32 {
	var u, q, immh, immb, opcode uint32
	switch op {
	case vecOpSshll:
		u, opcode = 0b0, 0b10100
	case vecOpUshll:
		u, opcode = 0b1, 0b10100
	case vecOpSshr:
		u, opcode = 0, 0b00000
	default:
		panic("unsupported or illegal vecOp: " + op.String())
	}
	// immh:immb together encode both the element size and the shift amount.
	switch arr {
	case vecArrangement16B:
		q = 0b1
		fallthrough
	case vecArrangement8B:
		immh = 0b0001
		immb = 8 - uint32(amount&0b111)
	case vecArrangement8H:
		q = 0b1
		fallthrough
	case vecArrangement4H:
		v := 16 - uint32(amount&0b1111)
		immb = v & 0b111
		immh = 0b0010 | (v >> 3)
	case vecArrangement4S:
		q = 0b1
		fallthrough
	case vecArrangement2S:
		v := 32 - uint32(amount&0b11111)
		immb = v & 0b111
		immh = 0b0100 | (v >> 3)
	case vecArrangement2D:
		q = 0b1
		v := 64 - uint32(amount&0b111111)
		immb = v & 0b111
		immh = 0b1000 | (v >> 3)
	default:
		panic("unsupported arrangement: " + arr.String())
	}
	return q<<30 | u<<29 | 0b011110<<23 | immh<<19 | immb<<16 | 0b000001<<10 | opcode<<11 | 0b1<<10 | rn<<5 | rd
}

// encodeVecTbl encodes as Data Processing (Advanced SIMD table lookup) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
//
// Note: tblOp may encode tbl1, tbl2... in the future. Currently, it is ignored.
func encodeVecTbl(nregs, rd, rn, rm uint32, arr vecArrangement) uint32 {
	// op2 and op remain zero for the plain TBL encoding.
	var q, op2, len, op uint32

	switch nregs {
	case 1:
		// tbl: single-register
		len = 0b00
	case 2:
		// tbl2: 2-register table
		len = 0b01
	default:
		panic(fmt.Sprintf("unsupported number or registers %d", nregs))
	}
	switch arr {
	case vecArrangement8B:
		q = 0b0
	case vecArrangement16B:
		q = 0b1
	default:
		panic("unsupported arrangement: " + arr.String())
	}

	return q<<30 | 0b001110<<24 | op2<<22 | rm<<16 | len<<13 | op<<12 | rn<<5 | rd
}

// encodeVecMisc encodes as Data Processing (Advanced SIMD two-register miscellaneous) depending on vecOp in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
func encodeAdvancedSIMDTwoMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 {
	var q, u, size, opcode uint32
	switch op {
	case vecOpCnt:
		opcode = 0b00101
		switch arr {
		case vecArrangement8B:
			q, size = 0b0, 0b00
		case vecArrangement16B:
			q, size = 0b1, 0b00
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpCmeq0:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01001
		size, q = arrToSizeQEncoded(arr)
	case vecOpNot:
		u = 1
		opcode = 0b00101
		switch arr {
		case vecArrangement8B:
			q, size = 0b0, 0b00
		case vecArrangement16B:
			q, size = 0b1, 0b00
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpAbs:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01011
		u = 0b0
		size, q = arrToSizeQEncoded(arr)
	case vecOpNeg:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " +
arr.String()) 1934 } 1935 opcode = 0b01011 1936 u = 0b1 1937 size, q = arrToSizeQEncoded(arr) 1938 case vecOpFabs: 1939 if arr < vecArrangement2S || arr == vecArrangement1D { 1940 panic("unsupported arrangement: " + arr.String()) 1941 } 1942 opcode = 0b01111 1943 u = 0b0 1944 size, q = arrToSizeQEncoded(arr) 1945 case vecOpFneg: 1946 if arr < vecArrangement2S || arr == vecArrangement1D { 1947 panic("unsupported arrangement: " + arr.String()) 1948 } 1949 opcode = 0b01111 1950 u = 0b1 1951 size, q = arrToSizeQEncoded(arr) 1952 case vecOpFrintm: 1953 u = 0b0 1954 opcode = 0b11001 1955 switch arr { 1956 case vecArrangement2S: 1957 q, size = 0b0, 0b00 1958 case vecArrangement4S: 1959 q, size = 0b1, 0b00 1960 case vecArrangement2D: 1961 q, size = 0b1, 0b01 1962 default: 1963 panic("unsupported arrangement: " + arr.String()) 1964 } 1965 case vecOpFrintn: 1966 u = 0b0 1967 opcode = 0b11000 1968 switch arr { 1969 case vecArrangement2S: 1970 q, size = 0b0, 0b00 1971 case vecArrangement4S: 1972 q, size = 0b1, 0b00 1973 case vecArrangement2D: 1974 q, size = 0b1, 0b01 1975 default: 1976 panic("unsupported arrangement: " + arr.String()) 1977 } 1978 case vecOpFrintp: 1979 u = 0b0 1980 opcode = 0b11000 1981 if arr < vecArrangement2S || arr == vecArrangement1D { 1982 panic("unsupported arrangement: " + arr.String()) 1983 } 1984 size, q = arrToSizeQEncoded(arr) 1985 case vecOpFrintz: 1986 u = 0b0 1987 opcode = 0b11001 1988 if arr < vecArrangement2S || arr == vecArrangement1D { 1989 panic("unsupported arrangement: " + arr.String()) 1990 } 1991 size, q = arrToSizeQEncoded(arr) 1992 case vecOpFsqrt: 1993 if arr < vecArrangement2S || arr == vecArrangement1D { 1994 panic("unsupported arrangement: " + arr.String()) 1995 } 1996 opcode = 0b11111 1997 u = 0b1 1998 size, q = arrToSizeQEncoded(arr) 1999 case vecOpFcvtl: 2000 opcode = 0b10111 2001 u = 0b0 2002 switch arr { 2003 case vecArrangement2S: 2004 size, q = 0b01, 0b0 2005 case vecArrangement4H: 2006 size, q = 0b00, 0b0 2007 default: 
2008 panic("unsupported arrangement: " + arr.String()) 2009 } 2010 case vecOpFcvtn: 2011 opcode = 0b10110 2012 u = 0b0 2013 switch arr { 2014 case vecArrangement2S: 2015 size, q = 0b01, 0b0 2016 case vecArrangement4H: 2017 size, q = 0b00, 0b0 2018 default: 2019 panic("unsupported arrangement: " + arr.String()) 2020 } 2021 case vecOpFcvtzs: 2022 opcode = 0b11011 2023 u = 0b0 2024 switch arr { 2025 case vecArrangement2S: 2026 q, size = 0b0, 0b10 2027 case vecArrangement4S: 2028 q, size = 0b1, 0b10 2029 case vecArrangement2D: 2030 q, size = 0b1, 0b11 2031 default: 2032 panic("unsupported arrangement: " + arr.String()) 2033 } 2034 case vecOpFcvtzu: 2035 opcode = 0b11011 2036 u = 0b1 2037 switch arr { 2038 case vecArrangement2S: 2039 q, size = 0b0, 0b10 2040 case vecArrangement4S: 2041 q, size = 0b1, 0b10 2042 case vecArrangement2D: 2043 q, size = 0b1, 0b11 2044 default: 2045 panic("unsupported arrangement: " + arr.String()) 2046 } 2047 case vecOpScvtf: 2048 opcode = 0b11101 2049 u = 0b0 2050 switch arr { 2051 case vecArrangement4S: 2052 q, size = 0b1, 0b00 2053 case vecArrangement2S: 2054 q, size = 0b0, 0b00 2055 case vecArrangement2D: 2056 q, size = 0b1, 0b01 2057 default: 2058 panic("unsupported arrangement: " + arr.String()) 2059 } 2060 case vecOpUcvtf: 2061 opcode = 0b11101 2062 u = 0b1 2063 switch arr { 2064 case vecArrangement4S: 2065 q, size = 0b1, 0b00 2066 case vecArrangement2S: 2067 q, size = 0b0, 0b00 2068 case vecArrangement2D: 2069 q, size = 0b1, 0b01 2070 default: 2071 panic("unsupported arrangement: " + arr.String()) 2072 } 2073 case vecOpSqxtn: 2074 // When q == 1 it encodes sqxtn2 (operates on upper 64 bits). 2075 opcode = 0b10100 2076 u = 0b0 2077 if arr > vecArrangement4S { 2078 panic("unsupported arrangement: " + arr.String()) 2079 } 2080 size, q = arrToSizeQEncoded(arr) 2081 case vecOpUqxtn: 2082 // When q == 1 it encodes uqxtn2 (operates on upper 64 bits). 
2083 opcode = 0b10100 2084 u = 0b1 2085 if arr > vecArrangement4S { 2086 panic("unsupported arrangement: " + arr.String()) 2087 } 2088 size, q = arrToSizeQEncoded(arr) 2089 case vecOpSqxtun: 2090 // When q == 1 it encodes sqxtun2 (operates on upper 64 bits). 2091 opcode = 0b10010 // 0b10100 2092 u = 0b1 2093 if arr > vecArrangement4S { 2094 panic("unsupported arrangement: " + arr.String()) 2095 } 2096 size, q = arrToSizeQEncoded(arr) 2097 case vecOpRev64: 2098 opcode = 0b00000 2099 size, q = arrToSizeQEncoded(arr) 2100 case vecOpXtn: 2101 u = 0b0 2102 opcode = 0b10010 2103 size, q = arrToSizeQEncoded(arr) 2104 case vecOpShll: 2105 u = 0b1 2106 opcode = 0b10011 2107 switch arr { 2108 case vecArrangement8B: 2109 q, size = 0b0, 0b00 2110 case vecArrangement4H: 2111 q, size = 0b0, 0b01 2112 case vecArrangement2S: 2113 q, size = 0b0, 0b10 2114 default: 2115 panic("unsupported arrangement: " + arr.String()) 2116 } 2117 default: 2118 panic("unsupported or illegal vecOp: " + op.String()) 2119 } 2120 return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd 2121 } 2122 2123 // brTableSequenceOffsetTableBegin is the offset inside the brTableSequence where the table begins after 4 instructions 2124 const brTableSequenceOffsetTableBegin = 16 2125 2126 func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []uint32) { 2127 tmpRegNumber := regNumberInEncoding[tmp] 2128 indexNumber := regNumberInEncoding[index.RealReg()] 2129 2130 // adr tmpReg, PC+16 (PC+16 is the address of the first label offset) 2131 // ldrsw index, [tmpReg, index, UXTW 2] ;; index = int64(*(tmpReg + index*8)) 2132 // add tmpReg, tmpReg, index 2133 // br tmpReg 2134 // [offset_to_l1, offset_to_l2, ..., offset_to_lN] 2135 c.Emit4Bytes(encodeAdr(tmpRegNumber, 16)) 2136 c.Emit4Bytes(encodeLoadOrStore(sLoad32, indexNumber, 2137 addressMode{kind: addressModeKindRegScaledExtended, rn: tmpRegVReg, rm: index, extOp: extendOpUXTW}, 2138 )) 2139 
c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false)) 2140 c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false)) 2141 2142 // Offsets are resolved in ResolveRelativeAddress phase. 2143 for _, offset := range targets { 2144 if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable { 2145 // Inlined offset tables cannot be disassembled properly, so pad dummy instructions to make the debugging easier. 2146 c.Emit4Bytes(dummyInstruction) 2147 } else { 2148 c.Emit4Bytes(offset) 2149 } 2150 } 2151 } 2152 2153 // encodeExitSequence matches the implementation detail of abiImpl.emitGoEntryPreamble. 2154 func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) { 2155 // Restore the FP, SP and LR, and return to the Go code: 2156 // ldr lr, [ctxReg, #GoReturnAddress] 2157 // ldr fp, [ctxReg, #OriginalFramePointer] 2158 // ldr tmp, [ctxReg, #OriginalStackPointer] 2159 // mov sp, tmp ;; sp cannot be str'ed directly. 2160 // ret ;; --> return to the Go code 2161 2162 var ctxEvicted bool 2163 if ctx := ctxReg.RealReg(); ctx == fp || ctx == lr { 2164 // In order to avoid overwriting the context register, we move ctxReg to tmp. 
2165 c.Emit4Bytes(encodeMov64(regNumberInEncoding[tmp], regNumberInEncoding[ctx], false, false)) 2166 ctxReg = tmpRegVReg 2167 ctxEvicted = true 2168 } 2169 2170 restoreLr := encodeLoadOrStore( 2171 uLoad64, 2172 regNumberInEncoding[lr], 2173 addressMode{ 2174 kind: addressModeKindRegUnsignedImm12, 2175 rn: ctxReg, 2176 imm: wazevoapi.ExecutionContextOffsetGoReturnAddress.I64(), 2177 }, 2178 ) 2179 2180 restoreFp := encodeLoadOrStore( 2181 uLoad64, 2182 regNumberInEncoding[fp], 2183 addressMode{ 2184 kind: addressModeKindRegUnsignedImm12, 2185 rn: ctxReg, 2186 imm: wazevoapi.ExecutionContextOffsetOriginalFramePointer.I64(), 2187 }, 2188 ) 2189 2190 restoreSpToTmp := encodeLoadOrStore( 2191 uLoad64, 2192 regNumberInEncoding[tmp], 2193 addressMode{ 2194 kind: addressModeKindRegUnsignedImm12, 2195 rn: ctxReg, 2196 imm: wazevoapi.ExecutionContextOffsetOriginalStackPointer.I64(), 2197 }, 2198 ) 2199 2200 movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0, 2201 regNumberInEncoding[tmp], regNumberInEncoding[sp]) 2202 2203 c.Emit4Bytes(restoreFp) 2204 c.Emit4Bytes(restoreLr) 2205 c.Emit4Bytes(restoreSpToTmp) 2206 c.Emit4Bytes(movTmpToSp) 2207 c.Emit4Bytes(encodeRet()) 2208 if !ctxEvicted { 2209 // In order to have the fixed-length exit sequence, we need to padd the binary. 2210 // Since this will never be reached, we insert a dummy instruction. 2211 c.Emit4Bytes(dummyInstruction) 2212 } 2213 } 2214 2215 func encodeRet() uint32 { 2216 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en 2217 return 0b1101011001011111<<16 | regNumberInEncoding[lr]<<5 2218 }