// github.com/tetratelabs/wazero@v1.7.3-0.20240513003603-48f702e154b5/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go

package arm64

import (
	"context"
	"fmt"

	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)

// Encode implements backend.Machine Encode.
func (m *machine) Encode(ctx context.Context) error {
	m.resolveRelativeAddresses(ctx)
	m.encode(m.executableContext.RootInstr)
	if l := len(m.compiler.Buf()); l > maxFunctionExecutableSize {
		return fmt.Errorf("function size exceeds the limit: %d > %d", l, maxFunctionExecutableSize)
	}
	return nil
}

func (m *machine) encode(root *instruction) {
	for cur := root; cur != nil; cur = cur.next {
		cur.encode(m)
	}
}

func (i *instruction) encode(m *machine) {
	c := m.compiler
	switch kind := i.kind; kind {
	case nop0, emitSourceOffsetInfo, loadConstBlockArg:
	case exitSequence:
		encodeExitSequence(c, i.rn.reg())
	case ret:
		// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en
		c.Emit4Bytes(encodeRet())
	case br:
		imm := i.brOffset()
		c.Emit4Bytes(encodeUnconditionalBranch(false, imm))
	case call:
		// We still don't know the exact address of the function to call, so we emit a placeholder.
		c.AddRelocationInfo(i.callFuncRef())
		c.Emit4Bytes(encodeUnconditionalBranch(true, 0)) // 0 = placeholder
	case callInd:
		c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
	case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128:
		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
	case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
	case vecLoad1R:
		c.Emit4Bytes(encodeVecLoad1R(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(i.u1)))
	case condBr:
		imm19 := i.condBrOffset()
		if imm19%4 != 0 {
			panic("imm19 for branch must be a multiple of 4")
		}

		imm19U32 := uint32(imm19/4) & 0b111_11111111_11111111
		brCond := i.condBrCond()
		switch brCond.kind() {
		case condKindRegisterZero:
			rt := regNumberInEncoding[brCond.register().RealReg()]
			c.Emit4Bytes(encodeCBZCBNZ(rt, false, imm19U32, i.condBr64bit()))
		case condKindRegisterNotZero:
			rt := regNumberInEncoding[brCond.register().RealReg()]
			c.Emit4Bytes(encodeCBZCBNZ(rt, true, imm19U32, i.condBr64bit()))
		case condKindCondFlagSet:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally-
			fl := brCond.flag()
			c.Emit4Bytes(0b01010100<<24 | (imm19U32 << 5) | uint32(fl))
		default:
			panic("BUG")
		}
	case movN:
		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
	case movZ:
		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
	case movK:
		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
	case mov32:
		to, from := i.rd.realReg(), i.rn.realReg()
		c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to]))
	case mov64:
		to, from := i.rd.realReg(), i.rn.realReg()
		toIsSp := to == sp
		fromIsSp := from == sp
		c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
	case loadP64, storeP64:
		rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
		amode := i.amode
		rn := regNumberInEncoding[amode.rn.RealReg()]
		var pre bool
		switch amode.kind {
		case addressModeKindPostIndex:
		case addressModeKindPreIndex:
			pre = true
		default:
			panic("BUG")
		}
		c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm))
	case loadFpuConst32:
		rd := regNumberInEncoding[i.rd.realReg()]
		if i.u1 == 0 {
			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
		} else {
			encodeLoadFpuConst32(c, rd, i.u1)
		}
	case loadFpuConst64:
		rd := regNumberInEncoding[i.rd.realReg()]
		if i.u1 == 0 {
			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
		} else {
			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1)
		}
	case loadFpuConst128:
		rd := regNumberInEncoding[i.rd.realReg()]
		lo, hi := i.u1, i.u2
		if lo == 0 && hi == 0 {
			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B))
		} else {
			encodeLoadFpuConst128(c, rd, lo, hi)
		}
	case aluRRRR:
		c.Emit4Bytes(encodeAluRRRR(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			regNumberInEncoding[i.ra.realReg()],
			uint32(i.u3),
		))
	case aluRRImmShift:
		c.Emit4Bytes(encodeAluRRImm(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			uint32(i.rm.shiftImm()),
			uint32(i.u3),
		))
	case aluRRR:
		rn := i.rn.realReg()
		c.Emit4Bytes(encodeAluRRR(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[rn],
			regNumberInEncoding[i.rm.realReg()],
			i.u3 == 1,
			rn == sp,
		))
	case aluRRRExtend:
		rm, exo, to := i.rm.er()
		c.Emit4Bytes(encodeAluRRRExtend(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[rm.RealReg()],
			exo,
			to,
		))
	case aluRRRShift:
		r, amt, sop := i.rm.sr()
		c.Emit4Bytes(encodeAluRRRShift(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[r.RealReg()],
			uint32(amt),
			sop,
			i.u3 == 1,
		))
	case aluRRBitmaskImm:
		c.Emit4Bytes(encodeAluBitmaskImmediate(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			i.u2,
			i.u3 == 1,
		))
	case bitRR:
		c.Emit4Bytes(encodeBitRR(
			bitOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			uint32(i.u2)),
		)
	case aluRRImm12:
		imm12, shift := i.rm.imm12()
		c.Emit4Bytes(encodeAluRRImm12(
			aluOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			imm12, shift,
			i.u3 == 1,
		))
	case fpuRRR:
		c.Emit4Bytes(encodeFpuRRR(
			fpuBinOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			i.u3 == 1,
		))
	case fpuMov64, fpuMov128:
		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register--
		rd := regNumberInEncoding[i.rd.realReg()]
		rn := regNumberInEncoding[i.rn.realReg()]
		var q uint32
		if kind == fpuMov128 {
			q = 0b1
		}
		c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd)
	case cSet:
		rd := regNumberInEncoding[i.rd.realReg()]
		cf := condFlag(i.u1)
		if i.u2 == 1 {
			// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV-
			// Note that we use the 64-bit variant here.
			c.Emit4Bytes(0b1101101010011111<<16 | uint32(cf.invert())<<12 | 0b011111<<5 | rd)
		} else {
			// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-
			// Note that we use the 64-bit variant here.
			c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd)
		}
	case extend:
		c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()]))
	case fpuCmp:
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en
		rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
		var ftype uint32
		if i.u3 == 1 {
			ftype = 0b01 // double precision.
		}
		c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
	case udf:
		// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UDF--Permanently-Undefined-?lang=en
		if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
			c.Emit4Bytes(dummyInstruction)
		} else {
			c.Emit4Bytes(0)
		}
	case adr:
		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
	case cSel:
		c.Emit4Bytes(encodeConditionalSelect(
			kind,
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			condFlag(i.u1),
			i.u3 == 1,
		))
	case fpuCSel:
		c.Emit4Bytes(encodeFpuCSel(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			condFlag(i.u1),
			i.u3 == 1,
		))
	case movToVec:
		c.Emit4Bytes(encodeMoveToVec(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(byte(i.u1)),
			vecIndex(i.u2),
		))
	case movFromVec, movFromVecSigned:
		c.Emit4Bytes(encodeMoveFromVec(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(byte(i.u1)),
			vecIndex(i.u2),
			i.kind == movFromVecSigned,
		))
	case vecDup:
		c.Emit4Bytes(encodeVecDup(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(byte(i.u1))))
	case vecDupElement:
		c.Emit4Bytes(encodeVecDupElement(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(byte(i.u1)),
			vecIndex(i.u2)))
	case vecExtract:
		c.Emit4Bytes(encodeVecExtract(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			vecArrangement(byte(i.u1)),
			uint32(i.u2)))
	case vecPermute:
		c.Emit4Bytes(encodeVecPermute(
			vecOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			vecArrangement(byte(i.u2))))
	case vecMovElement:
		c.Emit4Bytes(encodeVecMovElement(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(i.u1),
			uint32(i.u2), uint32(i.u3),
		))
	case vecMisc:
		c.Emit4Bytes(encodeAdvancedSIMDTwoMisc(
			vecOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(i.u2),
		))
	case vecLanes:
		c.Emit4Bytes(encodeVecLanes(
			vecOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			vecArrangement(i.u2),
		))
	case vecShiftImm:
		c.Emit4Bytes(encodeVecShiftImm(
			vecOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			uint32(i.rm.shiftImm()),
			vecArrangement(i.u2),
		))
	case vecTbl:
		c.Emit4Bytes(encodeVecTbl(
			1,
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			vecArrangement(i.u2)),
		)
	case vecTbl2:
		c.Emit4Bytes(encodeVecTbl(
			2,
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			vecArrangement(i.u2)),
		)
	case brTableSequence:
		targets := m.jmpTableTargets[i.u1]
		encodeBrTableSequence(c, i.rn.reg(), targets)
	case fpuToInt, intToFpu:
		c.Emit4Bytes(encodeCnvBetweenFloatInt(i))
	case fpuRR:
		c.Emit4Bytes(encodeFloatDataOneSource(
			fpuUniOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			i.u3 == 1,
		))
	case vecRRR:
		if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal {
			panic(fmt.Sprintf("vecOp %s must use vecRRRRewrite instead of vecRRR", op.String()))
		}
		fallthrough
	case vecRRRRewrite:
		c.Emit4Bytes(encodeVecRRR(
			vecOp(i.u1),
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			vecArrangement(i.u2),
		))
	case cCmpImm:
		// Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
		sf := uint32(i.u3 & 0b1)
		nzcv := uint32(i.u2 & 0b1111)
		cond := uint32(condFlag(i.u1))
		imm := uint32(i.rm.data & 0b11111)
		rn := regNumberInEncoding[i.rn.realReg()]
		c.Emit4Bytes(
			sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv,
		)
	case movFromFPSR:
		rt := regNumberInEncoding[i.rd.realReg()]
		c.Emit4Bytes(encodeSystemRegisterMove(rt, true))
	case movToFPSR:
		rt := regNumberInEncoding[i.rn.realReg()]
		c.Emit4Bytes(encodeSystemRegisterMove(rt, false))
	case atomicRmw:
		c.Emit4Bytes(encodeAtomicRmw(
			atomicRmwOp(i.u1),
			regNumberInEncoding[i.rm.realReg()],
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			uint32(i.u2),
		))
	case atomicCas:
		c.Emit4Bytes(encodeAtomicCas(
			regNumberInEncoding[i.rd.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			regNumberInEncoding[i.rn.realReg()],
			uint32(i.u2),
		))
	case atomicLoad:
		c.Emit4Bytes(encodeAtomicLoadStore(
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rd.realReg()],
			uint32(i.u2),
			1,
		))
	case atomicStore:
		c.Emit4Bytes(encodeAtomicLoadStore(
			regNumberInEncoding[i.rn.realReg()],
			regNumberInEncoding[i.rm.realReg()],
			uint32(i.u2),
			0,
		))
	case dmb:
		c.Emit4Bytes(encodeDMB())
	default:
		panic(i.String())
	}
}
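
// The helpers below each return a single 32-bit instruction word. As a worked
// example (hand-assembled here for illustration, not output captured from this
// compiler): a 64-bit register move `mov x2, x1` is ORR x2, xzr, x1 and
// encodes to 0xAA0103E2, while `mov sp, x1` must be ADD sp, x1, #0
// (0x9100003F) because SP is not addressable by the ORR form. encodeMov64
// picks between the two.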
func encodeMov64(rd, rn uint32, toIsSp, fromIsSp bool) uint32 {
	if toIsSp || fromIsSp {
		// This is an alias of ADD (immediate):
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
		return encodeAddSubtractImmediate(0b100, 0, 0, rn, rd)
	} else {
		// This is an alias of ORR (shifted register):
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
		return encodeLogicalShiftedRegister(0b101, 0, rn, 0, regNumberInEncoding[xzr], rd)
	}
}

// encodeSystemRegisterMove encodes as "System register move" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
//
// Note that we currently support only reads/writes of FPSR.
func encodeSystemRegisterMove(rt uint32, fromSystem bool) uint32 {
	ret := 0b11010101<<24 | 0b11011<<16 | 0b01000100<<8 | 0b001<<5 | rt
	if fromSystem {
		ret |= 0b1 << 21
	}
	return ret
}
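
// For illustration (values hand-checked against the ARM ARM, not taken from
// this repository's tests): encodeSystemRegisterMove(0, true) yields
// 0xD53B4420, i.e. `mrs x0, fpsr`, and encodeSystemRegisterMove(0, false)
// yields 0xD51B4420, i.e. `msr fpsr, x0`; bit 21 is the L (read) bit that
// distinguishes the two.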

// encodeVecRRR encodes as either "Advanced SIMD three same" or "Advanced SIMD three different" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeVecRRR(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 {
	switch op {
	case vecOpBit:
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b1, q)
	case vecOpBic:
		if arr > vecArrangement16B {
			panic("unsupported arrangement: " + arr.String())
		}
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b0, q)
	case vecOpBsl:
		if arr > vecArrangement16B {
			panic("unsupported arrangement: " + arr.String())
		}
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b01 /* always has size 0b01 */, 0b1, q)
	case vecOpAnd:
		if arr > vecArrangement16B {
			panic("unsupported arrangement: " + arr.String())
		}
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b00 /* always has size 0b00 */, 0b0, q)
	case vecOpOrr:
		_, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, 0b10 /* always has size 0b10 */, 0b0, q)
	case vecOpEOR:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00011, size, 0b1, q)
	case vecOpCmeq:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10001, size, 0b1, q)
	case vecOpCmgt:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b0, q)
	case vecOpCmhi:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00110, size, 0b1, q)
	case vecOpCmge:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b0, q)
	case vecOpCmhs:
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00111, size, 0b1, q)
	case vecOpFcmeq:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b0, q)
	case vecOpFcmgt:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q)
	case vecOpFcmge:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11100, size, 0b1, q)
	case vecOpAdd:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b0, q)
	case vecOpSqadd:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b0, q)
	case vecOpUqadd:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00001, size, 0b1, q)
	case vecOpAddp:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10111, size, 0b0, q)
	case vecOpSqsub:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b0, q)
	case vecOpUqsub:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00101, size, 0b1, q)
	case vecOpSub:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10000, size, 0b1, q)
	case vecOpFmin:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q)
	case vecOpSmin:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b0, q)
	case vecOpUmin:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01101, size, 0b1, q)
	case vecOpFmax:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11110, size, 0b0, q)
	case vecOpFadd:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q)
	case vecOpFsub:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11010, size, 0b0, q)
	case vecOpFmul:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11011, size, 0b1, q)
	case vecOpSqrdmulh:
		if arr < vecArrangement4H || arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10110, size, 0b1, q)
	case vecOpFdiv:
		var size, q uint32
		switch arr {
		case vecArrangement4S:
			size, q = 0b00, 0b1
		case vecArrangement2S:
			size, q = 0b00, 0b0
		case vecArrangement2D:
			size, q = 0b01, 0b1
		default:
			panic("unsupported arrangement: " + arr.String())
		}
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b11111, size, 0b1, q)
	case vecOpSmax:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b0, q)
	case vecOpUmax:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01100, size, 0b1, q)
	case vecOpUmaxp:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10100, size, 0b1, q)
	case vecOpUrhadd:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b00010, size, 0b1, q)
	case vecOpMul:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b10011, size, 0b0, q)
	case vecOpUmlal:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1000, size, 0b1, q)
	case vecOpSshl:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b0, q)
	case vecOpUshl:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeSame(rd, rn, rm, 0b01000, size, 0b1, q)

	case vecOpSmull:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, _ := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1100, size, 0b0, 0b0)

	case vecOpSmull2:
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, _ := arrToSizeQEncoded(arr)
		return encodeAdvancedSIMDThreeDifferent(rd, rn, rm, 0b1100, size, 0b0, 0b1)

	default:
		panic("TODO: " + op.String())
	}
}
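
// arrToSizeQEncoded below maps an arrangement to the (size, Q) pair shared by
// the SIMD encodings above. As a reading of its switch (not new behavior):
// 8B -> (0b00, 0), 16B -> (0b00, 1), 4H -> (0b01, 0), 8H -> (0b01, 1),
// 2S -> (0b10, 0), 4S -> (0b10, 1), 1D -> (0b11, 0), 2D -> (0b11, 1).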

func arrToSizeQEncoded(arr vecArrangement) (size, q uint32) {
	switch arr {
	case vecArrangement16B:
		q = 0b1
		fallthrough
	case vecArrangement8B:
		size = 0b00
	case vecArrangement8H:
		q = 0b1
		fallthrough
	case vecArrangement4H:
		size = 0b01
	case vecArrangement4S:
		q = 0b1
		fallthrough
	case vecArrangement2S:
		size = 0b10
	case vecArrangement2D:
		q = 0b1
		fallthrough
	case vecArrangement1D:
		size = 0b11
	default:
		panic("BUG")
	}
	return
}

// encodeAdvancedSIMDThreeSame encodes as "Advanced SIMD three same" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeAdvancedSIMDThreeSame(rd, rn, rm, opcode, size, U, Q uint32) uint32 {
	return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<11 | 0b1<<10 | rn<<5 | rd
}

// encodeAdvancedSIMDThreeDifferent encodes as "Advanced SIMD three different" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeAdvancedSIMDThreeDifferent(rd, rn, rm, opcode, size, U, Q uint32) uint32 {
	return Q<<30 | U<<29 | 0b111<<25 | size<<22 | 0b1<<21 | rm<<16 | opcode<<12 | rn<<5 | rd
}

// encodeFloatDataOneSource encodes as "Floating-point data-processing (1 source)" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32 {
	var opcode, ptype uint32
	switch op {
	case fpuUniOpCvt32To64:
		opcode = 0b000101
	case fpuUniOpCvt64To32:
		opcode = 0b000100
		ptype = 0b01
	case fpuUniOpNeg:
		opcode = 0b000010
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpSqrt:
		opcode = 0b000011
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundPlus:
		opcode = 0b001001
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundMinus:
		opcode = 0b001010
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundZero:
		opcode = 0b001011
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpRoundNearest:
		opcode = 0b001000
		if dst64bit {
			ptype = 0b01
		}
	case fpuUniOpAbs:
		opcode = 0b000001
		if dst64bit {
			ptype = 0b01
		}
	default:
		panic("BUG")
	}
	return 0b1111<<25 | ptype<<22 | 0b1<<21 | opcode<<15 | 0b1<<14 | rn<<5 | rd
}
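
// As a sanity check (hand-assembled for illustration): encodeFloatDataOneSource
// with fpuUniOpSqrt, rd=0, rn=1 and dst64bit=true produces 0x1E61C020, which
// disassembles as `fsqrt d0, d1`; with dst64bit=false only ptype changes and
// the result is the single-precision `fsqrt s0, s1` (0x1E21C020).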

// encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeCnvBetweenFloatInt(i *instruction) uint32 {
	rd := regNumberInEncoding[i.rd.realReg()]
	rn := regNumberInEncoding[i.rn.realReg()]

	var opcode uint32
	var rmode uint32
	var ptype uint32
	var sf uint32
	switch i.kind {
	case intToFpu: // Either UCVTF or SCVTF.
		rmode = 0b00

		signed := i.u1 == 1
		src64bit := i.u2 == 1
		dst64bit := i.u3 == 1
		if signed {
			opcode = 0b010
		} else {
			opcode = 0b011
		}
		if src64bit {
			sf = 0b1
		}
		if dst64bit {
			ptype = 0b01
		} else {
			ptype = 0b00
		}
	case fpuToInt: // Either FCVTZU or FCVTZS.
		rmode = 0b11

		signed := i.u1 == 1
		src64bit := i.u2 == 1
		dst64bit := i.u3 == 1

		if signed {
			opcode = 0b000
		} else {
			opcode = 0b001
		}
		if dst64bit {
			sf = 0b1
		}
		if src64bit {
			ptype = 0b01
		} else {
			ptype = 0b00
		}
	}
	return sf<<31 | 0b1111<<25 | ptype<<22 | 0b1<<21 | rmode<<19 | opcode<<16 | rn<<5 | rd
}

// encodeAdr encodes a PC-relative ADR instruction.
// https://developer.arm.com/documentation/ddi0602/2022-06/Base-Instructions/ADR--Form-PC-relative-address-
func encodeAdr(rd uint32, offset uint32) uint32 {
	if offset >= 1<<20 {
		panic("BUG: too large adr instruction")
	}
	return offset&0b11<<29 | 0b1<<28 | offset&0b1111111111_1111111100<<3 | rd
}

// encodeFpuCSel encodes as "Floating-point conditional select" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeFpuCSel(rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
	var ftype uint32
	if _64bit {
		ftype = 0b01 // double precision.
	}
	return 0b1111<<25 | ftype<<22 | 0b1<<21 | rm<<16 | uint32(c)<<12 | 0b11<<10 | rn<<5 | rd
}

// encodeMoveToVec encodes as "Move general-purpose register to a vector element" (represented as `ins`) in
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/MOV--vector--from-general-
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--from-general---Move-general-purpose-register-to-a-vector-element--an-alias-of-INS--general--?lang=en
func encodeMoveToVec(rd, rn uint32, arr vecArrangement, index vecIndex) uint32 {
	var imm5 uint32
	switch arr {
	case vecArrangementB:
		imm5 |= 0b1
		imm5 |= uint32(index) << 1
		if index > 0b1111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index))
		}
	case vecArrangementH:
		imm5 |= 0b10
		imm5 |= uint32(index) << 2
		if index > 0b111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index))
		}
	case vecArrangementS:
		imm5 |= 0b100
		imm5 |= uint32(index) << 3
		if index > 0b11 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index))
		}
	case vecArrangementD:
		imm5 |= 0b1000
		imm5 |= uint32(index) << 4
		if index > 0b1 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index))
		}
	default:
		panic("Unsupported arrangement " + arr.String())
	}

	return 0b01001110000<<21 | imm5<<16 | 0b000111<<10 | rn<<5 | rd
}
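
// For example (hand-assembled for illustration): encodeMoveToVec(0, 1,
// vecArrangementS, 1) sets imm5 to 0b01100 and returns 0x4E0C1C20, i.e.
// `mov v0.s[1], w1` (an alias of INS (general)).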

// encodeVecMovElement encodes as "Move vector element to another vector element, mov (element)" (represented as `ins`) in
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--element---Move-vector-element-to-another-vector-element--an-alias-of-INS--element--?lang=en
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en
func encodeVecMovElement(rd, rn uint32, arr vecArrangement, srcIndex, dstIndex uint32) uint32 {
	var imm4, imm5 uint32
	switch arr {
	case vecArrangementB:
		imm5 |= 0b1
		imm5 |= srcIndex << 1
		imm4 = dstIndex
		if srcIndex > 0b1111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", srcIndex))
		}
	case vecArrangementH:
		imm5 |= 0b10
		imm5 |= srcIndex << 2
		imm4 = dstIndex << 1
		if srcIndex > 0b111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", srcIndex))
		}
	case vecArrangementS:
		imm5 |= 0b100
		imm5 |= srcIndex << 3
		imm4 = dstIndex << 2
		if srcIndex > 0b11 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", srcIndex))
		}
	case vecArrangementD:
		imm5 |= 0b1000
		imm5 |= srcIndex << 4
		imm4 = dstIndex << 3
		if srcIndex > 0b1 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", srcIndex))
		}
	default:
		panic("Unsupported arrangement " + arr.String())
	}

	return 0b01101110000<<21 | imm5<<16 | imm4<<11 | 0b1<<10 | rn<<5 | rd
}

// encodeUnconditionalBranchReg encodes as "Unconditional branch (register)" in:
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions?lang=en
func encodeUnconditionalBranchReg(rn uint32, link bool) uint32 {
	var opc uint32
	if link {
		opc = 0b0001
	}
	return 0b1101011<<25 | opc<<21 | 0b11111<<16 | rn<<5
}

// encodeMoveFromVec encodes as "Move vector element to a general-purpose register"
// (represented as `umov` for the unsigned variant, `smov` for the signed one) in
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/MOV--to-general---Move-vector-element-to-general-purpose-register--an-alias-of-UMOV-?lang=en
func encodeMoveFromVec(rd, rn uint32, arr vecArrangement, index vecIndex, signed bool) uint32 {
	var op, imm4, q, imm5 uint32
	switch {
	case arr == vecArrangementB:
		imm5 |= 0b1
		imm5 |= uint32(index) << 1
		if index > 0b1111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 15", index))
		}
	case arr == vecArrangementH:
		imm5 |= 0b10
		imm5 |= uint32(index) << 2
		if index > 0b111 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 7", index))
		}
	case arr == vecArrangementS && signed:
		q = 0b1
		fallthrough
	case arr == vecArrangementS:
		imm5 |= 0b100
		imm5 |= uint32(index) << 3
		if index > 0b11 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 3", index))
		}
	case arr == vecArrangementD && !signed:
		imm5 |= 0b1000
		imm5 |= uint32(index) << 4
		q = 0b1
		if index > 0b1 {
			panic(fmt.Sprintf("vector index is larger than the allowed bound: %d > 1", index))
		}
	default:
		panic("Unsupported arrangement " + arr.String())
	}
	if signed {
		op, imm4 = 0, 0b0101
	} else {
		op, imm4 = 0, 0b0111
	}
	return op<<29 | 0b01110000<<21 | q<<30 | imm5<<16 | imm4<<11 | 1<<10 | rn<<5 | rd
}
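
// For example (hand-assembled for illustration): encodeMoveFromVec(1, 0,
// vecArrangementS, 1, false) returns 0x0E0C3C01, i.e. `umov w1, v0.s[1]`
// (printed by most disassemblers as `mov w1, v0.s[1]`).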

// encodeVecDup encodes as "Duplicate general-purpose register to vector" DUP (general)
// (represented as `dup`)
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en
func encodeVecDup(rd, rn uint32, arr vecArrangement) uint32 {
	var q, imm5 uint32
	switch arr {
	case vecArrangement8B:
		q, imm5 = 0b0, 0b1
	case vecArrangement16B:
		q, imm5 = 0b1, 0b1
	case vecArrangement4H:
		q, imm5 = 0b0, 0b10
	case vecArrangement8H:
		q, imm5 = 0b1, 0b10
	case vecArrangement2S:
		q, imm5 = 0b0, 0b100
	case vecArrangement4S:
		q, imm5 = 0b1, 0b100
	case vecArrangement2D:
		q, imm5 = 0b1, 0b1000
	default:
		panic("Unsupported arrangement " + arr.String())
	}
	return q<<30 | 0b001110000<<21 | imm5<<16 | 0b000011<<10 | rn<<5 | rd
}

// encodeVecDupElement encodes as "Duplicate vector element to vector or scalar" DUP (element).
// (represented as `dup`)
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar-
func encodeVecDupElement(rd, rn uint32, arr vecArrangement, srcIndex vecIndex) uint32 {
	var q, imm5 uint32
	q = 0b1
	switch arr {
	case vecArrangementB:
		imm5 |= 0b1
		imm5 |= uint32(srcIndex) << 1
	case vecArrangementH:
		imm5 |= 0b10
		imm5 |= uint32(srcIndex) << 2
	case vecArrangementS:
		imm5 |= 0b100
		imm5 |= uint32(srcIndex) << 3
	case vecArrangementD:
		imm5 |= 0b1000
		imm5 |= uint32(srcIndex) << 4
	default:
		panic("unsupported arrangement: " + arr.String())
	}

	return q<<30 | 0b001110000<<21 | imm5<<16 | 0b1<<10 | rn<<5 | rd
}

// encodeVecExtract encodes as "Advanced SIMD extract."
// Currently only `ext` is defined.
// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
// https://developer.arm.com/documentation/ddi0602/2023-06/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en
func encodeVecExtract(rd, rn, rm uint32, arr vecArrangement, index uint32) uint32 {
	var q, imm4 uint32
	switch arr {
	case vecArrangement8B:
		q, imm4 = 0, 0b0111&uint32(index)
	case vecArrangement16B:
		q, imm4 = 1, 0b1111&uint32(index)
	default:
		panic("Unsupported arrangement " + arr.String())
	}
	return q<<30 | 0b101110000<<21 | rm<<16 | imm4<<11 | rn<<5 | rd
}
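
// For example (hand-assembled for illustration): encodeVecExtract(0, 1, 2,
// vecArrangement16B, 8) returns 0x6E024020, i.e.
// `ext v0.16b, v1.16b, v2.16b, #8`.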

// encodeVecPermute encodes as "Advanced SIMD permute."
// https://developer.arm.com/documentation/ddi0602/2023-06/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
func encodeVecPermute(op vecOp, rd, rn, rm uint32, arr vecArrangement) uint32 {
	var q, size, opcode uint32
	switch op {
	case vecOpZip1:
		opcode = 0b011
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	default:
		panic("TODO: " + op.String())
	}
	return q<<30 | 0b001110<<24 | size<<22 | rm<<16 | opcode<<12 | 0b10<<10 | rn<<5 | rd
}

// encodeConditionalSelect encodes as "Conditional select" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#condsel
func encodeConditionalSelect(kind instructionKind, rd, rn, rm uint32, c condFlag, _64bit bool) uint32 {
	if kind != cSel {
		panic("TODO: support other conditional select")
	}

	ret := 0b110101<<23 | rm<<16 | uint32(c)<<12 | rn<<5 | rd
	if _64bit {
		ret |= 0b1 << 31
	}
	return ret
}

const dummyInstruction uint32 = 0x14000000 // "b 0"

// encodeLoadFpuConst32 encodes the following three instructions:
//
//	ldr s8, #8 ;; literal load of data.f32
//	b 8        ;; skip the data
//	data.f32 xxxxxxx
func encodeLoadFpuConst32(c backend.Compiler, rd uint32, rawF32 uint64) {
	c.Emit4Bytes(
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en
		0b111<<26 | (0x8/4)<<5 | rd,
	)
	c.Emit4Bytes(encodeUnconditionalBranch(false, 8)) // b 8
	if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
		// Inlined data.f32 cannot be disassembled, so we add a dummy instruction here.
		c.Emit4Bytes(dummyInstruction)
	} else {
		c.Emit4Bytes(uint32(rawF32)) // data.f32 xxxxxxx
	}
}
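
// The literal-load helpers above and below all follow the same PC-relative
// pattern: the LDR's imm19 field holds 0x8/4 = 2 words, so the data always
// sits one instruction past the branch. For instance (hand-assembled for
// illustration), the first word emitted for rd=0 is 0x1C000040, i.e.
// `ldr s0, #8`.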

// encodeLoadFpuConst64 encodes the following three instructions:
//
//	ldr d8, #8 ;; literal load of data.f64
//	b 12       ;; skip the data
//	data.f64 xxxxxxx
func encodeLoadFpuConst64(c backend.Compiler, rd uint32, rawF64 uint64) {
	c.Emit4Bytes(
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en
		0b1<<30 | 0b111<<26 | (0x8/4)<<5 | rd,
	)
	c.Emit4Bytes(encodeUnconditionalBranch(false, 12)) // b 12
	if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
		// Inlined data.f64 cannot be disassembled, so we add dummy instructions here.
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
	} else {
		// data.f64 xxxxxxx
		c.Emit4Bytes(uint32(rawF64))
		c.Emit4Bytes(uint32(rawF64 >> 32))
	}
}

// encodeLoadFpuConst128 encodes the following three instructions:
//
//	ldr q8, #8 ;; literal load of data.v128
//	b 20       ;; skip the data
//	data.v128 xxxxxxx
func encodeLoadFpuConst128(c backend.Compiler, rd uint32, lo, hi uint64) {
	c.Emit4Bytes(
		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal--?lang=en
		0b1<<31 | 0b111<<26 | (0x8/4)<<5 | rd,
	)
	c.Emit4Bytes(encodeUnconditionalBranch(false, 20)) // b 20
	if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
		// Inlined data.v128 cannot be disassembled, so we add dummy instructions here.
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
		c.Emit4Bytes(dummyInstruction)
	} else {
		// data.v128 xxxxxxx
		c.Emit4Bytes(uint32(lo))
		c.Emit4Bytes(uint32(lo >> 32))
		c.Emit4Bytes(uint32(hi))
		c.Emit4Bytes(uint32(hi >> 32))
	}
}

// encodeAluRRRR encodes as Data-processing (3 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeAluRRRR(op aluOp, rd, rn, rm, ra, _64bit uint32) uint32 {
	var oO, op31 uint32
	switch op {
	case aluOpMAdd:
		op31, oO = 0b000, 0b0
	case aluOpMSub:
		op31, oO = 0b000, 0b1
	default:
		panic("TODO/BUG")
	}
	return _64bit<<31 | 0b11011<<24 | op31<<21 | rm<<16 | oO<<15 | ra<<10 | rn<<5 | rd
}
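
// For example (hand-assembled for illustration): encodeAluRRRR(aluOpMAdd,
// 0, 1, 2, 3, 1) returns 0x9B020C20, i.e. `madd x0, x1, x2, x3`; aluOpMSub
// only flips the o0 bit (bit 15), giving `msub`.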

// encodeBitRR encodes as Data-processing (1 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeBitRR(op bitOp, rd, rn, _64bit uint32) uint32 {
	var opcode2, opcode uint32
	switch op {
	case bitOpRbit:
		opcode2, opcode = 0b00000, 0b000000
	case bitOpClz:
		opcode2, opcode = 0b00000, 0b000100
	default:
		panic("TODO/BUG")
	}
	return _64bit<<31 | 0b1_0_11010110<<21 | opcode2<<15 | opcode<<10 | rn<<5 | rd
}

func encodeAsMov32(rn, rd uint32) uint32 {
	// This is an alias of ORR (shifted register):
	// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/MOV--register---Move--register---an-alias-of-ORR--shifted-register--
	return encodeLogicalShiftedRegister(0b001, 0, rn, 0, regNumberInEncoding[xzr], rd)
}

// encodeExtend encodes extension instructions.
func encodeExtend(signed bool, from, to byte, rd, rn uint32) uint32 {
	// UXTB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM-?lang=en
	// UXTH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTH--Unsigned-Extend-Halfword--an-alias-of-UBFM-?lang=en
	// SXTB: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTB--Signed-Extend-Byte--an-alias-of-SBFM-
	// SXTH: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTH--Sign-Extend-Halfword--an-alias-of-SBFM-
	// SXTW: https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SXTW--Sign-Extend-Word--an-alias-of-SBFM-
	var _31to10 uint32
	switch {
	case !signed && from == 8 && to == 32:
		// 32-bit UXTB
		_31to10 = 0b0101001100000000000111
	case !signed && from == 16 && to == 32:
		// 32-bit UXTH
		_31to10 = 0b0101001100000000001111
	case !signed && from == 8 && to == 64:
		// 64-bit UXTB
		_31to10 = 0b0101001100000000000111
	case !signed && from == 16 && to == 64:
		// 64-bit UXTH
		_31to10 = 0b0101001100000000001111
	case !signed && from == 32 && to == 64:
		return encodeAsMov32(rn, rd)
	case signed && from == 8 && to == 32:
		// 32-bit SXTB
		_31to10 = 0b0001001100000000000111
	case signed && from == 16 && to == 32:
		// 32-bit SXTH
		_31to10 = 0b0001001100000000001111
	case signed && from == 8 && to == 64:
		// 64-bit SXTB
		_31to10 = 0b1001001101000000000111
	case signed && from == 16 && to == 64:
		// 64-bit SXTH
		_31to10 = 0b1001001101000000001111
	case signed && from == 32 && to == 64:
		// SXTW
		_31to10 = 0b1001001101000000011111
	default:
		panic("BUG")
	}
	return _31to10<<10 | rn<<5 | rd
}
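
// For example (hand-assembled for illustration): encodeExtend(true, 32, 64,
// 0, 1) returns 0x93407C20, i.e. `sxtw x0, w1` (SBFM x0, x1, #0, #31), while
// the unsigned 32-to-64 case is just a 32-bit register move, which clears the
// upper half by definition.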

func encodeLoadOrStore(kind instructionKind, rt uint32, amode addressMode) uint32 {
	var _22to31 uint32
	var bits int64
	switch kind {
	case uLoad8:
		_22to31 = 0b0011100001
		bits = 8
	case sLoad8:
		_22to31 = 0b0011100010
		bits = 8
	case uLoad16:
		_22to31 = 0b0111100001
		bits = 16
	case sLoad16:
		_22to31 = 0b0111100010
		bits = 16
	case uLoad32:
		_22to31 = 0b1011100001
		bits = 32
	case sLoad32:
		_22to31 = 0b1011100010
		bits = 32
	case uLoad64:
		_22to31 = 0b1111100001
		bits = 64
	case fpuLoad32:
		_22to31 = 0b1011110001
		bits = 32
	case fpuLoad64:
		_22to31 = 0b1111110001
		bits = 64
	case fpuLoad128:
		_22to31 = 0b0011110011
		bits = 128
	case store8:
		_22to31 = 0b0011100000
		bits = 8
	case store16:
		_22to31 = 0b0111100000
		bits = 16
	case store32:
		_22to31 = 0b1011100000
		bits = 32
	case store64:
		_22to31 = 0b1111100000
		bits = 64
	case fpuStore32:
		_22to31 = 0b1011110000
		bits = 32
	case fpuStore64:
		_22to31 = 0b1111110000
		bits = 64
	case fpuStore128:
		_22to31 = 0b0011110010
		bits = 128
	default:
		panic("BUG")
	}

	switch amode.kind {
	case addressModeKindRegScaledExtended:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()],
			regNumberInEncoding[amode.rm.RealReg()],
			rt, true, amode.extOp)
	case addressModeKindRegScaled:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()],
			rt, true, extendOpNone)
	case addressModeKindRegExtended:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()],
			rt, false, amode.extOp)
	case addressModeKindRegReg:
		return encodeLoadOrStoreExtended(_22to31,
			regNumberInEncoding[amode.rn.RealReg()], regNumberInEncoding[amode.rm.RealReg()],
			rt, false, extendOpNone)
	case addressModeKindRegSignedImm9:
		// e.g. https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDUR--Load-Register--unscaled--
		return encodeLoadOrStoreSIMM9(_22to31, 0b00 /* unscaled */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm)
	case addressModeKindPostIndex:
		return encodeLoadOrStoreSIMM9(_22to31, 0b01 /* post index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm)
	case addressModeKindPreIndex:
		return encodeLoadOrStoreSIMM9(_22to31, 0b11 /* pre index */, regNumberInEncoding[amode.rn.RealReg()], rt, amode.imm)
	case addressModeKindRegUnsignedImm12:
		// "unsigned immediate" in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
		rn := regNumberInEncoding[amode.rn.RealReg()]
		imm := amode.imm
		div := bits / 8
		if imm != 0 && !offsetFitsInAddressModeKindRegUnsignedImm12(byte(bits), imm) {
			panic("BUG")
		}
		imm /= div
		return _22to31<<22 | 0b1<<24 | uint32(imm&0b111111111111)<<10 | rn<<5 | rt
	default:
		panic("BUG")
	}
}

// encodeVecLoad1R encodes as Load one single-element structure and Replicate to all lanes (of one register) in
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#sa_imm
func encodeVecLoad1R(rt, rn uint32, arr vecArrangement) uint32 {
	size, q := arrToSizeQEncoded(arr)
	return q<<30 | 0b001101010000001100<<12 | size<<10 | rn<<5 | rt
}

// encodeAluBitmaskImmediate encodes as Logical (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluBitmaskImmediate(op aluOp, rd, rn uint32, imm uint64, _64bit bool) uint32 {
	var _31to23 uint32
	switch op {
	case aluOpAnd:
		_31to23 = 0b00_100100
	case aluOpOrr:
		_31to23 = 0b01_100100
	case aluOpEor:
		_31to23 = 0b10_100100
	case aluOpAnds:
		_31to23 = 0b11_100100
	default:
		panic("BUG")
	}
	if _64bit {
		_31to23 |= 0b1 << 8
	}
	immr, imms, N := bitmaskImmediate(imm, _64bit)
	return _31to23<<23 | uint32(N)<<22 | uint32(immr)<<16 | uint32(imms)<<10 | rn<<5 | rd
}
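
// For example (hand-assembled for illustration): 0xff is a valid bitmask
// immediate decomposing to N=1, immr=0b000000, imms=0b000111, so
// encodeAluBitmaskImmediate(aluOpAnd, 0, 1, 0xff, true) returns 0x92401C20,
// i.e. `and x0, x1, #0xff`.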

func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
	var size uint32
	switch {
	case c != c>>32|c<<32:
		size = 64
	case c != c>>16|c<<48:
		size = 32
		c = uint64(int32(c))
	case c != c>>8|c<<56:
		size = 16
		c = uint64(int16(c))
	case c != c>>4|c<<60:
		size = 8
		c = uint64(int8(c))
	case c != c>>2|c<<62:
		size = 4
		c = uint64(int64(c<<60) >> 60)
	default:
		size = 2
		c = uint64(int64(c<<62) >> 62)
	}

	neg := false
	if int64(c) < 0 {
		c = ^c
		neg = true
	}

	onesSize, nonZeroPos := getOnesSequenceSize(c)
	if neg {
		nonZeroPos = onesSize + nonZeroPos
		onesSize = size - onesSize
	}

	var mode byte = 32
	if is64bit && size == 64 {
		N, mode = 0b1, 64
	}

	immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
	imms = byte((onesSize - 1) | 63&^(size<<1-1))
	return
}

func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
	// Take 0b00111000 for example:
	y := getLowestBit(x)               // = 0b0000100
	nonZeroPos = setBitPos(y)          // = 2
	size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b0100000) - 2 = 5 - 2 = 3
	return
}

func setBitPos(x uint64) (ret uint32) {
	for ; ; ret++ {
		if x == 0b1 {
			break
		}
		x = x >> 1
	}
	return
}

// encodeLoadOrStoreExtended encodes store/load instruction as "extended register offset" in Load/store register (register offset):
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeLoadOrStoreExtended(_22to32 uint32, rn, rm, rt uint32, scaled bool, extOp extendOp) uint32 {
	var option uint32
	switch extOp {
	case extendOpUXTW:
		option = 0b010
	case extendOpSXTW:
		option = 0b110
	case extendOpNone:
		option = 0b111
	default:
		panic("BUG")
	}
	var s uint32
	if scaled {
		s = 0b1
	}
	return _22to32<<22 | 0b1<<21 | rm<<16 | option<<13 | s<<12 | 0b10<<10 | rn<<5 | rt
}

// encodeLoadOrStoreSIMM9 encodes store/load instruction as one of post-index, pre-index or unscaled immediate as in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeLoadOrStoreSIMM9(_22to32, _1011 uint32, rn, rt uint32, imm9 int64) uint32 {
	return _22to32<<22 | (uint32(imm9)&0b111111111)<<12 | _1011<<10 | rn<<5 | rt
}

// encodeFpuRRR encodes as single or double precision (depending on `_64bit`) of Floating-point data-processing (2 source) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeFpuRRR(op fpuBinOp, rd, rn, rm uint32, _64bit bool) (ret uint32) {
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector--Add-vectors--scalar--floating-point-and-integer-
	var opcode uint32
	switch op {
	case fpuBinOpAdd:
		opcode = 0b0010
	case fpuBinOpSub:
		opcode = 0b0011
	case fpuBinOpMul:
		opcode = 0b0000
	case fpuBinOpDiv:
		opcode = 0b0001
	case fpuBinOpMax:
		opcode = 0b0100
	case fpuBinOpMin:
		opcode = 0b0101
	default:
		panic("BUG")
	}
	var ptype uint32
	if _64bit {
		ptype = 0b01
	}
	return 0b1111<<25 | ptype<<22 | 0b1<<21 | rm<<16 | opcode<<12 | 0b1<<11 | rn<<5 | rd
}

// encodeAluRRImm12 encodes as Add/subtract (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAluRRImm12(op aluOp, rd, rn uint32, imm12 uint16, shiftBit byte, _64bit bool) uint32 {
	var _31to24 uint32
	switch op {
	case aluOpAdd:
		_31to24 = 0b00_10001
	case aluOpAddS:
		_31to24 = 0b01_10001
	case aluOpSub:
		_31to24 = 0b10_10001
	case aluOpSubS:
		_31to24 = 0b11_10001
	default:
		panic("BUG")
	}
	if _64bit {
		_31to24 |= 0b1 << 7
	}
	return _31to24<<24 | uint32(shiftBit)<<22 | uint32(imm12&0b111111111111)<<10 | rn<<5 | rd
}
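
// For example (hand-assembled for illustration): encodeAluRRImm12(aluOpAdd,
// 0, 1, 16, 0, true) returns 0x91004020, i.e. `add x0, x1, #0x10`; a nonzero
// shiftBit would left-shift the immediate by 12 instead.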

// encodeAluRRRShift encodes as Data Processing (shifted register), depending on aluOp.
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
func encodeAluRRRShift(op aluOp, rd, rn, rm, amount uint32, shiftOp shiftOp, _64bit bool) uint32 {
	var _31to24 uint32
	var opc, n uint32
	switch op {
	case aluOpAdd:
		_31to24 = 0b00001011
	case aluOpAddS:
		_31to24 = 0b00101011
	case aluOpSub:
		_31to24 = 0b01001011
	case aluOpSubS:
		_31to24 = 0b01101011
	case aluOpAnd, aluOpOrr, aluOpEor, aluOpAnds:
		// "Logical (shifted register)".
		switch op {
		case aluOpAnd:
			// all zeros
		case aluOpOrr:
			opc = 0b01
		case aluOpEor:
			opc = 0b10
		case aluOpAnds:
			opc = 0b11
		}
		_31to24 = 0b000_01010
	default:
		panic(op.String())
	}

	if _64bit {
		_31to24 |= 0b1 << 7
	}

	var shift uint32
	switch shiftOp {
	case shiftOpLSL:
		shift = 0b00
	case shiftOpLSR:
		shift = 0b01
	case shiftOpASR:
		shift = 0b10
	default:
		panic(shiftOp.String())
	}
	return opc<<29 | n<<21 | _31to24<<24 | shift<<22 | rm<<16 | (amount << 10) | (rn << 5) | rd
}

// "Add/subtract (extended register)" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_ext
func encodeAluRRRExtend(ao aluOp, rd, rn, rm uint32, extOp extendOp, to byte) uint32 {
	var s, op uint32
	switch ao {
	case aluOpAdd:
		op = 0b0
	case aluOpAddS:
		op, s = 0b0, 0b1
	case aluOpSub:
		op = 0b1
	case aluOpSubS:
		op, s = 0b1, 0b1
	default:
		panic("BUG: extended register operand can be used only for add/sub")
	}

	var sf uint32
	if to == 64 {
		sf = 0b1
	}

	var option uint32
	switch extOp {
	case extendOpUXTB:
		option = 0b000
	case extendOpUXTH:
		option = 0b001
	case extendOpUXTW:
		option = 0b010
	case extendOpSXTB:
		option = 0b100
	case extendOpSXTH:
		option = 0b101
	case extendOpSXTW:
		option = 0b110
	case extendOpSXTX, extendOpUXTX:
		panic(fmt.Sprintf("%s is essentially noop, and should be handled much earlier than encoding", extOp.String()))
	}
	return sf<<31 | op<<30 | s<<29 | 0b1011001<<21 | rm<<16 | option<<13 | rn<<5 | rd
}
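
// For example (hand-assembled for illustration): encodeAluRRRShift(aluOpAdd,
// 0, 1, 2, 4, shiftOpLSL, true) returns 0x8B021020, i.e.
// `add x0, x1, x2, lsl #4`.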
			_31to21 = 0b01001011_001
			_15to10 = 0b011000
		} else {
			// "Shifted register" with shift = 0
			_31to21 = 0b01001011_000
		}
	case aluOpSubS:
		if isRnSp {
			panic("TODO")
		}
		// "Shifted register" with shift = 0
		_31to21 = 0b01101011_000
	case aluOpAnd, aluOpOrr, aluOpOrn, aluOpEor, aluOpAnds:
		// "Logical (shifted register)".
		var opc, n uint32
		switch op {
		case aluOpAnd:
			// all zeros
		case aluOpOrr:
			opc = 0b01
		case aluOpOrn:
			opc = 0b01
			n = 1
		case aluOpEor:
			opc = 0b10
		case aluOpAnds:
			opc = 0b11
		}
		_31to21 = 0b000_01010_000 | opc<<8 | n
	case aluOpLsl, aluOpAsr, aluOpLsr, aluOpRotR:
		// "Data-processing (2 source)".
		_31to21 = 0b00011010_110
		switch op {
		case aluOpLsl:
			_15to10 = 0b001000
		case aluOpLsr:
			_15to10 = 0b001001
		case aluOpAsr:
			_15to10 = 0b001010
		case aluOpRotR:
			_15to10 = 0b001011
		}
	case aluOpSDiv:
		// "Data-processing (2 source)".
		_31to21 = 0b00011010_110
		_15to10 = 0b000011
	case aluOpUDiv:
		// "Data-processing (2 source)".
		_31to21 = 0b00011010_110
		_15to10 = 0b000010
	default:
		panic(op.String())
	}
	if _64bit {
		_31to21 |= 0b1 << 10
	}
	return _31to21<<21 | rm<<16 | (_15to10 << 10) | (rn << 5) | rd
}

// encodeLogicalShiftedRegister encodes as Logical (shifted register) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
func encodeLogicalShiftedRegister(sf_opc uint32, shift_N uint32, rm uint32, imm6 uint32, rn, rd uint32) (ret uint32) {
	ret = sf_opc << 29
	ret |= 0b01010 << 24
	ret |= shift_N << 21
	ret |= rm << 16
	ret |= imm6 << 10
	ret |= rn << 5
	ret |= rd
	return
}

// encodeAddSubtractImmediate encodes as Add/subtract (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
func encodeAddSubtractImmediate(sf_op_s uint32, sh uint32, imm12 uint32, rn, rd uint32) (ret uint32) {
	ret = sf_op_s << 29
	ret |= 0b100010 << 23
	ret |= sh << 22
	ret |= imm12 << 10
	ret |= rn << 5
	ret |= rd
	return
}

// encodePreOrPostIndexLoadStorePair64 encodes as Load/store pair (pre/post-indexed) in
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LDP--Load-Pair-of-Registers-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/STP--Store-Pair-of-Registers-
func encodePreOrPostIndexLoadStorePair64(pre bool, load bool, rn, rt, rt2 uint32, imm7 int64) (ret uint32) {
	if imm7%8 != 0 {
		panic("imm7 for pair load/store must be a multiple of 8")
	}
	imm7 /= 8
	ret = rt
	ret |= rn << 5
	ret |= rt2 << 10
	ret |= (uint32(imm7) & 0b1111111) << 15
	if load {
		ret |= 0b1 << 22
	}
	ret |= 0b101010001 << 23
	if pre {
		ret |= 0b1 << 24
	}
	return
}

// encodeUnconditionalBranch encodes as B or BL instructions:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/BL--Branch-with-Link-
func encodeUnconditionalBranch(link bool, imm26 int64) (ret uint32) {
	if imm26%4 != 0 {
		panic("imm26 for branch must be a multiple of 4")
	}
	imm26 /= 4
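	// imm26 is now a signed 26-bit *word* offset, so the reachable branch
	// range is +/-128MiB from the current instruction.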
	ret = uint32(imm26 & 0b11_11111111_11111111_11111111)
	ret |= 0b101 << 26
	if link {
		ret |= 0b1 << 31
	}
	return
}

// encodeCBZCBNZ encodes as either CBZ or CBNZ:
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBZ--Compare-and-Branch-on-Zero-
// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CBNZ--Compare-and-Branch-on-Nonzero-
func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {
	ret = rt
	ret |= imm19 << 5
	if nz {
		ret |= 1 << 24
	}
	ret |= 0b11010 << 25
	if _64bit {
		ret |= 1 << 31
	}
	return
}

// encodeMoveWideImmediate encodes as either MOVZ, MOVN or MOVK, as Move wide (immediate) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
//
// "shift" must have been divided by 16 at this point.
func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) {
	ret = rd
	ret |= uint32(imm&0xffff) << 5
	ret |= (uint32(shift)) << 21
	ret |= 0b100101 << 23
	ret |= opc << 29
	ret |= uint32(_64bit) << 31
	return
}

// encodeAluRRImm encodes as "Bitfield" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en#log_imm
func encodeAluRRImm(op aluOp, rd, rn, amount, _64bit uint32) uint32 {
	var opc uint32
	var immr, imms uint32
	switch op {
	case aluOpLsl:
		// LSL (immediate) is an alias for UBFM.
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/UBFM--Unsigned-Bitfield-Move-?lang=en
		opc = 0b10
		if amount == 0 {
			// amount == 0 could be lowered to a plain move, but for consistency
			// we encode it as `lsr xn, xm, #0` (UBFM with immr=0, imms=width-1).
			immr = 0
			if _64bit == 1 {
				imms = 0b111111
			} else {
				imms = 0b11111
			}
		} else {
			if _64bit == 1 {
				immr = 64 - amount
			} else {
				immr = (32 - amount) & 0b11111
			}
			imms = immr - 1
		}
	case aluOpLsr:
		// LSR (immediate) is an alias for UBFM.
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
		opc = 0b10
		imms, immr = 0b011111|_64bit<<5, amount
	case aluOpAsr:
		// ASR (immediate) is an alias for SBFM.
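		// (SBFM with immr = shift and imms = width-1 shifts right while
		// replicating the sign bit into the vacated high bits.)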
		// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/SBFM--Signed-Bitfield-Move-?lang=en
		opc = 0b00
		imms, immr = 0b011111|_64bit<<5, amount
	default:
		panic(op.String())
	}
	return _64bit<<31 | opc<<29 | 0b100110<<23 | _64bit<<22 | immr<<16 | imms<<10 | rn<<5 | rd
}

// encodeVecLanes encodes as Data Processing (Advanced SIMD across lanes) depending on vecOp in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeVecLanes(op vecOp, rd uint32, rn uint32, arr vecArrangement) uint32 {
	var u, q, size, opcode uint32
	switch arr {
	case vecArrangement8B:
		q, size = 0b0, 0b00
	case vecArrangement16B:
		q, size = 0b1, 0b00
	case vecArrangement4H:
		q, size = 0, 0b01
	case vecArrangement8H:
		q, size = 1, 0b01
	case vecArrangement4S:
		q, size = 1, 0b10
	default:
		panic("unsupported arrangement: " + arr.String())
	}
	switch op {
	case vecOpUaddlv:
		u, opcode = 1, 0b00011
	case vecOpUminv:
		u, opcode = 1, 0b11010
	case vecOpAddv:
		u, opcode = 0, 0b11011
	default:
		panic("unsupported or illegal vecOp: " + op.String())
	}
	return q<<30 | u<<29 | 0b1110<<24 | size<<22 | 0b11000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd
}

// encodeVecShiftImm encodes as Data Processing (Advanced SIMD shift by immediate) depending on vecOp in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeVecShiftImm(op vecOp, rd uint32, rn, amount uint32, arr vecArrangement) uint32 {
	var u, q, immh, immb, opcode uint32
	switch op {
	case vecOpSshll:
		u, opcode = 0b0, 0b10100
	case vecOpUshll:
		u, opcode = 0b1, 0b10100
	case vecOpSshr:
		u, opcode = 0, 0b00000
	default:
		panic("unsupported or illegal vecOp: " + op.String())
	}
	switch arr {
	case vecArrangement16B:
		q = 0b1
		fallthrough
	case vecArrangement8B:
		immh = 0b0001
		immb = 8 - uint32(amount&0b111)
	case vecArrangement8H:
		q = 0b1
		fallthrough
	case vecArrangement4H:
		v := 16 - uint32(amount&0b1111)
		immb = v & 0b111
		immh = 0b0010 | (v >> 3)
	case vecArrangement4S:
		q = 0b1
		fallthrough
	case vecArrangement2S:
		v := 32 - uint32(amount&0b11111)
		immb = v & 0b111
		immh = 0b0100 | (v >> 3)
	case vecArrangement2D:
		q = 0b1
		v := 64 - uint32(amount&0b111111)
		immb = v & 0b111
		immh = 0b1000 | (v >> 3)
	default:
		panic("unsupported arrangement: " + arr.String())
	}
	return q<<30 | u<<29 | 0b011110<<23 | immh<<19 | immb<<16 | opcode<<11 | 0b1<<10 | rn<<5 | rd
}

// encodeVecTbl encodes as Data Processing (Advanced SIMD table lookup) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
//
// Note: tblOp may encode tbl1, tbl2... in the future. Currently, it is ignored.
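//
// For example, encodeVecTbl(1, 0, 1, 2, vecArrangement16B) should produce 0x4e020020,
// i.e. `tbl v0.16b, { v1.16b }, v2.16b` (hand-checked: q selects the 128-bit form,
// len the number of table registers).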
func encodeVecTbl(nregs, rd, rn, rm uint32, arr vecArrangement) uint32 {
	var q, op2, len, op uint32

	switch nregs {
	case 1:
		// tbl: single-register
		len = 0b00
	case 2:
		// tbl2: 2-register table
		len = 0b01
	default:
		panic(fmt.Sprintf("unsupported number of registers %d", nregs))
	}
	switch arr {
	case vecArrangement8B:
		q = 0b0
	case vecArrangement16B:
		q = 0b1
	default:
		panic("unsupported arrangement: " + arr.String())
	}

	return q<<30 | 0b001110<<24 | op2<<22 | rm<<16 | len<<13 | op<<12 | rn<<5 | rd
}

// encodeAdvancedSIMDTwoMisc encodes as Data Processing (Advanced SIMD two-register miscellaneous) depending on vecOp in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#simd-dp
func encodeAdvancedSIMDTwoMisc(op vecOp, rd, rn uint32, arr vecArrangement) uint32 {
	var q, u, size, opcode uint32
	switch op {
	case vecOpCnt:
		opcode = 0b00101
		switch arr {
		case vecArrangement8B:
			q, size = 0b0, 0b00
		case vecArrangement16B:
			q, size = 0b1, 0b00
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpCmeq0:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01001
		size, q = arrToSizeQEncoded(arr)
	case vecOpNot:
		u = 1
		opcode = 0b00101
		switch arr {
		case vecArrangement8B:
			q, size = 0b0, 0b00
		case vecArrangement16B:
			q, size = 0b1, 0b00
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpAbs:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01011
		u = 0b0
		size, q = arrToSizeQEncoded(arr)
	case vecOpNeg:
		if arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01011
		u = 0b1
		size, q = arrToSizeQEncoded(arr)
	case vecOpFabs:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01111
		u = 0b0
		size, q = arrToSizeQEncoded(arr)
	case vecOpFneg:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b01111
		u = 0b1
		size, q = arrToSizeQEncoded(arr)
	case vecOpFrintm:
		u = 0b0
		opcode = 0b11001
		switch arr {
		case vecArrangement2S:
			q, size = 0b0, 0b00
		case vecArrangement4S:
			q, size = 0b1, 0b00
		case vecArrangement2D:
			q, size = 0b1, 0b01
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpFrintn:
		u = 0b0
		opcode = 0b11000
		switch arr {
		case vecArrangement2S:
			q, size = 0b0, 0b00
		case vecArrangement4S:
			q, size = 0b1, 0b00
		case vecArrangement2D:
			q, size = 0b1, 0b01
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpFrintp:
		u = 0b0
		opcode = 0b11000
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	case vecOpFrintz:
		u = 0b0
		opcode = 0b11001
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	case vecOpFsqrt:
		if arr < vecArrangement2S || arr == vecArrangement1D {
			panic("unsupported arrangement: " + arr.String())
		}
		opcode = 0b11111
		u = 0b1
		size, q = arrToSizeQEncoded(arr)
	case vecOpFcvtl:
		opcode = 0b10111
		u = 0b0
		switch arr {
		case vecArrangement2S:
			size, q = 0b01, 0b0
		case vecArrangement4H:
			size, q = 0b00, 0b0
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpFcvtn:
		opcode = 0b10110
		u = 0b0
		switch arr {
		case vecArrangement2S:
			size, q = 0b01, 0b0
		case vecArrangement4H:
			size, q = 0b00, 0b0
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpFcvtzs:
		opcode = 0b11011
		u = 0b0
		switch arr {
		case vecArrangement2S:
			q, size = 0b0, 0b10
		case vecArrangement4S:
			q, size = 0b1, 0b10
		case vecArrangement2D:
			q, size = 0b1, 0b11
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpFcvtzu:
		opcode = 0b11011
		u = 0b1
		switch arr {
		case vecArrangement2S:
			q, size = 0b0, 0b10
		case vecArrangement4S:
			q, size = 0b1, 0b10
		case vecArrangement2D:
			q, size = 0b1, 0b11
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpScvtf:
		opcode = 0b11101
		u = 0b0
		switch arr {
		case vecArrangement4S:
			q, size = 0b1, 0b00
		case vecArrangement2S:
			q, size = 0b0, 0b00
		case vecArrangement2D:
			q, size = 0b1, 0b01
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpUcvtf:
		opcode = 0b11101
		u = 0b1
		switch arr {
		case vecArrangement4S:
			q, size = 0b1, 0b00
		case vecArrangement2S:
			q, size = 0b0, 0b00
		case vecArrangement2D:
			q, size = 0b1, 0b01
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	case vecOpSqxtn:
		// When q == 1 it encodes sqxtn2 (operates on upper 64 bits).
		opcode = 0b10100
		u = 0b0
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	case vecOpUqxtn:
		// When q == 1 it encodes uqxtn2 (operates on upper 64 bits).
		opcode = 0b10100
		u = 0b1
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	case vecOpSqxtun:
		// When q == 1 it encodes sqxtun2 (operates on upper 64 bits).
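		// (SQXTUN saturates signed sources into the unsigned range, so it lives
		// in the u=1 opcode space despite taking signed inputs.)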
		opcode = 0b10010
		u = 0b1
		if arr > vecArrangement4S {
			panic("unsupported arrangement: " + arr.String())
		}
		size, q = arrToSizeQEncoded(arr)
	case vecOpRev64:
		opcode = 0b00000
		size, q = arrToSizeQEncoded(arr)
	case vecOpXtn:
		u = 0b0
		opcode = 0b10010
		size, q = arrToSizeQEncoded(arr)
	case vecOpShll:
		u = 0b1
		opcode = 0b10011
		switch arr {
		case vecArrangement8B:
			q, size = 0b0, 0b00
		case vecArrangement4H:
			q, size = 0b0, 0b01
		case vecArrangement2S:
			q, size = 0b0, 0b10
		default:
			panic("unsupported arrangement: " + arr.String())
		}
	default:
		panic("unsupported or illegal vecOp: " + op.String())
	}
	return q<<30 | u<<29 | 0b01110<<24 | size<<22 | 0b10000<<17 | opcode<<12 | 0b10<<10 | rn<<5 | rd
}

// brTableSequenceOffsetTableBegin is the offset inside the brTableSequence where the
// offset table begins: right after the first four instructions (4 bytes each) of the sequence.
const brTableSequenceOffsetTableBegin = 16

func encodeBrTableSequence(c backend.Compiler, index regalloc.VReg, targets []uint32) {
	tmpRegNumber := regNumberInEncoding[tmp]
	indexNumber := regNumberInEncoding[index.RealReg()]

	// adr tmpReg, PC+16 (PC+16 is the address of the first label offset)
	// ldrsw index, [tmpReg, index, UXTW 2] ;; index = int64(*(tmpReg + index*4))
	// add tmpReg, tmpReg, index
	// br tmpReg
	// [offset_to_l1, offset_to_l2, ..., offset_to_lN]
	c.Emit4Bytes(encodeAdr(tmpRegNumber, 16))
	c.Emit4Bytes(encodeLoadOrStore(sLoad32, indexNumber,
		addressMode{kind: addressModeKindRegScaledExtended, rn: tmpRegVReg, rm: index, extOp: extendOpUXTW},
	))
	c.Emit4Bytes(encodeAluRRR(aluOpAdd, tmpRegNumber, tmpRegNumber, indexNumber, true, false))
	c.Emit4Bytes(encodeUnconditionalBranchReg(tmpRegNumber, false))

	// Offsets are resolved in ResolveRelativeAddress phase.
	for _, offset := range targets {
		if wazevoapi.PrintMachineCodeHexPerFunctionDisassemblable {
			// Inlined offset tables cannot be disassembled properly, so pad dummy instructions to make the debugging easier.
			c.Emit4Bytes(dummyInstruction)
		} else {
			c.Emit4Bytes(offset)
		}
	}
}

// encodeExitSequence matches the implementation detail of functionABI.emitGoEntryPreamble.
func encodeExitSequence(c backend.Compiler, ctxReg regalloc.VReg) {
	// Restore the FP, SP and LR, and return to the Go code:
	// ldr lr,  [ctxReg, #GoReturnAddress]
	// ldr fp,  [ctxReg, #OriginalFramePointer]
	// ldr tmp, [ctxReg, #OriginalStackPointer]
	// mov sp, tmp ;; sp cannot be str'ed directly.
	// ret ;; --> return to the Go code

	var ctxEvicted bool
	if ctx := ctxReg.RealReg(); ctx == fp || ctx == lr {
		// In order to avoid overwriting the context register, we move ctxReg to tmp.
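		// (The loads below clobber fp and lr before all three slots have been read,
		// so the context pointer cannot be left in either of those registers.)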
		c.Emit4Bytes(encodeMov64(regNumberInEncoding[tmp], regNumberInEncoding[ctx], false, false))
		ctxReg = tmpRegVReg
		ctxEvicted = true
	}

	restoreLr := encodeLoadOrStore(
		uLoad64,
		regNumberInEncoding[lr],
		addressMode{
			kind: addressModeKindRegUnsignedImm12,
			rn:   ctxReg,
			imm:  wazevoapi.ExecutionContextOffsetGoReturnAddress.I64(),
		},
	)

	restoreFp := encodeLoadOrStore(
		uLoad64,
		regNumberInEncoding[fp],
		addressMode{
			kind: addressModeKindRegUnsignedImm12,
			rn:   ctxReg,
			imm:  wazevoapi.ExecutionContextOffsetOriginalFramePointer.I64(),
		},
	)

	restoreSpToTmp := encodeLoadOrStore(
		uLoad64,
		regNumberInEncoding[tmp],
		addressMode{
			kind: addressModeKindRegUnsignedImm12,
			rn:   ctxReg,
			imm:  wazevoapi.ExecutionContextOffsetOriginalStackPointer.I64(),
		},
	)

	movTmpToSp := encodeAddSubtractImmediate(0b100, 0, 0,
		regNumberInEncoding[tmp], regNumberInEncoding[sp])

	c.Emit4Bytes(restoreFp)
	c.Emit4Bytes(restoreLr)
	c.Emit4Bytes(restoreSpToTmp)
	c.Emit4Bytes(movTmpToSp)
	c.Emit4Bytes(encodeRet())
	if !ctxEvicted {
		// In order to have the fixed-length exit sequence, we need to pad the binary.
		// Since this will never be reached, we insert a dummy instruction.
		c.Emit4Bytes(dummyInstruction)
	}
}

func encodeRet() uint32 {
	// https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/RET--Return-from-subroutine-?lang=en
	return 0b1101011001011111<<16 | regNumberInEncoding[lr]<<5
}

// encodeAtomicRmw encodes as Atomic memory operations in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
// with both the acquire and release bits set (e.g. LDADDAL, SWPAL).
func encodeAtomicRmw(op atomicRmwOp, rs, rt, rn uint32, size uint32) uint32 {
	var _31to21, _15to10, sz uint32

	switch size {
	case 8:
		sz = 0b11
	case 4:
		sz = 0b10
	case 2:
		sz = 0b01
	case 1:
		sz = 0b00
	}

	_31to21 = 0b00111000_111 | sz<<9

	switch op {
	case atomicRmwOpAdd:
		_15to10 = 0b000000
	case atomicRmwOpClr:
		_15to10 = 0b000100
	case atomicRmwOpSet:
		_15to10 = 0b001100
	case atomicRmwOpEor:
		_15to10 = 0b001000
	case atomicRmwOpSwp:
		_15to10 = 0b100000
	}

	return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt
}

// encodeAtomicCas encodes as Compare and swap with acquire-release semantics (CASAL and its
// halfword/byte variants) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeAtomicCas(rs, rt, rn uint32, size uint32) uint32 {
	var _31to21, _15to10, sz uint32

	switch size {
	case 8:
		sz = 0b11
	case 4:
		sz = 0b10
	case 2:
		sz = 0b01
	case 1:
		sz = 0b00
	}

	_31to21 = 0b00001000_111 | sz<<9
	_15to10 = 0b111111

	return _31to21<<21 | rs<<16 | _15to10<<10 | rn<<5 | rt
}

// encodeAtomicLoadStore encodes as Load-acquire (LDAR, l=1) or Store-release (STLR, l=0) in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Loads-and-Stores?lang=en
func encodeAtomicLoadStore(rn, rt, size, l uint32) uint32 {
	var _31to21, _20to16, _15to10, sz uint32

	switch size {
	case 8:
		sz = 0b11
	case 4:
		sz = 0b10
	case 2:
		sz = 0b01
	case 1:
		sz = 0b00
	}

	_31to21 = 0b00001000_100 | sz<<9 | l<<1
	_20to16 = 0b11111
	_15to10 = 0b111111

	return _31to21<<21 | _20to16<<16 | _15to10<<10 | rn<<5 | rt
}

// encodeDMB encodes the fixed word for `dmb ish` (data memory barrier, inner shareable domain).
func encodeDMB() uint32 {
	return 0b11010101000000110011101110111111
}
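
// Hand-derived cross-check for the atomic encodings above (an illustration, not
// exercised by this file): encodeAtomicRmw(atomicRmwOpAdd, 1, 2, 0, 8) should
// yield 0xf8e10002, i.e. `ldaddal x1, x2, [x0]`, with bits 23-22 fixed to 0b11
// (acquire-release) by the 0b00111000_111 constant.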