wa-lang.org/wazero@v1.0.2/internal/asm/arm64/impl.go

package arm64

import (
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"

	"wa-lang.org/wazero/internal/asm"
)

type nodeImpl struct {
	instruction asm.Instruction

	offsetInBinaryField asm.NodeOffsetInBinary // Field suffix to dodge conflict with OffsetInBinary

	// jumpTarget holds the target node in the linked list for the jump-kind instruction.
	jumpTarget *nodeImpl
	// next holds the next node from this node in the assembled linked list.
	next *nodeImpl

	types                            operandTypes
	srcReg, srcReg2, dstReg, dstReg2 asm.Register
	srcConst, dstConst               asm.ConstantValue

	vectorArrangement              VectorArrangement
	srcVectorIndex, dstVectorIndex VectorIndex

	// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
	// the read-instruction-address instruction. See asm.assemblerBase.CompileReadInstructionAddress.
	readInstructionAddressBeforeTargetInstruction asm.Instruction

	// jumpOrigins holds all the nodes trying to jump into this node. In other words, all the nodes with .jumpTarget == this.
	jumpOrigins map[*nodeImpl]struct{}

	staticConst *asm.StaticConst
}

// AssignJumpTarget implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignJumpTarget(target asm.Node) {
	n.jumpTarget = target.(*nodeImpl)
}

// AssignDestinationConstant implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
	n.dstConst = value
}

// AssignSourceConstant implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignSourceConstant(value asm.ConstantValue) {
	n.srcConst = value
}

// OffsetInBinary implements the same method as documented on asm.Node.
func (n *nodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
	return n.offsetInBinaryField
}

// String implements fmt.Stringer.
//
// This is for debugging purposes, and the format is similar to the AT&T assembly syntax,
// meaning that this should look like "INSTRUCTION ${from}, ${to}" where each operand
// might be embraced by '[]' to represent the memory location, and multiple operands
// are embraced by `()`.
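// For example, a memory-to-register node might render as "MOVD [R27 + 0x8], R1" and a
// two-registers-to-register node as "ADD (R0, R1), R2" (illustrative renderings only;
// the exact instruction and register names depend on this package's naming).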
func (n *nodeImpl) String() (ret string) {
	instName := InstructionName(n.instruction)
	switch n.types {
	case operandTypesNoneToNone:
		ret = instName
	case operandTypesNoneToRegister:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.dstReg))
	case operandTypesNoneToBranch:
		ret = fmt.Sprintf("%s {%v}", instName, n.jumpTarget)
	case operandTypesRegisterToRegister:
		ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg))
	case operandTypesLeftShiftedRegisterToRegister:
		ret = fmt.Sprintf("%s (%s, %s << %d), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), n.srcConst, RegisterName(n.dstReg))
	case operandTypesTwoRegistersToRegister:
		ret = fmt.Sprintf("%s (%s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
	case operandTypesThreeRegistersToRegister:
		ret = fmt.Sprintf("%s (%s, %s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), RegisterName(n.dstReg2))
	case operandTypesTwoRegistersToNone:
		ret = fmt.Sprintf("%s (%s, %s)", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2))
	case operandTypesRegisterAndConstToNone:
		ret = fmt.Sprintf("%s (%s, 0x%x)", instName, RegisterName(n.srcReg), n.srcConst)
	case operandTypesRegisterToMemory:
		if n.dstReg2 != asm.NilRegister {
			ret = fmt.Sprintf("%s %s, [%s + %s]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), RegisterName(n.dstReg2))
		} else {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesMemoryToRegister:
		if n.srcReg2 != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + %s], %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
		}
	case operandTypesConstToRegister:
		ret = fmt.Sprintf("%s 0x%x, %s", instName, n.srcConst, RegisterName(n.dstReg))
	case operandTypesRegisterToVectorRegister:
		ret = fmt.Sprintf("%s %s, %s.%s[%d]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement, n.dstVectorIndex)
	case operandTypesVectorRegisterToRegister:
		ret = fmt.Sprintf("%s %s.%s[%d], %s", instName, RegisterName(n.srcReg), n.vectorArrangement, n.srcVectorIndex, RegisterName(n.dstReg))
	case operandTypesVectorRegisterToMemory:
		if n.dstReg2 != asm.NilRegister {
			ret = fmt.Sprintf("%s %s.%s, [%s + %s]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), RegisterName(n.dstReg2))
		} else {
			ret = fmt.Sprintf("%s %s.%s, [%s + 0x%x]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesMemoryToVectorRegister:
		ret = fmt.Sprintf("%s [%s], %s.%s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
	case operandTypesVectorRegisterToVectorRegister:
		ret = fmt.Sprintf("%s %[2]s.%[4]s, %[3]s.%[4]s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
	case operandTypesStaticConstToVectorRegister:
		ret = fmt.Sprintf("%s $%#x, %s.%s", instName, n.staticConst.Raw, RegisterName(n.dstReg), n.vectorArrangement)
	case operandTypesTwoVectorRegistersToVectorRegister:
		ret = fmt.Sprintf("%s (%s.%[5]s, %[3]s.%[5]s), %[4]s.%[5]s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), n.vectorArrangement)
	}
	return
}

// operandType represents where an operand is placed for an instruction.
// Note: this is almost the same as obj.AddrType in the Go assembler.
type operandType byte

const (
	operandTypeNone operandType = iota
	operandTypeRegister
	operandTypeLeftShiftedRegister
	operandTypeTwoRegisters
	operandTypeThreeRegisters
	operandTypeRegisterAndConst
	operandTypeMemory
	operandTypeConst
	operandTypeBranch
	operandTypeSIMDByte
	operandTypeTwoSIMDBytes
	operandTypeVectorRegister
	operandTypeTwoVectorRegisters
	operandTypeStaticConst
)

// String implements fmt.Stringer.
func (o operandType) String() (ret string) {
	switch o {
	case operandTypeNone:
		ret = "none"
	case operandTypeRegister:
		ret = "register"
	case operandTypeLeftShiftedRegister:
		ret = "left-shifted-register"
	case operandTypeTwoRegisters:
		ret = "two-registers"
	case operandTypeThreeRegisters:
		ret = "three-registers"
	case operandTypeRegisterAndConst:
		ret = "register-and-const"
	case operandTypeMemory:
		ret = "memory"
	case operandTypeConst:
		ret = "const"
	case operandTypeBranch:
		ret = "branch"
	case operandTypeSIMDByte:
		ret = "simd-byte"
	case operandTypeTwoSIMDBytes:
		ret = "two-simd-bytes"
	case operandTypeVectorRegister:
		ret = "vector-register"
	case operandTypeStaticConst:
		ret = "static-const"
	case operandTypeTwoVectorRegisters:
		ret = "two-vector-registers"
	}
	return
}

// operandTypes represents the only combinations of two operandTypes used by wazero.
type operandTypes struct{ src, dst operandType }

var (
	operandTypesNoneToNone                         = operandTypes{operandTypeNone, operandTypeNone}
	operandTypesNoneToRegister                     = operandTypes{operandTypeNone, operandTypeRegister}
	operandTypesNoneToBranch                       = operandTypes{operandTypeNone, operandTypeBranch}
	operandTypesRegisterToRegister                 = operandTypes{operandTypeRegister, operandTypeRegister}
	operandTypesLeftShiftedRegisterToRegister      = operandTypes{operandTypeLeftShiftedRegister, operandTypeRegister}
	operandTypesTwoRegistersToRegister             = operandTypes{operandTypeTwoRegisters, operandTypeRegister}
	operandTypesThreeRegistersToRegister           = operandTypes{operandTypeThreeRegisters, operandTypeRegister}
	operandTypesTwoRegistersToNone                 = operandTypes{operandTypeTwoRegisters, operandTypeNone}
	operandTypesRegisterAndConstToNone             = operandTypes{operandTypeRegisterAndConst, operandTypeNone}
	operandTypesRegisterToMemory                   = operandTypes{operandTypeRegister, operandTypeMemory}
	operandTypesMemoryToRegister                   = operandTypes{operandTypeMemory, operandTypeRegister}
	operandTypesConstToRegister                    = operandTypes{operandTypeConst, operandTypeRegister}
	operandTypesRegisterToVectorRegister           = operandTypes{operandTypeRegister, operandTypeVectorRegister}
	operandTypesVectorRegisterToRegister           = operandTypes{operandTypeVectorRegister, operandTypeRegister}
	operandTypesMemoryToVectorRegister             = operandTypes{operandTypeMemory, operandTypeVectorRegister}
	operandTypesVectorRegisterToMemory             = operandTypes{operandTypeVectorRegister, operandTypeMemory}
	operandTypesVectorRegisterToVectorRegister     = operandTypes{operandTypeVectorRegister, operandTypeVectorRegister}
	operandTypesTwoVectorRegistersToVectorRegister = operandTypes{operandTypeTwoVectorRegisters, operandTypeVectorRegister}
	operandTypesStaticConstToVectorRegister        = operandTypes{operandTypeStaticConst, operandTypeVectorRegister}
)

// String implements fmt.Stringer.
func (o operandTypes) String() string {
	return fmt.Sprintf("from:%s,to:%s", o.src, o.dst)
}

const (
	maxSignedInt26 int64 = 1<<25 - 1
	minSignedInt26 int64 = -(1 << 25)

	// A 19-bit signed immediate holds values in [-2^18, 2^18-1].
	maxSignedInt19 int64 = 1<<18 - 1
	minSignedInt19 int64 = -(1 << 18)
)

// AssemblerImpl implements Assembler.
type AssemblerImpl struct {
	asm.BaseAssemblerImpl
	Root, Current     *nodeImpl
	Buf               *bytes.Buffer
	temporaryRegister asm.Register
	nodeCount         int
	pool              *asm.StaticConstPool
	// MaxDisplacementForConstantPool is fixed to defaultMaxDisplacementForConstPool,
	// but is kept as a field here for testability.
	MaxDisplacementForConstantPool int
}

func NewAssembler(temporaryRegister asm.Register) *AssemblerImpl {
	return &AssemblerImpl{
		Buf: bytes.NewBuffer(nil), temporaryRegister: temporaryRegister,
		pool:                           asm.NewStaticConstPool(),
		MaxDisplacementForConstantPool: defaultMaxDisplacementForConstPool,
	}
}

// newNode creates a new Node and appends it into the linked list.
func (a *AssemblerImpl) newNode(instruction asm.Instruction, types operandTypes) *nodeImpl {
	n := &nodeImpl{
		instruction: instruction,
		next:        nil,
		types:       types,
		jumpOrigins: map[*nodeImpl]struct{}{},
	}

	a.addNode(n)
	return n
}

// addNode appends the new node into the linked list.
func (a *AssemblerImpl) addNode(node *nodeImpl) {
	a.nodeCount++

	if a.Root == nil {
		a.Root = node
		a.Current = node
	} else {
		parent := a.Current
		parent.next = node
		a.Current = node
	}

	for _, o := range a.SetBranchTargetOnNextNodes {
		origin := o.(*nodeImpl)
		origin.jumpTarget = node
	}
	a.SetBranchTargetOnNextNodes = nil
}

// Assemble implements asm.AssemblerBase.
func (a *AssemblerImpl) Assemble() ([]byte, error) {
	// arm64 instructions are a fixed 4 bytes, but some nodes are encoded as multiple
	// instructions, so we grow the buffer by nodeCount*8 as a heuristic; the resulting
	// binary is not necessarily that size.
	a.Buf.Grow(a.nodeCount * 8)

	for n := a.Root; n != nil; n = n.next {
		n.offsetInBinaryField = uint64(a.Buf.Len())
		if err := a.encodeNode(n); err != nil {
			return nil, err
		}
		a.maybeFlushConstPool(n.next == nil)
	}

	code := a.bytes()
	for _, cb := range a.OnGenerateCallbacks {
		if err := cb(code); err != nil {
			return nil, err
		}
	}
	return code, nil
}

const defaultMaxDisplacementForConstPool = (1 << 20) - 1 - 4 // -4 for the unconditional branch emitted to skip the constants.

// maybeFlushConstPool flushes the constant pool if endOfBinary or a boundary condition was met.
func (a *AssemblerImpl) maybeFlushConstPool(endOfBinary bool) {
	if a.pool.FirstUseOffsetInBinary == nil {
		return
	}

	// If endOfBinary = true, we no longer need to emit the instructions, therefore
	// flush all the constants.
	if endOfBinary ||
		// Also, if the offset between the first usage of the constant pool and
		// the first constant would exceed 2^20-1 (= 1MiB-1), which is the maximum offset
		// for the LDR(literal)/ADR instructions, flush all the constants in the pool.
		(a.Buf.Len()+a.pool.PoolSizeInBytes-int(*a.pool.FirstUseOffsetInBinary)) >= a.MaxDisplacementForConstantPool {

		// Before emitting the consts, we have to add a br instruction to skip over the const pool.
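		// (The 4 bytes written below form a B instruction: the last byte 0x14 carries the
		// 0b000101 opcode in its top six bits, and skipOffset fills the imm26 field, which
		// counts 4-byte words — the same unconditional-branch encoding used in
		// encodeRelativeBranch.)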
		// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1123-L1129
		skipOffset := a.pool.PoolSizeInBytes/4 + 1
		if a.pool.PoolSizeInBytes%4 != 0 {
			skipOffset++
		}
		if endOfBinary {
			// If this is the end of the binary, the br instruction is never executed,
			// so the offset can be zero (which is the behavior of Go's assembler).
			skipOffset = 0
		}

		a.Buf.Write([]byte{
			byte(skipOffset),
			byte(skipOffset >> 8),
			byte(skipOffset >> 16),
			0x14,
		})

		// Then add the consts into the binary.
		for _, c := range a.pool.Consts {
			c.SetOffsetInBinary(uint64(a.Buf.Len()))
			a.Buf.Write(c.Raw)
		}

		// arm64 instructions are 4-byte (32-bit) aligned, so we must pad with zero bytes
		// here if the consts left the buffer misaligned.
		if pad := a.Buf.Len() % 4; pad != 0 {
			a.Buf.Write(make([]byte, 4-pad))
		}

		// After the flush, reset the constant pool.
		a.pool = asm.NewStaticConstPool()
	}
}

// bytes returns the encoded binary.
func (a *AssemblerImpl) bytes() []byte {
	// 16 bytes alignment to match our impl with golang-asm.
	// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L62
	//
	// TODO: Delete after golang-asm removal.
	if pad := 16 - a.Buf.Len()%16; pad > 0 && pad != 16 {
		a.Buf.Write(make([]byte, pad))
	}
	return a.Buf.Bytes()
}

// encodeNode encodes the given node into the writer.
func (a *AssemblerImpl) encodeNode(n *nodeImpl) (err error) {
	switch n.types {
	case operandTypesNoneToNone:
		err = a.encodeNoneToNone(n)
	case operandTypesNoneToRegister:
		err = a.encodeJumpToRegister(n)
	case operandTypesNoneToBranch:
		err = a.encodeRelativeBranch(n)
	case operandTypesRegisterToRegister:
		err = a.encodeRegisterToRegister(n)
	case operandTypesLeftShiftedRegisterToRegister:
		err = a.encodeLeftShiftedRegisterToRegister(n)
	case operandTypesTwoRegistersToRegister:
		err = a.encodeTwoRegistersToRegister(n)
	case operandTypesThreeRegistersToRegister:
		err = a.encodeThreeRegistersToRegister(n)
	case operandTypesTwoRegistersToNone:
		err = a.encodeTwoRegistersToNone(n)
	case operandTypesRegisterAndConstToNone:
		err = a.encodeRegisterAndConstToNone(n)
	case operandTypesRegisterToMemory:
		err = a.encodeRegisterToMemory(n)
	case operandTypesMemoryToRegister:
		err = a.encodeMemoryToRegister(n)
	case operandTypesConstToRegister:
		err = a.encodeConstToRegister(n)
	case operandTypesRegisterToVectorRegister:
		err = a.encodeRegisterToVectorRegister(n)
	case operandTypesVectorRegisterToRegister:
		err = a.encodeVectorRegisterToRegister(n)
	case operandTypesMemoryToVectorRegister:
		err = a.encodeMemoryToVectorRegister(n)
	case operandTypesVectorRegisterToMemory:
		err = a.encodeVectorRegisterToMemory(n)
	case operandTypesVectorRegisterToVectorRegister:
		err = a.encodeVectorRegisterToVectorRegister(n)
	case operandTypesStaticConstToVectorRegister:
		err = a.encodeStaticConstToVectorRegister(n)
	case operandTypesTwoVectorRegistersToVectorRegister:
		err = a.encodeTwoVectorRegistersToVectorRegister(n)
	default:
		err = fmt.Errorf("encoder undefined for [%s] operand type", n.types)
	}
	if err != nil {
		err = fmt.Errorf("%w: %s", err, n) // Ensure the error is debuggable by including the string value of the node.
	}
	return
}

// CompileStandAlone implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
	return a.newNode(instruction, operandTypesNoneToNone)
}

// CompileConstToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileConstToRegister(
	instruction asm.Instruction,
	value asm.ConstantValue,
	destinationReg asm.Register,
) (inst asm.Node) {
	n := a.newNode(instruction, operandTypesConstToRegister)
	n.srcConst = value
	n.dstReg = destinationReg
	return n
}

// CompileRegisterToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
	n := a.newNode(instruction, operandTypesRegisterToRegister)
	n.srcReg = from
	n.dstReg = to
}

// CompileMemoryToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileMemoryToRegister(
	instruction asm.Instruction,
	sourceBaseReg asm.Register,
	sourceOffsetConst asm.ConstantValue,
	destinationReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.srcReg = sourceBaseReg
	n.srcConst = sourceOffsetConst
	n.dstReg = destinationReg
}

// CompileRegisterToMemory implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToMemory(
	instruction asm.Instruction,
	sourceRegister, destinationBaseRegister asm.Register,
	destinationOffsetConst asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = sourceRegister
	n.dstReg = destinationBaseRegister
	n.dstConst = destinationOffsetConst
}

// CompileJump implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
	return a.newNode(jmpInstruction, operandTypesNoneToBranch)
}

// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
	n := a.newNode(jmpInstruction, operandTypesNoneToRegister)
	n.dstReg = reg
}

// CompileReadInstructionAddress implements the same method as documented on asm.AssemblerBase.
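// The node is created as an ADR with memory-to-register operands; the actual address is
// resolved during Assemble, once the offset of the instruction following
// readInstructionAddressBeforeTargetInstruction is known (the resolution presumably lives
// in the memory-to-register encoder, which is outside this excerpt).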
func (a *AssemblerImpl) CompileReadInstructionAddress(
	destinationRegister asm.Register,
	beforeAcquisitionTargetInstruction asm.Instruction,
) {
	n := a.newNode(ADR, operandTypesMemoryToRegister)
	n.dstReg = destinationRegister
	n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
}

// CompileMemoryWithRegisterOffsetToRegister implements Assembler.CompileMemoryWithRegisterOffsetToRegister.
func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToRegister(
	instruction asm.Instruction,
	srcBaseReg, srcOffsetReg, dstReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.dstReg = dstReg
	n.srcReg = srcBaseReg
	n.srcReg2 = srcOffsetReg
}

// CompileRegisterToMemoryWithRegisterOffset implements Assembler.CompileRegisterToMemoryWithRegisterOffset.
func (a *AssemblerImpl) CompileRegisterToMemoryWithRegisterOffset(
	instruction asm.Instruction,
	srcReg, dstBaseReg, dstOffsetReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstReg2 = dstOffsetReg
}

// CompileTwoRegistersToRegister implements Assembler.CompileTwoRegistersToRegister.
func (a *AssemblerImpl) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register) {
	n := a.newNode(instruction, operandTypesTwoRegistersToRegister)
	n.srcReg = src1
	n.srcReg2 = src2
	n.dstReg = dst
}

// CompileThreeRegistersToRegister implements Assembler.CompileThreeRegistersToRegister.
func (a *AssemblerImpl) CompileThreeRegistersToRegister(
	instruction asm.Instruction,
	src1, src2, src3, dst asm.Register,
) {
	n := a.newNode(instruction, operandTypesThreeRegistersToRegister)
	n.srcReg = src1
	n.srcReg2 = src2
	n.dstReg = src3 // To minimize the size of the nodeImpl struct, we reuse dstReg for the third source operand.
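	// dstReg2 then carries the actual destination; encodeThreeRegistersToRegister
	// unpacks this layout when emitting the instruction.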
	n.dstReg2 = dst
}

// CompileTwoRegistersToNone implements Assembler.CompileTwoRegistersToNone.
func (a *AssemblerImpl) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) {
	n := a.newNode(instruction, operandTypesTwoRegistersToNone)
	n.srcReg = src1
	n.srcReg2 = src2
}

// CompileRegisterAndConstToNone implements Assembler.CompileRegisterAndConstToNone.
func (a *AssemblerImpl) CompileRegisterAndConstToNone(
	instruction asm.Instruction,
	src asm.Register,
	srcConst asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesRegisterAndConstToNone)
	n.srcReg = src
	n.srcConst = srcConst
}

// CompileLeftShiftedRegisterToRegister implements Assembler.CompileLeftShiftedRegisterToRegister.
func (a *AssemblerImpl) CompileLeftShiftedRegisterToRegister(
	instruction asm.Instruction,
	shiftedSourceReg asm.Register,
	shiftNum asm.ConstantValue,
	srcReg, dstReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesLeftShiftedRegisterToRegister)
	n.srcReg = srcReg
	n.srcReg2 = shiftedSourceReg
	n.srcConst = shiftNum
	n.dstReg = dstReg
}

// CompileConditionalRegisterSet implements Assembler.CompileConditionalRegisterSet.
func (a *AssemblerImpl) CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register) {
	n := a.newNode(CSET, operandTypesRegisterToRegister)
	n.srcReg = conditionalRegisterStateToRegister(cond)
	n.dstReg = dstReg
}

// CompileMemoryToVectorRegister implements Assembler.CompileMemoryToVectorRegister.
func (a *AssemblerImpl) CompileMemoryToVectorRegister(
	instruction asm.Instruction, srcBaseReg asm.Register, srcOffset asm.ConstantValue, dstReg asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
	n.srcReg = srcBaseReg
	n.srcConst = srcOffset
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileMemoryWithRegisterOffsetToVectorRegister implements Assembler.CompileMemoryWithRegisterOffsetToVectorRegister.
func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToVectorRegister(instruction asm.Instruction,
	srcBaseReg, srcOffsetRegister asm.Register, dstReg asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
	n.srcReg = srcBaseReg
	n.srcReg2 = srcOffsetRegister
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileVectorRegisterToMemory implements Assembler.CompileVectorRegisterToMemory.
func (a *AssemblerImpl) CompileVectorRegisterToMemory(
	instruction asm.Instruction, srcReg, dstBaseReg asm.Register, dstOffset asm.ConstantValue, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstConst = dstOffset
	n.vectorArrangement = arrangement
}

// CompileVectorRegisterToMemoryWithRegisterOffset implements Assembler.CompileVectorRegisterToMemoryWithRegisterOffset.
func (a *AssemblerImpl) CompileVectorRegisterToMemoryWithRegisterOffset(instruction asm.Instruction,
	srcReg, dstBaseReg, dstOffsetRegister asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstReg2 = dstOffsetRegister
	n.vectorArrangement = arrangement
}

// CompileRegisterToVectorRegister implements Assembler.CompileRegisterToVectorRegister.
func (a *AssemblerImpl) CompileRegisterToVectorRegister(
	instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, index VectorIndex,
) {
	n := a.newNode(instruction, operandTypesRegisterToVectorRegister)
	n.srcReg = srcReg
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
	n.dstVectorIndex = index
}

// CompileVectorRegisterToRegister implements Assembler.CompileVectorRegisterToRegister.
func (a *AssemblerImpl) CompileVectorRegisterToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register,
	arrangement VectorArrangement, index VectorIndex,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToRegister)
	n.srcReg = srcReg
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
	n.srcVectorIndex = index
}

// CompileVectorRegisterToVectorRegister implements Assembler.CompileVectorRegisterToVectorRegister.
func (a *AssemblerImpl) CompileVectorRegisterToVectorRegister(
	instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, srcIndex, dstIndex VectorIndex,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
	n.srcReg = srcReg
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
	n.srcVectorIndex = srcIndex
	n.dstVectorIndex = dstIndex
}

// CompileVectorRegisterToVectorRegisterWithConst implements Assembler.CompileVectorRegisterToVectorRegisterWithConst.
func (a *AssemblerImpl) CompileVectorRegisterToVectorRegisterWithConst(instruction asm.Instruction,
	srcReg, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
	n.srcReg = srcReg
	n.srcConst = c
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileStaticConstToRegister implements Assembler.CompileStaticConstToRegister.
func (a *AssemblerImpl) CompileStaticConstToRegister(instruction asm.Instruction, c *asm.StaticConst, dstReg asm.Register) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.staticConst = c
	n.dstReg = dstReg
}

// CompileStaticConstToVectorRegister implements Assembler.CompileStaticConstToVectorRegister.
func (a *AssemblerImpl) CompileStaticConstToVectorRegister(instruction asm.Instruction,
	c *asm.StaticConst, dstReg asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesStaticConstToVectorRegister)
	n.staticConst = c
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileTwoVectorRegistersToVectorRegister implements Assembler.CompileTwoVectorRegistersToVectorRegister.
func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegister(instruction asm.Instruction, srcReg, srcReg2, dstReg asm.Register,
	arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
	n.srcReg = srcReg
	n.srcReg2 = srcReg2
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileTwoVectorRegistersToVectorRegisterWithConst implements Assembler.CompileTwoVectorRegistersToVectorRegisterWithConst.
func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegisterWithConst(instruction asm.Instruction,
	srcReg, srcReg2, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
	n.srcReg = srcReg
	n.srcReg2 = srcReg2
	n.srcConst = c
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

func errorEncodingUnsupported(n *nodeImpl) error {
	return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.instruction), n.types)
}

func (a *AssemblerImpl) encodeNoneToNone(n *nodeImpl) (err error) {
	if n.instruction != NOP {
		err = errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeJumpToRegister(n *nodeImpl) (err error) {
	// "Unconditional branch (register)" in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions
	var opc byte
	switch n.instruction {
	case RET:
		opc = 0b0010
	case B:
		opc = 0b0000
	default:
		return errorEncodingUnsupported(n)
	}

	regBits, err := intRegisterBits(n.dstReg)
	if err != nil {
		return fmt.Errorf("invalid destination register: %w", err)
	}

	a.Buf.Write([]byte{
		0x00 | (regBits << 5),
		0x00 | (regBits >> 3),
		0b000_11111 | (opc << 5),
		0b1101011_0 | (opc >> 3),
	})
	return
}

func (a *AssemblerImpl) encodeRelativeBranch(n *nodeImpl) (err error) {
	switch n.instruction {
	case B, BCONDEQ, BCONDGE, BCONDGT, BCONDHI, BCONDHS, BCONDLE, BCONDLO, BCONDLS, BCONDLT, BCONDMI, BCONDNE, BCONDVS, BCONDPL:
	default:
		return errorEncodingUnsupported(n)
	}

	if n.jumpTarget == nil {
		return fmt.Errorf("branch target must be set for %s", InstructionName(n.instruction))
	}

	// At this point, we don't yet know the target's offset, so emit a placeholder (4 bytes)
	// and fix it up in a callback once the whole binary has been generated.
	a.Buf.Write([]byte{0, 0, 0, 0})

	a.AddOnGenerateCallBack(func(code []byte) error {
		var condBits byte
		const condBitsUnconditional = 0xff // Indicates that this is not a conditional jump.
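
		// (The values assigned below are the standard AArch64 condition codes — EQ=0b0000,
		// NE=0b0001, and so on — as tabulated at the link that follows.)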
		// https://developer.arm.com/documentation/den0024/a/CHDEEABE
		switch n.instruction {
		case B:
			condBits = condBitsUnconditional
		case BCONDEQ:
			condBits = 0b0000
		case BCONDGE:
			condBits = 0b1010
		case BCONDGT:
			condBits = 0b1100
		case BCONDHI:
			condBits = 0b1000
		case BCONDHS:
			condBits = 0b0010
		case BCONDLE:
			condBits = 0b1101
		case BCONDLO:
			condBits = 0b0011
		case BCONDLS:
			condBits = 0b1001
		case BCONDLT:
			condBits = 0b1011
		case BCONDMI:
			condBits = 0b0100
		case BCONDPL:
			condBits = 0b0101
		case BCONDNE:
			condBits = 0b0001
		case BCONDVS:
			condBits = 0b0110
		}

		branchInstOffset := int64(n.OffsetInBinary())
		offset := int64(n.jumpTarget.OffsetInBinary()) - branchInstOffset
		if offset%4 != 0 {
			return errors.New("BUG: relative jump offset must be 4 bytes aligned")
		}

		branchInst := code[branchInstOffset : branchInstOffset+4]
		if condBits == condBitsUnconditional {
			imm26 := offset / 4
			if imm26 < minSignedInt26 || imm26 > maxSignedInt26 {
				// In theory this could happen if a Wasm binary has a huge single label (more than 128MiB
				// for a single block). In that case we would load the offset into a register and do a
				// register jump instead, but to avoid that complexity we impose this limit for now,
				// as such a case is unlikely to happen in practice.
				return fmt.Errorf("relative jump offset %d/4 must be within %d and %d", offset, minSignedInt26, maxSignedInt26)
			}
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-?lang=en
			branchInst[0] = byte(imm26)
			branchInst[1] = byte(imm26 >> 8)
			branchInst[2] = byte(imm26 >> 16)
			branchInst[3] = (byte(imm26 >> 24 & 0b000000_11)) | 0b000101_00
		} else {
			imm19 := offset / 4
			if imm19 < minSignedInt19 || imm19 > maxSignedInt19 {
				// This would be a bug in our compiler, as conditional jumps are only used with small
				// offsets (~a few bytes); if this ever happens, the compiler can be fixed.
				return fmt.Errorf("BUG: relative jump offset %d/4(=%d) must be within %d and %d", offset, imm19, minSignedInt19, maxSignedInt19)
			}
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally-?lang=en
			branchInst[0] = (byte(imm19<<5) & 0b111_0_0000) | condBits
			branchInst[1] = byte(imm19 >> 3)
			branchInst[2] = byte(imm19 >> 11)
			branchInst[3] = 0b01010100
		}
		return nil
	})
	return
}

func checkRegisterToRegisterType(src, dst asm.Register, requireSrcInt, requireDstInt bool) (err error) {
	isSrcInt, isDstInt := isIntRegister(src), isIntRegister(dst)
	if isSrcInt && !requireSrcInt {
		err = fmt.Errorf("src requires float register but got %s", RegisterName(src))
	} else if !isSrcInt && requireSrcInt {
		err = fmt.Errorf("src requires int register but got %s", RegisterName(src))
	} else if isDstInt && !requireDstInt {
		err = fmt.Errorf("dst requires float register but got %s", RegisterName(dst))
	} else if !isDstInt && requireDstInt {
		err = fmt.Errorf("dst requires int register but got %s", RegisterName(dst))
	}
	return
}

func (a *AssemblerImpl) encodeRegisterToRegister(n *nodeImpl) (err error) {
	switch inst := n.instruction; inst {
	case ADD, ADDW, SUB:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		var sfops byte
		switch inst {
		case ADD:
			sfops = 0b100
		case ADDW:
			// sfops stays zero: 32-bit ADD.
		case SUB:
			sfops = 0b110
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		a.Buf.Write([]byte{
			(dstRegBits << 5) | dstRegBits,
			dstRegBits >> 3,
			srcRegBits,
			(sfops << 5) | 0b01011,
		})
	case CLZ, CLZW, RBIT, RBITW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		var sf, opcode byte
		switch inst {
		case CLZ:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
			sf, opcode = 0b1, 0b000_100
		case CLZW:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
			sf, opcode = 0b0, 0b000_100
		case RBIT:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
			sf, opcode = 0b1, 0b000_000
		case RBITW:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
			sf, opcode = 0b0, 0b000_000
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		a.Buf.Write([]byte{
			(srcRegBits << 5) | dstRegBits,
			opcode<<2 | (srcRegBits >> 3),
			0b110_00000,
			(sf << 7) | 0b0_1011010,
		})
	case CSET:
		if !isConditionalRegister(n.srcReg) {
			return fmt.Errorf("CSET requires conditional register but got %s", RegisterName(n.srcReg))
		}

		dstRegBits, err := intRegisterBits(n.dstReg)
		if err != nil {
			return err
		}

		// CSET encodes the conditional bits with its least significant bit inverted.
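		// (CSET Rd, cond is an alias of CSINC Rd, WZR, WZR, invert(cond), which is why each
		// value below is the standard condition code from the table linked above with bit 0
		// flipped — e.g. EQ is 0b0000, so CSET-on-EQ encodes 0b0001.)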
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en
		//
		// https://developer.arm.com/documentation/den0024/a/CHDEEABE
		var conditionalBits byte
		switch n.srcReg {
		case RegCondEQ:
			conditionalBits = 0b0001
		case RegCondNE:
			conditionalBits = 0b0000
		case RegCondHS:
			conditionalBits = 0b0011
		case RegCondLO:
			conditionalBits = 0b0010
		case RegCondMI:
			conditionalBits = 0b0101
		case RegCondPL:
			conditionalBits = 0b0100
		case RegCondVS:
			conditionalBits = 0b0111
		case RegCondVC:
			conditionalBits = 0b0110
		case RegCondHI:
			conditionalBits = 0b1001
		case RegCondLS:
			conditionalBits = 0b1000
		case RegCondGE:
			conditionalBits = 0b1011
		case RegCondLT:
			conditionalBits = 0b1010
		case RegCondGT:
			conditionalBits = 0b1101
		case RegCondLE:
			conditionalBits = 0b1100
		case RegCondAL:
			conditionalBits = 0b1111
		case RegCondNV:
			conditionalBits = 0b1110
		}

		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en
		a.Buf.Write([]byte{
			0b111_00000 | dstRegBits,
			(conditionalBits << 4) | 0b0000_0111,
			0b100_11111,
			0b10011010,
		})

	case FABSD, FABSS, FNEGD, FNEGS, FSQRTD, FSQRTS, FCVTSD, FCVTDS, FRINTMD, FRINTMS,
		FRINTND, FRINTNS, FRINTPD, FRINTPS, FRINTZD, FRINTZS:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var tp, opcode byte
		switch inst {
		case FABSD:
			opcode, tp = 0b000001, 0b01
		case FABSS:
			opcode, tp = 0b000001, 0b00
		case FNEGD:
			opcode, tp = 0b000010, 0b01
		case FNEGS:
			opcode, tp = 0b000010, 0b00
		case FSQRTD:
			opcode, tp = 0b000011, 0b01
		case FSQRTS:
			opcode, tp = 0b000011, 0b00
		case FCVTSD:
			opcode, tp = 0b000101, 0b00
		case FCVTDS:
			opcode, tp = 0b000100, 0b01
		case FRINTMD:
			opcode, tp = 0b001010, 0b01
		case FRINTMS:
			opcode, tp = 0b001010, 0b00
		case FRINTND:
			opcode, tp = 0b001000, 0b01
		case FRINTNS:
			opcode, tp = 0b001000, 0b00
		case FRINTPD:
			opcode, tp = 0b001001, 0b01
		case FRINTPS:
			opcode, tp = 0b001001, 0b00
		case FRINTZD:
			opcode, tp = 0b001011, 0b01
		case FRINTZS:
			opcode, tp = 0b001011, 0b00
		}
		a.Buf.Write([]byte{
			(srcRegBits << 5) | dstRegBits,
			(opcode << 7) | 0b0_10000_00 | (srcRegBits >> 3),
			tp<<6 | 0b00_1_00000 | opcode>>1,
			0b0_00_11110,
		})

	case FADDD, FADDS, FDIVS, FDIVD, FMAXD, FMAXS, FMIND, FMINS, FMULS, FMULD:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		// "Floating-point data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var tp, opcode byte
		switch inst {
		case FADDD:
			opcode, tp = 0b0010, 0b01
		case FADDS:
			opcode, tp = 0b0010, 0b00
		case FDIVD:
			opcode, tp = 0b0001, 0b01
		case FDIVS:
			opcode, tp = 0b0001, 0b00
		case FMAXD:
			opcode, tp = 0b0100, 0b01
		case FMAXS:
			opcode, tp = 0b0100, 0b00
		case FMIND:
			opcode, tp = 0b0101, 0b01
		case FMINS:
			opcode, tp = 0b0101, 0b00
		case FMULS:
			opcode, tp = 0b0000, 0b00
		case FMULD:
			opcode, tp = 0b0000, 0b01
		}

		a.Buf.Write([]byte{
			(dstRegBits << 5) | dstRegBits,
			opcode<<4 | 0b0000_10_00 | (dstRegBits >> 3),
			tp<<6 | 0b00_1_00000 | srcRegBits,
			0b0001_1110,
		})

	case FCVTZSD, FCVTZSDW, FCVTZSS, FCVTZSSW, FCVTZUD, FCVTZUDW, FCVTZUS, FCVTZUSW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, true); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		// "Conversion between floating-point and integer" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var sf, tp, opcode byte
		switch inst {
		case FCVTZSD: // Double to signed 64-bit.
			sf, tp, opcode = 0b1, 0b01, 0b000
		case FCVTZSDW: // Double to signed 32-bit.
			sf, tp, opcode = 0b0, 0b01, 0b000
		case FCVTZSS: // Single to signed 64-bit.
			sf, tp, opcode = 0b1, 0b00, 0b000
		case FCVTZSSW: // Single to signed 32-bit.
			sf, tp, opcode = 0b0, 0b00, 0b000
		case FCVTZUD: // Double to unsigned 64-bit.
			sf, tp, opcode = 0b1, 0b01, 0b001
		case FCVTZUDW: // Double to unsigned 32-bit.
			sf, tp, opcode = 0b0, 0b01, 0b001
		case FCVTZUS: // Single to unsigned 64-bit.
			sf, tp, opcode = 0b1, 0b00, 0b001
		case FCVTZUSW: // Single to unsigned 32-bit.
			sf, tp, opcode = 0b0, 0b00, 0b001
		}

		a.Buf.Write([]byte{
			(srcRegBits << 5) | dstRegBits,
			0 | (srcRegBits >> 3),
			tp<<6 | 0b00_1_11_000 | opcode,
			sf<<7 | 0b0_0_0_11110,
		})

	case FMOVD, FMOVS:
		isSrcInt, isDstInt := isIntRegister(n.srcReg), isIntRegister(n.dstReg)
		if isSrcInt && isDstInt {
			return errors.New("FMOV requires at least one of the operands to be a float register")
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMOV--register---Floating-point-Move-register-without-conversion-?lang=en
		if !isSrcInt && !isDstInt { // Float to float.
			var tp byte
			if inst == FMOVD {
				tp = 0b01
			}
			a.Buf.Write([]byte{
				(srcRegBits << 5) | dstRegBits,
				0b0_10000_00 | (srcRegBits >> 3),
				tp<<6 | 0b00_1_00000,
				0b000_11110,
			})
		} else if isSrcInt && !isDstInt { // Int to float.
			var tp, sf byte
			if inst == FMOVD {
				tp, sf = 0b01, 0b1
			}
			a.Buf.Write([]byte{
				(srcRegBits << 5) | dstRegBits,
				srcRegBits >> 3,
				tp<<6 | 0b00_1_00_111,
				sf<<7 | 0b0_00_11110,
			})
		} else { // Float to int.
			var tp, sf byte
			if inst == FMOVD {
				tp, sf = 0b01, 0b1
			}
			a.Buf.Write([]byte{
				(srcRegBits << 5) | dstRegBits,
				srcRegBits >> 3,
				tp<<6 | 0b00_1_00_110,
				sf<<7 | 0b0_00_11110,
			})
		}

	case MOVD, MOVW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		if n.srcReg == RegRZR && inst == MOVD {
			// If this is a 64-bit mov from the zero register, then we encode this as MOVZ Xd, #0.
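			// (Illustratively, MOV X3, XZR becomes 0xD2800003, i.e. MOVZ X3, #0, written
			// little-endian by the four bytes below.)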
			// See "Move wide (immediate)" in
			// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Immediate
			a.Buf.Write([]byte{
				dstRegBits,
				0x0,
				0b1000_0000,
				0b1_10_10010,
			})
		} else {
			// MOV can be encoded as ORR (shifted register): "ORR Wd, WZR, Wm".
			// https://developer.arm.com/documentation/100069/0609/A64-General-Instructions/MOV--register-
			var sf byte
			if inst == MOVD {
				sf = 0b1
			}
			a.Buf.Write([]byte{
				(zeroRegisterBits << 5) | dstRegBits,
				zeroRegisterBits >> 3,
				0b000_00000 | srcRegBits,
				sf<<7 | 0b0_01_01010,
			})
		}

	case MRS:
		if n.srcReg != RegFPSR {
			return fmt.Errorf("MRS only supports the FPSR register as a src but got %s", RegisterName(n.srcReg))
		}

		// For how to specify the FPSR register, see "Accessing FPSR" in:
		// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
		dstRegBits := registerBits(n.dstReg)
		a.Buf.Write([]byte{
			0b001<<5 | dstRegBits,
			0b0100<<4 | 0b0100,
			0b0011_0000 | 0b11<<3 | 0b011,
			0b1101_0101,
		})

	case MSR:
		if n.dstReg != RegFPSR {
			return fmt.Errorf("MSR only supports the FPSR register as a dst but got %s", RegisterName(n.dstReg))
		}

		// For how to specify the FPSR register, see "Accessing FPSR" in:
		// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
		srcRegBits := registerBits(n.srcReg)
		a.Buf.Write([]byte{
			0b001<<5 | srcRegBits,
			0b0100<<4 | 0b0100,
			0b0001_0000 | 0b11<<3 | 0b011,
			0b1101_0101,
		})

	case MUL, MULW:
		// Multiplication is encoded as MADD with the zero register: dst = XZR + (dst * src).
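		// (MADD Rd, Rn, Rm, Ra computes Rd = Ra + Rn*Rm, so with Ra = XZR it degenerates to a
		// plain multiply; AArch64's MUL is itself defined as this alias of MADD.)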
		// See "Data-processing (3 source)" in
		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		var sf byte
		if inst == MUL {
			sf = 0b1
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		a.Buf.Write([]byte{
			dstRegBits<<5 | dstRegBits,
			zeroRegisterBits<<2 | dstRegBits>>3,
			srcRegBits,
			sf<<7 | 0b11011,
		})

	case NEG, NEGW:
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		// NEG is encoded as "SUB dst, XZR, src" = "dst = 0 - src".
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		var sf byte
		if inst == NEG {
			sf = 0b1
		}

		a.Buf.Write([]byte{
			(zeroRegisterBits << 5) | dstRegBits,
			zeroRegisterBits >> 3,
			srcRegBits,
			sf<<7 | 0b0_10_00000 | 0b0_00_01011,
		})

	case SDIV, SDIVW, UDIV, UDIVW:
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		// See "Data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
		var sf, opcode byte
		switch inst {
		case SDIV:
			sf, opcode = 0b1, 0b000011
		case SDIVW:
			sf, opcode = 0b0, 0b000011
		case UDIV:
			sf, opcode = 0b1, 0b000010
		case UDIVW:
			sf, opcode = 0b0, 0b000010
		}

		a.Buf.Write([]byte{
			(dstRegBits << 5) | dstRegBits,
			opcode<<2 | (dstRegBits >> 3),
			0b110_00000 | srcRegBits,
			sf<<7 | 0b0_00_11010,
		})

	case SCVTFD, SCVTFWD, SCVTFS, SCVTFWS, UCVTFD, UCVTFS, UCVTFWD, UCVTFWS:
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, false); err != nil {
			return
		}

		// "Conversion between floating-point and integer" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var sf, tp, opcode byte
		switch inst {
		case SCVTFD: // 64-bit signed integer to double.
			sf, tp, opcode = 0b1, 0b01, 0b010
		case SCVTFWD: // 32-bit signed integer to double.
			sf, tp, opcode = 0b0, 0b01, 0b010
		case SCVTFS: // 64-bit signed integer to single.
			sf, tp, opcode = 0b1, 0b00, 0b010
		case SCVTFWS: // 32-bit signed integer to single.
			sf, tp, opcode = 0b0, 0b00, 0b010
		case UCVTFD: // 64-bit unsigned integer to double.
			sf, tp, opcode = 0b1, 0b01, 0b011
		case UCVTFWD: // 32-bit unsigned integer to double.
			sf, tp, opcode = 0b0, 0b01, 0b011
		case UCVTFS: // 64-bit unsigned integer to single.
			sf, tp, opcode = 0b1, 0b00, 0b011
		case UCVTFWS: // 32-bit unsigned integer to single.
			sf, tp, opcode = 0b0, 0b00, 0b011
		}

		a.Buf.Write([]byte{
			(srcRegBits << 5) | dstRegBits,
			srcRegBits >> 3,
			tp<<6 | 0b00_1_00_000 | opcode,
			sf<<7 | 0b0_0_0_11110,
		})

	case SXTB, SXTBW, SXTH, SXTHW, SXTW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		if n.srcReg == RegRZR {
			// If the source is the zero register, we encode this as MOV dst, WZR: a 32-bit
			// MOV suffices because writing a W register zeroes the upper 32 bits anyway.
			a.Buf.Write([]byte{
				(zeroRegisterBits << 5) | dstRegBits,
				zeroRegisterBits >> 3,
				0b000_00000 | srcRegBits,
				0b0_01_01010,
			})
			return
		}

		// SXTB is encoded as "SBFM Wd, Wn, #0, #7"
		// https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTB
		// SXTH is encoded as "SBFM Wd, Wn, #0, #15"
		// https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTH
		// SXTW is encoded as "SBFM Xd, Xn, #0, #31"
		// https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/SXTW

		var nBit, sf, imms, opc byte // opc is always zero here (SBFM); nBit avoids shadowing the node parameter n.
		switch inst {
		case SXTB:
			nBit, sf, imms = 0b1, 0b1, 0x7
		case SXTBW:
			nBit, sf, imms = 0b0, 0b0, 0x7
		case SXTH:
			nBit, sf, imms = 0b1, 0b1, 0xf
		case SXTHW:
			nBit, sf, imms = 0b0, 0b0, 0xf
		case SXTW:
			nBit, sf, imms = 0b1, 0b1, 0x1f
		}

		a.Buf.Write([]byte{
			(srcRegBits << 5) | dstRegBits,
			imms<<2 | (srcRegBits >> 3),
			nBit << 6,
			sf<<7 | opc<<5 | 0b10011,
		})
	default:
		return errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeLeftShiftedRegisterToRegister(n *nodeImpl) (err error) {
	baseRegBits, err := intRegisterBits(n.srcReg)
	if err != nil {
		return err
	}
	shiftTargetRegBits, err := intRegisterBits(n.srcReg2)
	if err != nil {
		return err
	}
	dstRegBits, err := intRegisterBits(n.dstReg)
	if err != nil {
		return err
	}

	switch n.instruction {
	case ADD:
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		const logicalLeftShiftBits = 0b00
		if n.srcConst < 0 || n.srcConst > 63 {
			return fmt.Errorf("shift amount must fit in unsigned 6-bit integer (0-63) but got %d", n.srcConst)
		}
		shiftByte := byte(n.srcConst)
		a.Buf.Write([]byte{
			(baseRegBits << 5) | dstRegBits,
			(shiftByte << 2) | (baseRegBits >> 3),
			(logicalLeftShiftBits << 6) | shiftTargetRegBits,
			0b1000_1011,
		})
	default:
		return errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeTwoRegistersToRegister(n *nodeImpl) (err error) {
	switch inst := n.instruction; inst {
	case AND, ANDW, ORR, ORRW, EOR, EORW:
		// See "Logical (shifted register)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
		var sf, opc byte
		switch inst {
		case AND:
			sf, opc = 0b1, 0b00
		case ANDW:
			sf, opc = 0b0, 0b00
		case ORR:
			sf, opc = 0b1, 0b01
		case ORRW:
			sf, opc = 0b0, 0b01
		case EOR:
			sf, opc = 0b1, 0b10
		case EORW:
			sf, opc = 0b0, 0b10
		}
		a.Buf.Write([]byte{
			(srcReg2Bits << 5) | dstRegBits,
			srcReg2Bits >> 3,
			srcRegBits,
			sf<<7 | opc<<5 | 0b01010,
		})
	case ASR, ASRW, LSL, LSLW, LSR, LSRW, ROR, RORW:
		// See "Data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		var sf, opcode byte
		switch inst {
		case ASR:
			sf, opcode = 0b1, 0b001010
		case ASRW:
			sf, opcode = 0b0, 0b001010
		case LSL:
			sf, opcode = 0b1, 0b001000
		case LSLW:
			sf, opcode = 0b0, 0b001000
		case LSR:
			sf, opcode = 0b1, 0b001001
		case LSRW:
			sf, opcode = 0b0, 0b001001
		case ROR:
			sf, opcode = 0b1, 0b001011
		case RORW:
			sf, opcode = 0b0, 0b001011
		}
		a.Buf.Write([]byte{
			(srcReg2Bits << 5) | dstRegBits,
			opcode<<2 | (srcReg2Bits >> 3),
			0b110_00000 | srcRegBits,
			sf<<7 | 0b0_00_11010,
		})
	case SDIV, SDIVW, UDIV, UDIVW:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// See "Data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
		var sf, opcode byte
		switch inst {
		case SDIV:
			sf, opcode = 0b1, 0b000011
		case SDIVW:
			sf, opcode = 0b0, 0b000011
		case UDIV:
			sf, opcode = 0b1, 0b000010
		case UDIVW:
			sf, opcode = 0b0, 0b000010
		}

		a.Buf.Write([]byte{
			(srcReg2Bits << 5) | dstRegBits,
			opcode<<2 | (srcReg2Bits >> 3),
			0b110_00000 | srcRegBits,
			sf<<7 | 0b0_00_11010,
		})
	case SUB, SUBW:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// See "Add/subtract (shifted register)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		var sf byte
		if inst == SUB {
			sf = 0b1
		}

		a.Buf.Write([]byte{
			(srcReg2Bits << 5) | dstRegBits,
			srcReg2Bits >> 3,
			srcRegBits,
			sf<<7 | 0b0_10_01011,
		})
	case FSUBD, FSUBS:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// See "Floating-point data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
		var tp byte
		if inst == FSUBD {
			tp = 0b01
		}
		a.Buf.Write([]byte{
			(srcReg2Bits << 5) | dstRegBits,
			0b0011_10_00 | (srcReg2Bits >> 3),
			tp<<6 | 0b00_1_00000 | srcRegBits,
			0b0_00_11110,
		})
	default:
		return errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeThreeRegistersToRegister(n *nodeImpl) (err error) {
	switch n.instruction {
	case MSUB, MSUBW:
		// Dst = Src2 - (Src1 * Src3).
		// "Data-processing (3 source)" in:
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		src1RegBits, err := intRegisterBits(n.srcReg)
		if err != nil {
			return err
		}
		src2RegBits, err := intRegisterBits(n.srcReg2)
		if err != nil {
			return err
		}
		src3RegBits, err := intRegisterBits(n.dstReg)
		if err != nil {
			return err
		}
		dstRegBits, err := intRegisterBits(n.dstReg2)
		if err != nil {
			return err
		}

		var sf byte // sf is zero for MSUBW (32-bit MSUB).
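		// (In MSUB terms, srcReg and srcReg2 map to Rm and Ra, dstReg is Rn — the third
		// source, per the operand reuse noted in CompileThreeRegistersToRegister — and
		// dstReg2 is the real destination Rd, so Rd = Ra - Rn*Rm.)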
		if n.instruction == MSUB {
			sf = 0b1
		}

		a.Buf.Write([]byte{
			(src3RegBits << 5) | dstRegBits,
			0b1_0000000 | (src2RegBits << 2) | (src3RegBits >> 3),
			src1RegBits,
			sf<<7 | 0b00_11011,
		})
	default:
		return errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeTwoRegistersToNone(n *nodeImpl) (err error) {
	switch n.instruction {
	case CMPW, CMP:
		// Compare on two registers is an alias of "SUBS src1, src2, zero register",
		// which can be encoded as SUBS (shifted register) with zero shifting.
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		src1RegBits, err := intRegisterBits(n.srcReg)
		if err != nil {
			return err
		}
		src2RegBits, err := intRegisterBits(n.srcReg2)
		if err != nil {
			return err
		}

		var op byte
		if n.instruction == CMP {
			op = 0b111
		} else {
			op = 0b011
		}

		a.Buf.Write([]byte{
			(src2RegBits << 5) | zeroRegisterBits,
			src2RegBits >> 3,
			src1RegBits,
			0b01011 | (op << 5),
		})
	case FCMPS, FCMPD:
		// "Floating-point compare" section in:
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
		src1RegBits, err := vectorRegisterBits(n.srcReg)
		if err != nil {
			return err
		}
		src2RegBits, err := vectorRegisterBits(n.srcReg2)
		if err != nil {
			return err
		}

		var ftype byte // ftype is zero for FCMPS (single-precision float compare).
		if n.instruction == FCMPD {
			ftype = 0b01
		}
		a.Buf.Write([]byte{
			src2RegBits << 5,
			0b001000_00 | (src2RegBits >> 3),
			ftype<<6 | 0b1_00000 | src1RegBits,
			0b000_11110,
		})
	default:
		return errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeRegisterAndConstToNone(n *nodeImpl) (err error) {
	if n.instruction != CMP {
		return errorEncodingUnsupported(n)
	}

	// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en
	if n.srcConst < 0 || n.srcConst > 4095 {
		return fmt.Errorf("immediate for CMP must fit in 0 to 4095 but got %d", n.srcConst)
	} else if n.srcReg == RegRZR {
		return errors.New("zero register is not supported for CMP (immediate)")
	}

	srcRegBits, err := intRegisterBits(n.srcReg)
	if err != nil {
		return err
	}

	a.Buf.Write([]byte{
		(srcRegBits << 5) | zeroRegisterBits,
		(byte(n.srcConst) << 2) | (srcRegBits >> 3),
		byte(n.srcConst >> 6),
		0b111_10001,
	})
	return
}

func fitInSigned9Bits(v int64) bool {
	return v >= -256 && v <= 255
}

func (a *AssemblerImpl) encodeLoadOrStoreWithRegisterOffset(
	baseRegBits, offsetRegBits, targetRegBits byte, opcode, size, v byte,
) {
	// See "Load/store register (register offset)".
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
1611 a.Buf.Write([]byte{
1612 (baseRegBits << 5) | targetRegBits,
1613 0b011_010_00 | (baseRegBits >> 3),
1614 opcode<<6 | 0b00_1_00000 | offsetRegBits,
1615 size<<6 | v<<2 | 0b00_111_0_00,
1616 })
1617 }
1618 
1619 // validateMemoryOffset validates whether the given offset can be encoded by this assembler.
1620 // In theory, the offset could be arbitrary, but for the simplicity of our homemade assembler,
1621 // we limit the encodable range to what is sufficient for our compiler.
1622 func validateMemoryOffset(offset int64) (err error) {
1623 if offset > 255 && offset%4 != 0 {
1624 // This is because we only use large offsets for load/store of the Wasm value stack or for reading type IDs, and such offsets
1625 // are always multiples of 4 or 8 (== the size of uint32 or uint64 == the type of wasm.FunctionTypeID or the value stack in Go).
1626 err = fmt.Errorf("large memory offset (>255) must be a multiple of 4 but got %d", offset)
1627 } else if offset < -256 { // 9-bit signed integer's minimum = -2^8.
1628 err = fmt.Errorf("negative memory offset must be larger than or equal to -256 but got %d", offset)
1629 } else if offset > 1<<31-1 {
1630 return fmt.Errorf("large memory offset must be less than %d but got %d", 1<<31-1, offset)
1631 }
1632 return
1633 }
1634 
1635 // encodeLoadOrStoreWithConstOffset encodes load/store instructions with a constant offset.
1636 //
1637 // Note: Encoding strategy intentionally matches the Go assembler: https://go.dev/doc/asm
1638 func (a *AssemblerImpl) encodeLoadOrStoreWithConstOffset(
1639 baseRegBits, targetRegBits byte,
1640 offset int64,
1641 opcode, size, v byte,
1642 datasize, datasizeLog2 int64,
1643 ) (err error) {
1644 if err = validateMemoryOffset(offset); err != nil {
1645 return
1646 }
1647 
1648 if fitInSigned9Bits(offset) {
1649 // See "LDAPR/STLR (unscaled immediate)"
1650 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldapstl_unscaled
1651 if offset < 0 || offset%datasize != 0 {
1652 // This case is encoded as one "unscaled signed store".
1653 a.Buf.Write([]byte{
1654 (baseRegBits << 5) | targetRegBits,
1655 byte(offset<<4) | (baseRegBits >> 3),
1656 opcode<<6 | (0b00_00_11111 & byte(offset>>4)),
1657 size<<6 | v<<2 | 0b00_1_11_0_00,
1658 })
1659 return
1660 }
1661 }
1662 
1663 // At this point, we can assume that the offset is positive.
1664 // Moreover, if it is a multiple of datasize, it can be encoded as a single "unsigned immediate" load/store.
1665 if offset%datasize == 0 &&
1666 offset < (1<<12)<<datasizeLog2 {
1667 m := offset / datasize
1668 a.Buf.Write([]byte{
1669 (baseRegBits << 5) | targetRegBits,
1670 (byte(m << 2)) | (baseRegBits >> 3),
1671 opcode<<6 | 0b00_111111&byte(m>>6),
1672 size<<6 | v<<2 | 0b00_1_11_0_01,
1673 })
1674 return
1675 }
1676 
1677 // Otherwise, we need multiple instructions.
1678 tmpRegBits := registerBits(a.temporaryRegister)
1679 offset32 := int32(offset)
1680 
1681 // Go's assembler adds a const into the const pool at this point,
1682 // regardless of its usage; e.g. if we enter the then block of the following if statement,
1683 // the const is not used but it is added into the const pool.
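// For example (an illustrative case, assuming datasize=8, i.e. an 8-byte load/store):
// offset 0x123458 yields hi=0x120000 and m=0x68B below, so two instructions suffice:
// ADD tmp, base, #0x120, LSL #12, followed by a load/store with the unsigned immediate m
// scaled by 8. Offsets that do not fit in 24 bits instead take the LDR (literal) path
// in the else branch.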
1684 c := asm.NewStaticConst(make([]byte, 4))
1685 binary.LittleEndian.PutUint32(c.Raw, uint32(offset))
1686 a.pool.AddConst(c, uint64(a.Buf.Len()))
1687 
1688 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3529-L3532
1689 // If the offset fits within 24 bits, one ADD (shifted imm12) plus a scaled unsigned-immediate load/store suffice.
1690 hi := offset32 - (offset32 & (0xfff << uint(datasizeLog2)))
1691 if hi&^0xfff000 == 0 {
1692 var sfops byte = 0b100
1693 m := ((offset32 - hi) >> datasizeLog2) & 0xfff
1694 hi >>= 12
1695 
1696 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3534-L3535
1697 a.Buf.Write([]byte{
1698 (baseRegBits << 5) | tmpRegBits,
1699 (byte(hi) << 2) | (baseRegBits >> 3),
1700 0b01<<6 /* shift by 12 */ | byte(hi>>6),
1701 sfops<<5 | 0b10001,
1702 })
1703 
1704 a.Buf.Write([]byte{
1705 (tmpRegBits << 5) | targetRegBits,
1706 (byte(m << 2)) | (tmpRegBits >> 3),
1707 opcode<<6 | 0b00_111111&byte(m>>6),
1708 size<<6 | v<<2 | 0b00_1_11_0_01,
1709 })
1710 } else {
1711 // In this case, we load the const via ldr(literal) into the temporary register,
1712 // and the target const itself is placed in the binary after this instruction.
1713 loadLiteralOffsetInBinary := uint64(a.Buf.Len())
1714 
1715 // First we emit the ldr(literal) with offset zero, as we don't yet know the const's placement in the binary.
1716 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--literal---Load-Register--literal--
1717 a.Buf.Write([]byte{tmpRegBits, 0x0, 0x0, 0b00_011_0_00})
1718 
1719 // Register a callback on the constant, and set the offset properly there once its placement is finalized.
1720 
1721 c.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
1722 // ldr(literal) encodes the offset divided by 4.
1723 offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4
1724 bin := a.Buf.Bytes()
1725 bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
1726 bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
1727 bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
1728 })
1729 
1730 // Then, load the constant with the register offset.
1731 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--register---Load-Register--register--
1732 a.Buf.Write([]byte{
1733 (baseRegBits << 5) | targetRegBits,
1734 0b011_010_00 | (baseRegBits >> 3),
1735 opcode<<6 | 0b00_1_00000 | tmpRegBits,
1736 size<<6 | v<<2 | 0b00_111_0_00,
1737 })
1738 }
1739 return
1740 }
1741 
1742 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
1743 var storeInstructionTable = map[asm.Instruction]struct {
1744 size, v byte
1745 datasize, datasizeLog2 int64
1746 isTargetFloat bool
1747 }{
1748 STRD: {size: 0b11, v: 0x0, datasize: 8, datasizeLog2: 3},
1749 STRW: {size: 0b10, v: 0x0, datasize: 4, datasizeLog2: 2},
1750 STRH: {size: 0b01, v: 0x0, datasize: 2, datasizeLog2: 1},
1751 STRB: {size: 0b00, v: 0x0, datasize: 1, datasizeLog2: 0},
1752 FSTRD: {size: 0b11, v: 0x1, datasize: 8, datasizeLog2: 3, isTargetFloat: true},
1753 FSTRS: {size: 0b10, v: 0x1, datasize: 4, datasizeLog2: 2, isTargetFloat: true},
1754 }
1755 
1756 func (a *AssemblerImpl) encodeRegisterToMemory(n *nodeImpl) (err error) {
1757 inst, ok := storeInstructionTable[n.instruction]
1758 if !ok {
1759 return errorEncodingUnsupported(n)
1760 }
1761 
1762 var srcRegBits byte
1763 if inst.isTargetFloat {
1764 srcRegBits, err = vectorRegisterBits(n.srcReg)
1765 } else {
1766 srcRegBits, err = intRegisterBits(n.srcReg)
1767 }
1768 if err != nil {
1769 return
1770 }
1771 
1772 baseRegBits, err := intRegisterBits(n.dstReg)
1773 if err != nil {
1774 return err
1775 }
1776 
1777 const opcode = 0x00 // opcode for store instructions.
1778 if n.dstReg2 != asm.NilRegister {
1779 offsetRegBits, err := intRegisterBits(n.dstReg2)
1780 if err != nil {
1781 return err
1782 }
1783 a.encodeLoadOrStoreWithRegisterOffset(baseRegBits, offsetRegBits, srcRegBits, opcode, inst.size, inst.v)
1784 } else {
1785 err = a.encodeLoadOrStoreWithConstOffset(baseRegBits, srcRegBits, n.dstConst, opcode, inst.size, inst.v, inst.datasize, inst.datasizeLog2)
1786 }
1787 return
1788 }
1789 
1790 func (a *AssemblerImpl) encodeADR(n *nodeImpl) (err error) {
1791 dstRegBits, err := intRegisterBits(n.dstReg)
1792 if err != nil {
1793 return err
1794 }
1795 
1796 adrInstructionOffsetInBinary := uint64(a.Buf.Len())
1797 
1798 // At this point, we don't yet know the target offset to read from,
1799 // so we emit the ADR instruction with 0 offset and fix it up later in a callback.
1800 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
1801 a.Buf.Write([]byte{dstRegBits, 0x0, 0x0, 0b10000})
1802 
1803 // When staticConst is set, the ADR's target offset is the staticConst's initial address.
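// Note: ADR encodes a 21-bit signed immediate split into immlo (instruction bits 30:29)
// and immhi (bits 23:5); the byte-level fix-ups in the callbacks below write exactly that
// layout into the little-endian instruction word.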
1804 if sc := n.staticConst; sc != nil {
1805 a.pool.AddConst(sc, adrInstructionOffsetInBinary)
1806 sc.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
1807 adrInstructionBytes := a.Buf.Bytes()[adrInstructionOffsetInBinary : adrInstructionOffsetInBinary+4]
1808 offset := int(offsetOfConst) - int(adrInstructionOffsetInBinary)
1809 
1810 // See https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
1811 adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
1812 offset >>= 2
1813 adrInstructionBytes[0] |= byte(offset << 5)
1814 offset >>= 3
1815 adrInstructionBytes[1] |= byte(offset)
1816 offset >>= 8
1817 adrInstructionBytes[2] |= byte(offset)
1818 })
1819 return
1820 }
1821 
1822 a.AddOnGenerateCallBack(func(code []byte) error {
1823 // Find the target instruction node.
1824 targetNode := n
1825 for ; targetNode != nil; targetNode = targetNode.next {
1826 if targetNode.instruction == n.readInstructionAddressBeforeTargetInstruction {
1827 targetNode = targetNode.next
1828 break
1829 }
1830 }
1831 
1832 if targetNode == nil {
1833 return fmt.Errorf("BUG: target instruction %s not found for ADR", InstructionName(n.readInstructionAddressBeforeTargetInstruction))
1834 }
1835 
1836 offset := targetNode.OffsetInBinary() - n.OffsetInBinary()
1837 if i64 := int64(offset); i64 >= 1<<20 || i64 < -1<<20 {
1838 // We could support offsets beyond the 20-bit range by special-casing them here,
1839 // but that range should be enough for our implementation. If the necessity comes up,
1840 // we can add such special casing to support arbitrarily large offsets.
1841 return fmt.Errorf("BUG: too large offset for ADR: %#x", offset)
1842 }
1843 
1844 adrInstructionBytes := code[n.OffsetInBinary() : n.OffsetInBinary()+4]
1845 // According to the binary format of the ADR instruction:
1846 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
1847 adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
1848 offset >>= 2
1849 adrInstructionBytes[0] |= byte(offset << 5)
1850 offset >>= 3
1851 adrInstructionBytes[1] |= byte(offset)
1852 offset >>= 8
1853 adrInstructionBytes[2] |= byte(offset)
1854 return nil
1855 })
1856 return
1857 }
1858 
1859 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
1860 var loadInstructionTable = map[asm.Instruction]struct {
1861 size, v, opcode byte
1862 datasize, datasizeLog2 int64
1863 isTargetFloat bool
1864 }{
1865 FLDRD: {size: 0b11, v: 0x1, datasize: 8, datasizeLog2: 3, isTargetFloat: true, opcode: 0b01},
1866 FLDRS: {size: 0b10, v: 0x1, datasize: 4, datasizeLog2: 2, isTargetFloat: true, opcode: 0b01},
1867 LDRD: {size: 0b11, v: 0x0, datasize: 8, datasizeLog2: 3, opcode: 0b01},
1868 LDRW: {size: 0b10, v: 0x0, datasize: 4, datasizeLog2: 2, opcode: 0b01},
1869 LDRSHD: {size: 0b01, v: 0x0, datasize: 2, datasizeLog2: 1, opcode: 0b10},
1870 LDRSHW: {size: 0b01, v: 0x0, datasize: 2, datasizeLog2: 1, opcode: 0b11},
1871 LDRH: {size: 0b01, v: 0x0, datasize: 2, datasizeLog2: 1, opcode: 0b01},
1872 LDRSBD: {size: 0b00, v: 0x0, datasize: 1, datasizeLog2: 0, opcode: 0b10},
1873 LDRSBW: {size: 0b00, v: 0x0, datasize: 1, datasizeLog2: 0, opcode: 0b11},
1874 LDRB: {size: 0b00, v: 0x0, datasize: 1, datasizeLog2: 0, opcode: 0b01},
1875 LDRSW: {size: 0b10, v: 0x0, datasize: 4, datasizeLog2: 2, opcode: 0b10},
1876 }
1877 
1878 func (a *AssemblerImpl) encodeMemoryToRegister(n *nodeImpl) (err error) {
1879 if n.instruction == ADR {
1880 return a.encodeADR(n)
1881 }
1882 
1883 inst, ok := loadInstructionTable[n.instruction]
1884 if !ok {
1885 return errorEncodingUnsupported(n)
1886 }
1887 
1888 var dstRegBits byte
1889 if inst.isTargetFloat {
1890 dstRegBits, err = vectorRegisterBits(n.dstReg)
1891 } else {
1892 dstRegBits, err = intRegisterBits(n.dstReg)
1893 }
1894 if err != nil {
1895 return
1896 }
1897 baseRegBits, err := intRegisterBits(n.srcReg)
1898 if err != nil {
1899 return err
1900 }
1901 
1902 if n.srcReg2 != asm.NilRegister {
1903 offsetRegBits, err := intRegisterBits(n.srcReg2)
1904 if err != nil {
1905 return err
1906 }
1907 a.encodeLoadOrStoreWithRegisterOffset(baseRegBits, offsetRegBits, dstRegBits, inst.opcode,
1908 inst.size, inst.v)
1909 } else {
1910 err = a.encodeLoadOrStoreWithConstOffset(baseRegBits, dstRegBits, n.srcConst, inst.opcode,
1911 inst.size, inst.v, inst.datasize, inst.datasizeLog2)
1912 }
1913 return
1914 }
1915 
1916 // const16bitAligned checks whether the value fits within a single, 16-bit-aligned chunk of a 64-bit word.
1917 // If so, it returns the chunk's position as the shift amount divided by 16; otherwise, -1.
1918 func const16bitAligned(v int64) (ret int) {
1919 ret = -1
1920 for s := 0; s < 64; s += 16 {
1921 if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
1922 ret = s / 16
1923 break
1924 }
1925 }
1926 return
1927 }
1928 
1929 // isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
1930 //
1931 // Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
1932 // Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
1933 //
1934 // See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
1935 func isBitMaskImmediate(x uint64) bool {
1936 // All zeros and all ones are not "bitmask immediate" by definition.
1937 if x == 0 || x == 0xffff_ffff_ffff_ffff {
1938 return false
1939 }
1940 
1941 switch {
1942 case x != x>>32|x<<32:
1943 // e = 64
1944 case x != x>>16|x<<48:
1945 // e = 32 (x == x>>32|x<<32).
1946 // e.g. 0x00ff_ff00_00ff_ff00
1947 x = uint64(int32(x))
1948 case x != x>>8|x<<56:
1949 // e = 16 (x == x>>16|x<<48).
1950 // e.g. 0x00ff_00ff_00ff_00ff
1951 x = uint64(int16(x))
1952 case x != x>>4|x<<60:
1953 // e = 8 (x == x>>8|x<<56).
1954 // e.g. 0x0f0f_0f0f_0f0f_0f0f
1955 x = uint64(int8(x))
1956 default:
1957 // e = 4 or 2.
1958 return true
1959 }
1960 return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
1961 }
1962 
1963 // sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1s).
1964 // For example: 0b1110 -> true, 0b1010 -> false
1965 func sequenceOfSetbits(x uint64) bool {
1966 y := getLowestBit(x)
1967 // If x is a sequence of set bits, adding the lowest bit results in a number
1968 // with only one set bit (i.e. a power of two).
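// e.g. x = 0b0110: y = 0b0010, and x+y = 0b1000 (a power of two, so the result is true);
// x = 0b1010: y = 0b0010, and x+y = 0b1100 (not a power of two, so the result is false).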
1969 y += x
1970 return (y-1)&y == 0
1971 }
1972 
1973 func getLowestBit(x uint64) uint64 {
1974 // See https://stackoverflow.com/questions/12247186/find-the-lowest-set-bit
1975 return x & (^x + 1)
1976 }
1977 
1978 func (a *AssemblerImpl) addOrSub64BitRegisters(sfops byte, src1RegBits byte, src2RegBits byte) {
1979 // src1Reg = src1Reg +/- src2Reg
1980 a.Buf.Write([]byte{
1981 (src1RegBits << 5) | src1RegBits,
1982 src1RegBits >> 3,
1983 src2RegBits,
1984 sfops<<5 | 0b01011,
1985 })
1986 }
1987 
1988 // See "Logical (immediate)" in
1989 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Immediate
1990 var logicalImmediate = map[asm.Instruction]struct {
1991 sf, opc byte
1992 resolver func(imm asm.ConstantValue) (imms, immr, N byte, err error)
1993 }{
1994 ANDIMM32: {sf: 0b0, opc: 0b00, resolver: func(imm asm.ConstantValue) (imms, immr, N byte, err error) {
1995 if !isBitMaskImmediate(uint64(imm)) {
1996 err = fmt.Errorf("const %d must be valid bitmask immediate for %s", imm, InstructionName(ANDIMM32))
1997 return
1998 }
1999 immr, imms, N = bitmaskImmediate(uint64(imm), false)
2000 return
2001 }},
2002 ANDIMM64: {sf: 0b1, opc: 0b00, resolver: func(imm asm.ConstantValue) (imms, immr, N byte, err error) {
2003 if !isBitMaskImmediate(uint64(imm)) {
2004 err = fmt.Errorf("const %d must be valid bitmask immediate for %s", imm, InstructionName(ANDIMM64))
2005 return
2006 }
2007 immr, imms, N = bitmaskImmediate(uint64(imm), true)
2008 return
2009 }},
2010 }
2011 
2012 func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
2013 var size uint32
2014 switch {
2015 case c != c>>32|c<<32:
2016 size = 64
2017 case c != c>>16|c<<48:
2018 size = 32
2019 c = uint64(int32(c))
2020 case c != c>>8|c<<56:
2021 size = 16
2022 c = uint64(int16(c))
2023 case c != c>>4|c<<60:
2024 size = 8
2025 c = uint64(int8(c))
2026 case c != c>>2|c<<62:
2027 size = 4
2028 c = uint64(int64(c<<60) >> 60)
2029 default:
2030 size = 2
2031 c = uint64(int64(c<<62) >> 62)
2032 }
2033 
2034 neg := false
2035 if int64(c) < 0 {
2036 c = ^c
2037 neg = true
2038 }
2039 
2040 onesSize, nonZeroPos := getOnesSequenceSize(c)
2041 if neg {
2042 nonZeroPos = onesSize + nonZeroPos
2043 onesSize = size - onesSize
2044 }
2045 
2046 var mode byte = 32
2047 if is64bit {
2048 N, mode = 0b1, 64
2049 }
2050 
2051 immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
2052 imms = byte((onesSize - 1) | 63&^(size<<1-1))
2053 return
2054 }
2055 
2056 func (a *AssemblerImpl) encodeConstToRegister(n *nodeImpl) (err error) {
2057 // Alias for readability.
2058 c := n.srcConst
2059 
2060 dstRegBits, err := intRegisterBits(n.dstReg)
2061 if err != nil {
2062 return err
2063 }
2064 
2065 if log, ok := logicalImmediate[n.instruction]; ok {
2066 // See "Logical (immediate)" in
2067 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Immediate
2068 imms, immr, N, err := log.resolver(c)
2069 if err != nil {
2070 return err
2071 }
2072 
2073 a.Buf.Write([]byte{
2074 (dstRegBits << 5) | dstRegBits,
2075 imms<<2 | dstRegBits>>3,
2076 N<<6 | immr,
2077 log.sf<<7 | log.opc<<5 | 0b10010,
2078 })
2079 return nil
2080 }
2081 
2082 // TODO: refactor and generalize the following like ^ logicalImmediate, etc.
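// Roughly, the cases below try progressively more general encodings, following the Go
// assembler's strategy: a single instruction with imm12 (optionally shifted by 12), a
// single MOVZ/MOVN for constants confined to one 16-bit chunk, ORR with a bitmask
// immediate, two ADDs for 24-bit values, and finally MOVZ/MOVN plus MOVKs via
// load64bitConst, using the temporary register where needed.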
2083 switch inst := n.instruction; inst {
2084 case ADD, ADDS, SUB, SUBS:
2085 var sfops byte
2086 if inst == ADD {
2087 sfops = 0b100
2088 } else if inst == ADDS {
2089 sfops = 0b101
2090 } else if inst == SUB {
2091 sfops = 0b110
2092 } else if inst == SUBS {
2093 sfops = 0b111
2094 }
2095 
2096 if c == 0 {
2097 // If the constant equals zero, we encode it as ADD (register) with the zero register.
2098 a.addOrSub64BitRegisters(sfops, dstRegBits, zeroRegisterBits)
2099 return
2100 }
2101 
2102 if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
2103 // If the const can be represented as "imm12" or "imm12 << 12": one instruction
2104 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L2992
2105 
2106 if c <= 0xfff {
2107 a.Buf.Write([]byte{
2108 (dstRegBits << 5) | dstRegBits,
2109 (byte(c) << 2) | (dstRegBits >> 3),
2110 byte(c >> 6),
2111 sfops<<5 | 0b10001,
2112 })
2113 } else {
2114 c >>= 12
2115 a.Buf.Write([]byte{
2116 (dstRegBits << 5) | dstRegBits,
2117 (byte(c) << 2) | (dstRegBits >> 3),
2118 0b01<<6 /* shift by 12 */ | byte(c>>6),
2119 sfops<<5 | 0b10001,
2120 })
2121 }
2122 return
2123 }
2124 
2125 if t := const16bitAligned(c); t >= 0 {
2126 // If the const fits within a single 16-bit aligned chunk (e.g. 0xffff, 0xffff_0000, or 0xffff_0000_0000_0000),
2127 // we can load it into the temporary register with a single MOVZ.
2128 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
2129 tmpRegBits := registerBits(a.temporaryRegister)
2130 
2131 // MOVZ $c, tmpReg with shifting.
2132 a.load16bitAlignedConst(c>>(16*t), byte(t), tmpRegBits, false, true)
2133 
2134 // ADD/SUB tmpReg, dstReg
2135 a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
2136 return
2137 } else if t := const16bitAligned(^c); t >= 0 {
2138 // Likewise, if the bitwise inversion of the const fits within a single 16-bit chunk, load it with a single MOVN.
2139 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
2140 tmpRegBits := registerBits(a.temporaryRegister)
2141 
2142 // MOVN $c, tmpReg with shifting.
2143 a.load16bitAlignedConst(^c>>(16*t), byte(t), tmpRegBits, true, true)
2144 
2145 // ADD/SUB tmpReg, dstReg
2146 a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
2147 return
2148 }
2149 
2150 if uc := uint64(c); isBitMaskImmediate(uc) {
2151 // If the const can be represented as "bitmask immediate", we load it via ORR into the temp register.
2152 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6570-L6583
2153 tmpRegBits := registerBits(a.temporaryRegister)
2154 // ORR $c, tmpReg
2155 a.loadConstViaBitMaskImmediate(uc, tmpRegBits, true)
2156 
2157 // ADD/SUB tmpReg, dstReg
2158 a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
2159 return
2160 }
2161 
2162 // If the value fits within 24 bits, we emit two ADD instructions.
2163 if 0 <= c && c <= 0xffffff && inst != SUBS && inst != ADDS {
2164 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3849-L3862
2165 a.Buf.Write([]byte{
2166 (dstRegBits << 5) | dstRegBits,
2167 (byte(c) << 2) | (dstRegBits >> 3),
2168 byte(c & 0xfff >> 6),
2169 sfops<<5 | 0b10001,
2170 })
2171 c = c >> 12
2172 a.Buf.Write([]byte{
2173 (dstRegBits << 5) | dstRegBits,
2174 (byte(c) << 2) | (dstRegBits >> 3),
2175 0b01_000000 /* shift by 12 */ | byte(c>>6),
2176 sfops<<5 | 0b10001,
2177 })
2178 return
2179 }
2180 
2181 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3163-L3203
2182 // Otherwise, we use MOVZ/MOVN plus MOVKs to load the const into tmpRegister.
2183 tmpRegBits := registerBits(a.temporaryRegister)
2184 a.load64bitConst(c, tmpRegBits)
2185 a.addOrSub64BitRegisters(sfops, dstRegBits, tmpRegBits)
2186 case MOVW:
2187 if c == 0 {
2188 a.Buf.Write([]byte{
2189 (zeroRegisterBits << 5) | dstRegBits,
2190 zeroRegisterBits >> 3,
2191 0b000_00000 | zeroRegisterBits,
2192 0b0_01_01010,
2193 })
2194 return
2195 }
2196 
2197 // Following the logic here:
2198 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
2199 c32 := uint32(c)
2200 ic := int64(c32)
2201 if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
2202 if isBitMaskImmediate(uint64(c)) {
2203 a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, false)
2204 return
2205 }
2206 }
2207 
2208 if t := const16bitAligned(int64(c32)); t >= 0 {
2209 // If the const fits within a single 16-bit aligned chunk (e.g. 0xffff, 0xffff_0000, or 0xffff_0000_0000_0000),
2210 // we can load it with a single MOVZ.
2211 a.load16bitAlignedConst(int64(c32)>>(16*t), byte(t), dstRegBits, false, false)
2212 } else if t := const16bitAligned(int64(^c32)); t >= 0 {
2213 // Likewise, if the bitwise inversion of the const fits within a single 16-bit chunk, load it with a single MOVN.
2214 a.load16bitAlignedConst(int64(^c32)>>(16*t), byte(t), dstRegBits, true, false)
2215 } else if isBitMaskImmediate(uint64(c)) {
2216 a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, false)
2217 } else {
2218 // Otherwise, we use MOVZ and MOVK to load it.
2219 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6623-L6630
2220 c16 := uint16(c32)
2221 // MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2222 a.Buf.Write([]byte{
2223 (byte(c16) << 5) | dstRegBits,
2224 byte(c16 >> 3),
2225 1<<7 | byte(c16>>11),
2226 0b0_10_10010,
2227 })
2228 // MOVK: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
2229 c16 = uint16(c32 >> 16)
2230 if c16 != 0 {
2231 a.Buf.Write([]byte{
2232 (byte(c16) << 5) | dstRegBits,
2233 byte(c16 >> 3),
2234 1<<7 | 0b0_01_00000 /* shift by 16 */ | byte(c16>>11),
2235 0b0_11_10010,
2236 })
2237 }
2238 }
2239 case MOVD:
2240 // Following the logic here:
2241 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
2242 if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
2243 if isBitMaskImmediate(uint64(c)) {
2244 a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, true)
2245 return
2246 }
2247 }
2248 
2249 if t := const16bitAligned(c); t >= 0 {
2250 // If the const fits within a single 16-bit aligned chunk (e.g. 0xffff, 0xffff_0000, or 0xffff_0000_0000_0000),
2251 // we can load it with a single MOVZ.
2252 a.load16bitAlignedConst(c>>(16*t), byte(t), dstRegBits, false, true)
2253 } else if t := const16bitAligned(^c); t >= 0 {
2254 // Likewise, if the bitwise inversion of the const fits within a single 16-bit chunk, load it with a single MOVN.
2255 a.load16bitAlignedConst((^c)>>(16*t), byte(t), dstRegBits, true, true)
2256 } else if isBitMaskImmediate(uint64(c)) {
2257 a.loadConstViaBitMaskImmediate(uint64(c), dstRegBits, true)
2258 } else {
2259 a.load64bitConst(c, dstRegBits)
2260 }
2261 case LSR:
2262 if c == 0 {
2263 err = errors.New("LSR with zero constant should be optimized out")
2264 return
2265 } else if c < 0 || c > 63 {
2266 err = fmt.Errorf("LSR requires immediate to be within 0 to 63, but got %d", c)
2267 return
2268 }
2269 
2270 // LSR(immediate) is an alias of UBFM
2271 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
2272 a.Buf.Write([]byte{
2273 (dstRegBits << 5) | dstRegBits,
2274 0b111111_00 | dstRegBits>>3,
2275 0b01_000000 | byte(c),
2276 0b110_10011,
2277 })
2278 case LSL:
2279 if c == 0 {
2280 err = errors.New("LSL with zero constant should be optimized out")
2281 return
2282 } else if c < 0 || c > 63 {
2283 err = fmt.Errorf("LSL requires immediate to be within 0 to 63, but got %d", c)
2284 return
2285 }
2286 
2287 // LSL(immediate) is an alias of UBFM
2288 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-
2289 cb := byte(c)
2290 a.Buf.Write([]byte{
2291 (dstRegBits << 5) | dstRegBits,
2292 (0b111111-cb)<<2 | dstRegBits>>3,
2293 0b01_000000 | (64 - cb),
2294 0b110_10011,
2295 })
2296 
2297 default:
2298 return errorEncodingUnsupported(n)
2299 }
2300 return
2301 }
2302 
2303 func (a *AssemblerImpl) movk(v uint64, shiftNum int, dstRegBits byte) {
2304 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
2305 a.Buf.Write([]byte{
2306 (byte(v) << 5) | dstRegBits,
2307 byte(v >> 3),
2308 1<<7 | byte(shiftNum)<<5 | (0b000_11111 & byte(v>>11)),
2309 0b1_11_10010,
2310 })
2311 }
2312 
2313 func (a *AssemblerImpl) movz(v uint64, shiftNum int, dstRegBits byte) {
2314 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2315 a.Buf.Write([]byte{
2316 (byte(v) << 5) | dstRegBits,
2317 byte(v >> 3),
2318 1<<7 | byte(shiftNum)<<5 | (0b000_11111 & byte(v>>11)),
2319 0b1_10_10010,
2320 })
2321 }
2322 
2323 func (a *AssemblerImpl) movn(v uint64, shiftNum int, dstRegBits byte) {
2324 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2325 a.Buf.Write([]byte{
2326 (byte(v) << 5) | dstRegBits,
2327 byte(v >> 3),
2328 1<<7 | byte(shiftNum)<<5 | (0b000_11111 & byte(v>>11)),
2329 0b1_00_10010,
2330 })
2331 }
2332 
2333 // load64bitConst loads a 64-bit constant into the register, following the same logic as the Go assembler
2334 // for deciding how to load large 64-bit consts.
2335 //
2336 // See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
2337 func (a *AssemblerImpl) load64bitConst(c int64, dstRegBits byte) {
2338 var bits [4]uint64
2339 var zeros, negs int
2340 for i := 0; i < 4; i++ {
2341 bits[i] = uint64((c >> uint(i*16)) & 0xffff)
2342 if v := bits[i]; v == 0 {
2343 zeros++
2344 } else if v == 0xffff {
2345 negs++
2346 }
2347 }
2348 
2349 if zeros == 3 {
2350 // one MOVZ instruction.
2351 for i, v := range bits {
2352 if v != 0 {
2353 a.movz(v, i, dstRegBits)
2354 }
2355 }
2356 } else if negs == 3 {
2357 // one MOVN instruction.
2358 for i, v := range bits {
2359 if v != 0xffff {
2360 v = ^v
2361 a.movn(v, i, dstRegBits)
2362 }
2363 }
2364 } else if zeros == 2 {
2365 // one MOVZ then one MOVK.
2366 var movz bool
2367 for i, v := range bits {
2368 if !movz && v != 0 { // MOVZ.
2369 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2370 a.movz(v, i, dstRegBits)
2371 movz = true
2372 } else if v != 0 {
2373 a.movk(v, i, dstRegBits)
2374 }
2375 }
2376 
2377 } else if negs == 2 {
2378 // one MOVN then one MOVK.
2379 var movn bool
2380 for i, v := range bits { // Emit MOVN.
2381 if !movn && v != 0xffff {
2382 v = ^v
2383 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2384 a.movn(v, i, dstRegBits)
2385 movn = true
2386 } else if v != 0xffff {
2387 a.movk(v, i, dstRegBits)
2388 }
2389 }
2390 
2391 } else if zeros == 1 {
2392 // one MOVZ then two MOVK.
2393 var movz bool
2394 for i, v := range bits {
2395 if !movz && v != 0 { // MOVZ.
2396 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2397 a.movz(v, i, dstRegBits)
2398 movz = true
2399 } else if v != 0 {
2400 a.movk(v, i, dstRegBits)
2401 }
2402 }
2403 
2404 } else if negs == 1 {
2405 // one MOVN then two MOVK.
2406 var movn bool
2407 for i, v := range bits { // Emit MOVN.
2408 if !movn && v != 0xffff {
2409 v = ^v
2410 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2411 a.movn(v, i, dstRegBits)
2412 movn = true
2413 } else if v != 0xffff {
2414 a.movk(v, i, dstRegBits)
2415 }
2416 }
2417 
2418 } else {
2419 // one MOVZ then three MOVK.
2420 var movz bool
2421 for i, v := range bits {
2422 if !movz && v != 0 { // MOVZ.
2423 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2424 a.movz(v, i, dstRegBits)
2425 movz = true
2426 } else if v != 0 {
2427 a.movk(v, i, dstRegBits)
2428 }
2429 }
2430 
2431 }
2432 }
2433 
2434 func (a *AssemblerImpl) load16bitAlignedConst(c int64, shiftNum byte, regBits byte, reverse bool, dst64bit bool) {
2435 var lastByte byte
2436 if reverse {
2437 // MOVN: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2438 lastByte = 0b0_00_10010
2439 } else {
2440 // MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2441 lastByte = 0b0_10_10010
2442 }
2443 if dst64bit {
2444 lastByte |= 0b1 << 7
2445 }
2446 a.Buf.Write([]byte{
2447 (byte(c) << 5) | regBits,
2448 byte(c >> 3),
2449 1<<7 | (shiftNum << 5) | byte(c>>11),
2450 lastByte,
2451 })
2452 }
2453 
2454 // loadConstViaBitMaskImmediate loads the constant with ORR (bitmask immediate).
2455 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--?lang=en
2456 func (a *AssemblerImpl) loadConstViaBitMaskImmediate(c uint64, regBits byte, dst64bit bool) {
2457 var size uint32
2458 switch {
2459 case c != c>>32|c<<32:
2460 size = 64
2461 case c != c>>16|c<<48:
2462 size = 32
2463 c = uint64(int32(c))
2464 case c != c>>8|c<<56:
2465 size = 16
2466 c = uint64(int16(c))
2467 case c != c>>4|c<<60:
2468 size = 8
2469 c = uint64(int8(c))
2470 case c != c>>2|c<<62:
2471 size = 4
2472 c = uint64(int64(c<<60) >> 60)
2473 default:
2474 size = 2
2475 c = uint64(int64(c<<62) >> 62)
2476 }
2477 
2478 neg := false
2479 if int64(c) < 0 {
2480 c = ^c
2481 neg = true
2482 }
2483 
2484 onesSize, nonZeroPos := getOnesSequenceSize(c)
2485 if neg {
2486 nonZeroPos = onesSize + nonZeroPos
2487 onesSize = size - onesSize
2488 }
2489 
2490 // See the following article for understanding the encoding.
2491 // https://dinfuehr.github.io/blog/encoding-of-immediate-values-on-aarch64/
2492 var n byte
2493 mode := 32
2494 if dst64bit && size == 64 {
2495 n = 0b1
2496 mode = 64
2497 }
2498 
2499 r := byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
2500 s := byte((onesSize - 1) | 63&^(size<<1-1))
2501 
2502 var sf byte
2503 if dst64bit {
2504 sf = 0b1
2505 }
2506 a.Buf.Write([]byte{
2507 (zeroRegisterBits << 5) | regBits,
2508 s<<2 | (zeroRegisterBits >> 3),
2509 n<<6 | r,
2510 sf<<7 | 0b0_01_10010,
2511 })
2512 }
2513 
2514 func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) {
2515 // Take 0b00111000 for example:
2516 y := getLowestBit(x) // = 0b00001000
2517 nonZeroPos = setBitPos(y) // = 3
2518 size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b01000000) - 3 = 6 - 3 = 3
2519 return
2520 }
2521 
2522 func setBitPos(x uint64) (ret uint32) {
2523 for ; ; ret++ {
2524 if x == 0b1 {
2525 break
2526 }
2527 x = x >> 1
2528 }
2529 return
2530 }
2531 
2532 func checkArrangementIndexPair(arr VectorArrangement, index VectorIndex) (err error) {
2533 if arr == VectorArrangementNone {
2534 return nil
2535 }
2536 var valid bool
2537 switch arr {
2538 case VectorArrangement8B:
2539 valid = index < 8
2540 case VectorArrangement16B:
2541 valid = index < 16
2542 case VectorArrangement4H:
2543 valid = index < 4
2544 case VectorArrangement8H:
2545 valid = index < 8
2546 case VectorArrangement2S:
2547 valid = index < 2
2548 case VectorArrangement4S:
2549 valid = index < 4
2550 case VectorArrangement1D:
2551 valid = index < 1
2552 case VectorArrangement2D:
2553 valid = index < 2
2554 case VectorArrangementB:
2555 valid = index < 16
2556 case VectorArrangementH:
2557 valid = index < 8
2558 case VectorArrangementS:
2559 valid = index < 4
2560 case VectorArrangementD:
2561 valid = index < 2
2562 }
2563 if !valid {
2564 err = fmt.Errorf("invalid arrangement and index pair: %s[%d]", arr, index)
2565 }
2566 return
2567 }
2568 
2569 func (a *AssemblerImpl) encodeMemoryToVectorRegister(n *nodeImpl) (err error) {
2570 srcBaseRegBits, err := intRegisterBits(n.srcReg)
2571 if err != nil {
2572 return err
2573 }
2574 
2575 dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
2576 if err != nil {
2577 return err
2578 }
2579 
2580 switch n.instruction {
2581 case VMOV: // translated as LDR(immediate,SIMD&FP)
2582 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LDR--immediate--SIMD-FP---Load-SIMD-FP-Register--immediate-offset--?lang=en
2583 var size, opcode byte
2584 var dataSize, dataSizeLog2 int64
2585 switch n.vectorArrangement {
2586 case VectorArrangementB:
2587 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b01, 1, 0
2588 case VectorArrangementH:
2589 size, opcode, dataSize, dataSizeLog2 = 0b01, 0b01, 2, 1
2590 case VectorArrangementS:
2591 size, opcode, dataSize, dataSizeLog2 = 0b10, 0b01, 4, 2
2592 case VectorArrangementD:
2593 size, opcode, dataSize, dataSizeLog2 = 0b11, 0b01, 8, 3
2594 case VectorArrangementQ:
2595 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b11, 16, 4
2596 }
2597 const v = 1 // v as in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_pos
2598 if n.srcReg2 != asm.NilRegister {
2599 offsetRegBits, err := intRegisterBits(n.srcReg2)
2600 if err != nil {
2601 return err
2602 }
2603 a.encodeLoadOrStoreWithRegisterOffset(srcBaseRegBits, offsetRegBits, dstVectorRegBits, opcode, size, v)
2604 } else {
2605 err = a.encodeLoadOrStoreWithConstOffset(srcBaseRegBits, dstVectorRegBits,
2606 n.srcConst, opcode, size, v,
dataSize, dataSizeLog2) 2607 } 2608 case LD1R: 2609 if n.srcReg2 != asm.NilRegister || n.srcConst != 0 { 2610 return fmt.Errorf("offset for %s is not implemented", InstructionName(LD1R)) 2611 } 2612 2613 var size, q byte 2614 switch n.vectorArrangement { 2615 case VectorArrangement8B: 2616 size, q = 0b00, 0b0 2617 case VectorArrangement16B: 2618 size, q = 0b00, 0b1 2619 case VectorArrangement4H: 2620 size, q = 0b01, 0b0 2621 case VectorArrangement8H: 2622 size, q = 0b01, 0b1 2623 case VectorArrangement2S: 2624 size, q = 0b10, 0b0 2625 case VectorArrangement4S: 2626 size, q = 0b10, 0b1 2627 case VectorArrangement1D: 2628 size, q = 0b11, 0b0 2629 case VectorArrangement2D: 2630 size, q = 0b11, 0b1 2631 } 2632 2633 // No offset encoding. 2634 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#iclass_as_post_index 2635 a.Buf.Write([]byte{ 2636 (srcBaseRegBits << 5) | dstVectorRegBits, 2637 0b11_000000 | size<<2 | srcBaseRegBits>>3, 2638 0b01_000000, 2639 q<<6 | 0b1101, 2640 }) 2641 default: 2642 return errorEncodingUnsupported(n) 2643 } 2644 return 2645 } 2646 2647 func arrangementSizeQ(arr VectorArrangement) (size, q byte) { 2648 switch arr { 2649 case VectorArrangement8B: 2650 size, q = 0b00, 0 2651 case VectorArrangement16B: 2652 size, q = 0b00, 1 2653 case VectorArrangement4H: 2654 size, q = 0b01, 0 2655 case VectorArrangement8H: 2656 size, q = 0b01, 1 2657 case VectorArrangement2S: 2658 size, q = 0b10, 0 2659 case VectorArrangement4S: 2660 size, q = 0b10, 1 2661 case VectorArrangement1D: 2662 size, q = 0b11, 0 2663 case VectorArrangement2D: 2664 size, q = 0b11, 1 2665 } 2666 return 2667 } 2668 2669 func (a *AssemblerImpl) encodeVectorRegisterToMemory(n *nodeImpl) (err error) { 2670 srcVectorRegBits, err := vectorRegisterBits(n.srcReg) 2671 if err != nil { 2672 return err 2673 } 2674 2675 dstBaseRegBits, err := intRegisterBits(n.dstReg) 2676 if err != nil { 2677 return err 2678 } 2679 2680 switch n.instruction { 2681 case VMOV: // translated as STR(immediate,SIMD&FP) 2682 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/STR--immediate--SIMD-FP---Store-SIMD-FP-register--immediate-offset-- 2683 var size, opcode byte 2684 var dataSize, dataSizeLog2 int64 2685 switch n.vectorArrangement { 2686 case VectorArrangementB: 2687 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b00, 1, 0 2688 case VectorArrangementH: 2689 size, opcode, dataSize, dataSizeLog2 = 0b01, 0b00, 2, 1 2690 case VectorArrangementS: 2691 size, opcode, dataSize, dataSizeLog2 = 0b10, 0b00, 4, 2 2692 case VectorArrangementD: 2693 size, opcode, dataSize, dataSizeLog2 = 0b11, 0b00, 8, 3 2694 case VectorArrangementQ: 2695 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b10, 16, 4 2696 } 2697 const v = 1 // v as in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_pos 2698 2699 if n.dstReg2 != asm.NilRegister { 2700 offsetRegBits, err := intRegisterBits(n.dstReg2) 2701 if err != nil { 2702 return err 2703 } 2704 a.encodeLoadOrStoreWithRegisterOffset(dstBaseRegBits, offsetRegBits, srcVectorRegBits, opcode, size, v) 2705 } else { 2706 err = a.encodeLoadOrStoreWithConstOffset(dstBaseRegBits, srcVectorRegBits, 2707 n.dstConst, opcode, size, v, dataSize, dataSizeLog2) 2708 } 2709 default: 2710 return errorEncodingUnsupported(n) 2711 } 2712 return 2713 } 2714 2715 func (a *AssemblerImpl) encodeStaticConstToVectorRegister(n *nodeImpl) (err 
error) { 2716 if n.instruction != VMOV { 2717 return errorEncodingUnsupported(n) 2718 } 2719 2720 dstRegBits, err := vectorRegisterBits(n.dstReg) 2721 if err != nil { 2722 return err 2723 } 2724 2725 // LDR (literal, SIMD&FP) 2726 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal-- 2727 var opc byte 2728 var constLength int 2729 switch n.vectorArrangement { 2730 case VectorArrangementS: 2731 opc, constLength = 0b00, 4 2732 case VectorArrangementD: 2733 opc, constLength = 0b01, 8 2734 case VectorArrangementQ: 2735 opc, constLength = 0b10, 16 2736 } 2737 2738 loadLiteralOffsetInBinary := uint64(a.Buf.Len()) 2739 a.pool.AddConst(n.staticConst, loadLiteralOffsetInBinary) 2740 2741 if len(n.staticConst.Raw) != constLength { 2742 return fmt.Errorf("invalid const length for %s: want %d but was %d", 2743 n.vectorArrangement, constLength, len(n.staticConst.Raw)) 2744 } 2745 2746 a.Buf.Write([]byte{dstRegBits, 0x0, 0x0, opc<<6 | 0b11100}) 2747 n.staticConst.AddOffsetFinalizedCallback(func(offsetOfConst uint64) { 2748 // LDR (literal, SIMD&FP) encodes offset divided by 4. 2749 offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4 2750 bin := a.Buf.Bytes() 2751 bin[loadLiteralOffsetInBinary] |= byte(offset << 5) 2752 bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3) 2753 bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11) 2754 }) 2755 return 2756 } 2757 2758 // advancedSIMDTwoRegisterMisc holds information to encode instructions as "Advanced SIMD two-register miscellaneous" in 2759 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 2760 var advancedSIMDTwoRegisterMisc = map[asm.Instruction]struct { 2761 u, opcode byte 2762 qAndSize map[VectorArrangement]qAndSize 2763 }{ 2764 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NOT--Bitwise-NOT--vector--?lang=en 2765 NOT: { 2766 u: 0b1, opcode: 0b00101, 2767 qAndSize: map[VectorArrangement]qAndSize{ 2768 VectorArrangement16B: {size: 0b00, q: 0b1}, 2769 VectorArrangement8B: {size: 0b00, q: 0b0}, 2770 }, 2771 }, 2772 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FNEG--vector---Floating-point-Negate--vector--?lang=en 2773 VFNEG: { 2774 u: 0b1, opcode: 0b01111, 2775 qAndSize: map[VectorArrangement]qAndSize{ 2776 VectorArrangement4S: {size: 0b10, q: 0b1}, 2777 VectorArrangement2S: {size: 0b10, q: 0b0}, 2778 VectorArrangement2D: {size: 0b11, q: 0b1}, 2779 }, 2780 }, 2781 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FABS--vector---Floating-point-Absolute-value--vector--?lang=en 2782 VFABS: {u: 0, opcode: 0b01111, qAndSize: map[VectorArrangement]qAndSize{ 2783 VectorArrangement2D: {size: 0b11, q: 0b1}, 2784 VectorArrangement4S: {size: 0b10, q: 0b1}, 2785 VectorArrangement2S: {size: 0b10, q: 0b0}, 2786 }}, 2787 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSQRT--vector---Floating-point-Square-Root--vector--?lang=en 2788 VFSQRT: {u: 1, opcode: 0b11111, qAndSize: map[VectorArrangement]qAndSize{ 2789 VectorArrangement2D: {size: 0b11, q: 0b1}, 2790 VectorArrangement4S: {size: 0b10, q: 0b1}, 2791 VectorArrangement2S: {size: 0b10, q: 0b0}, 2792 }}, 2793 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTM--vector---Floating-point-Round-to-Integral--toward-Minus-infinity--vector--?lang=en 2794 
VFRINTM: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{ 2795 VectorArrangement2D: {size: 0b01, q: 0b1}, 2796 VectorArrangement4S: {size: 0b00, q: 0b1}, 2797 VectorArrangement2S: {size: 0b00, q: 0b0}, 2798 }}, 2799 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTN--vector---Floating-point-Round-to-Integral--to-nearest-with-ties-to-even--vector--?lang=en 2800 VFRINTN: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{ 2801 VectorArrangement2D: {size: 0b01, q: 0b1}, 2802 VectorArrangement4S: {size: 0b00, q: 0b1}, 2803 VectorArrangement2S: {size: 0b00, q: 0b0}, 2804 }}, 2805 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTP--vector---Floating-point-Round-to-Integral--toward-Plus-infinity--vector--?lang=en 2806 VFRINTP: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{ 2807 VectorArrangement2D: {size: 0b11, q: 0b1}, 2808 VectorArrangement4S: {size: 0b10, q: 0b1}, 2809 VectorArrangement2S: {size: 0b10, q: 0b0}, 2810 }}, 2811 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTZ--vector---Floating-point-Round-to-Integral--toward-Zero--vector--?lang=en 2812 VFRINTZ: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{ 2813 VectorArrangement2D: {size: 0b11, q: 0b1}, 2814 VectorArrangement4S: {size: 0b10, q: 0b1}, 2815 VectorArrangement2S: {size: 0b10, q: 0b0}, 2816 }}, 2817 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CNT--Population-Count-per-byte-?lang=en 2818 VCNT: {u: 0b0, opcode: 0b00101, qAndSize: map[VectorArrangement]qAndSize{ 2819 VectorArrangement8B: {size: 0b00, q: 0b0}, 2820 VectorArrangement16B: {size: 0b00, q: 0b1}, 2821 }}, 2822 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NEG--vector---Negate--vector--?lang=en 2823 VNEG: {u: 0b1, opcode: 0b01011, qAndSize: defaultQAndSize}, 2824 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ABS--Absolute-value--vector--?lang=en 2825 VABS: {u: 0b0, opcode: 0b01011, qAndSize: defaultQAndSize}, 2826 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/REV64--Reverse-elements-in-64-bit-doublewords--vector--?lang=en 2827 REV64: {u: 0b0, opcode: 0b00000, qAndSize: defaultQAndSize}, 2828 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/XTN--XTN2--Extract-Narrow-?lang=en 2829 XTN: {u: 0b0, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{ 2830 VectorArrangement2D: {q: 0, size: 0b10}, 2831 VectorArrangement4S: {q: 0, size: 0b01}, 2832 VectorArrangement8H: {q: 0, size: 0b00}, 2833 }}, 2834 SHLL: {u: 0b1, opcode: 0b10011, qAndSize: map[VectorArrangement]qAndSize{ 2835 VectorArrangement8B: {q: 0b00, size: 0b00}, 2836 VectorArrangement4H: {q: 0b00, size: 0b01}, 2837 VectorArrangement2S: {q: 0b00, size: 0b10}, 2838 }}, 2839 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--zero---Compare-bitwise-Equal-to-zero--vector--?lang=en 2840 CMEQZERO: {u: 0b0, opcode: 0b01001, qAndSize: defaultQAndSize}, 2841 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SADDLP--Signed-Add-Long-Pairwise-?lang=en 2842 SADDLP: {u: 0b0, opcode: 0b00010, qAndSize: defaultQAndSize}, 2843 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UADDLP--Unsigned-Add-Long-Pairwise-?lang=en 2844 UADDLP: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize}, 2845 // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-Convert-to-Signed-integer--rounding-toward-Zero--vector--?lang=en 2846 VFCVTZS: {u: 0b0, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{ 2847 VectorArrangement4S: {size: 0b10, q: 0b1}, 2848 VectorArrangement2S: {size: 0b10, q: 0b0}, 2849 VectorArrangement2D: {size: 0b11, q: 0b1}, 2850 }}, 2851 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTZU--vector--integer---Floating-point-Convert-to-Unsigned-integer--rounding-toward-Zero--vector--?lang=en 2852 VFCVTZU: {u: 0b1, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{ 2853 VectorArrangement4S: {size: 0b10, q: 0b1}, 2854 VectorArrangement2S: {size: 0b10, q: 0b0}, 2855 VectorArrangement2D: {size: 0b11, q: 0b1}, 2856 }}, 2857 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTN--SQXTN2--Signed-saturating-extract-Narrow-?lang=en 2858 SQXTN: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{ 2859 VectorArrangement8B: {q: 0b0, size: 0b00}, 2860 VectorArrangement4H: {q: 0b0, size: 0b01}, 2861 VectorArrangement2S: {q: 0b0, size: 0b10}, 2862 }}, 2863 2864 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTN--SQXTN2--Signed-saturating-extract-Narrow-?lang=en 2865 SQXTN2: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{ 2866 VectorArrangement16B: {q: 0b1, size: 0b00}, 2867 VectorArrangement8H: {q: 0b1, size: 0b01}, 2868 VectorArrangement4S: {q: 0b1, size: 0b10}, 2869 }}, 2870 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQXTN--UQXTN2--Unsigned-saturating-extract-Narrow-?lang=en 2871 UQXTN: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize}, 2872 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTUN--SQXTUN2--Signed-saturating-extract-Unsigned-Narrow-?lang=en 2873 SQXTUN: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{ 2874 VectorArrangement8B: {q: 0b0, size: 0b00}, 2875 VectorArrangement4H: {q: 0b0, size: 0b01}, 2876 VectorArrangement2S: {q: 0b0, size: 0b10}, 2877 }}, 2878 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTUN--SQXTUN2--Signed-saturating-extract-Unsigned-Narrow-?lang=en 2879 SQXTUN2: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{ 2880 VectorArrangement16B: {q: 0b1, size: 0b00}, 2881 VectorArrangement8H: {q: 0b1, size: 0b01}, 2882 VectorArrangement4S: {q: 0b1, size: 0b10}, 2883 }}, 2884 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-Convert-to-Floating-point--vector--?lang=en 2885 VSCVTF: {u: 0b0, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{ 2886 VectorArrangement2D: {q: 0b1, size: 0b01}, 2887 VectorArrangement4S: {q: 0b1, size: 0b00}, 2888 VectorArrangement2S: {q: 0b0, size: 0b00}, 2889 }}, 2890 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-Convert-to-Floating-point--vector--?lang=en 2891 VUCVTF: {u: 0b1, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{ 2892 VectorArrangement2D: {q: 0b1, size: 0b01}, 2893 VectorArrangement4S: {q: 0b1, size: 0b00}, 2894 VectorArrangement2S: {q: 0b0, size: 0b00}, 2895 }}, 2896 // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTL--FCVTL2--Floating-point-Convert-to-higher-precision-Long--vector--?lang=en 2897 FCVTL: {u: 0b0, opcode: 0b10111, qAndSize: map[VectorArrangement]qAndSize{ 2898 VectorArrangement2S: {size: 0b01, q: 0b0}, 2899 VectorArrangement4H: {size: 0b00, q: 0b0}, 2900 }}, 2901 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTN--FCVTN2--Floating-point-Convert-to-lower-precision-Narrow--vector--?lang=en 2902 FCVTN: {u: 0b0, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{ 2903 VectorArrangement2S: {size: 0b01, q: 0b0}, 2904 VectorArrangement4H: {size: 0b00, q: 0b0}, 2905 }}, 2906 } 2907 2908 // advancedSIMDThreeDifferent holds information to encode instructions as "Advanced SIMD three different" in 2909 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 2910 var advancedSIMDThreeDifferent = map[asm.Instruction]struct { 2911 u, opcode byte 2912 qAndSize map[VectorArrangement]qAndSize 2913 }{ 2914 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMLAL--UMLAL2--vector---Unsigned-Multiply-Add-Long--vector--?lang=en 2915 VUMLAL: {u: 0b1, opcode: 0b1000, qAndSize: map[VectorArrangement]qAndSize{ 2916 VectorArrangement2S: {q: 0b0, size: 0b10}, 2917 VectorArrangement4H: {q: 0b0, size: 0b01}, 2918 VectorArrangement8B: {q: 0b0, size: 0b00}, 2919 }}, 2920 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMULL--SMULL2--vector---Signed-Multiply-Long--vector--?lang=en 2921 SMULL: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 2922 VectorArrangement8B: {q: 0b0, size: 0b00}, 2923 VectorArrangement4H: {q: 0b0, size: 0b01}, 2924 VectorArrangement2S: {q: 0b0, size: 0b10}, 2925 }}, 2926 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMULL--SMULL2--vector---Signed-Multiply-Long--vector--?lang=en 2927 SMULL2: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 2928 VectorArrangement16B: {q: 0b1, size: 0b00}, 2929 VectorArrangement8H: {q: 0b1, size: 0b01}, 2930 VectorArrangement4S: {q: 0b1, size: 0b10}, 2931 }}, 2932 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 2933 UMULL: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 2934 VectorArrangement8B: {q: 0b0, size: 0b00}, 2935 VectorArrangement4H: {q: 0b0, size: 0b01}, 2936 VectorArrangement2S: {q: 0b0, size: 0b10}, 2937 }}, 2938 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 2939 UMULL2: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 2940 VectorArrangement16B: {q: 0b1, size: 0b00}, 2941 VectorArrangement8H: {q: 0b1, size: 0b01}, 2942 VectorArrangement4S: {q: 0b1, size: 0b10}, 2943 }}, 2944 } 2945 2946 // advancedSIMDThreeSame holds information to encode instructions as "Advanced SIMD three same" in 2947 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 2948 var advancedSIMDThreeSame = map[asm.Instruction]struct { 2949 u, opcode byte 2950 qAndSize map[VectorArrangement]qAndSize 2951 }{ 2952 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/AND--vector---Bitwise-AND--vector--?lang=en 
2953 VAND: { 2954 u: 0b0, opcode: 0b00011, 2955 qAndSize: map[VectorArrangement]qAndSize{ 2956 VectorArrangement16B: {size: 0b00, q: 0b1}, 2957 VectorArrangement8B: {size: 0b00, q: 0b0}, 2958 }, 2959 }, 2960 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BSL--Bitwise-Select-?lang=en 2961 BSL: { 2962 u: 0b1, opcode: 0b00011, 2963 qAndSize: map[VectorArrangement]qAndSize{ 2964 VectorArrangement16B: {size: 0b01, q: 0b1}, 2965 VectorArrangement8B: {size: 0b01, q: 0b0}, 2966 }, 2967 }, 2968 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EOR--vector---Bitwise-Exclusive-OR--vector--?lang=en 2969 EOR: { 2970 u: 0b1, opcode: 0b00011, 2971 qAndSize: map[VectorArrangement]qAndSize{ 2972 VectorArrangement16B: {size: 0b00, q: 0b1}, 2973 VectorArrangement8B: {size: 0b00, q: 0b0}, 2974 }, 2975 }, 2976 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ORR--vector--register---Bitwise-inclusive-OR--vector--register--?lang=en 2977 VORR: { 2978 u: 0b0, opcode: 0b00011, 2979 qAndSize: map[VectorArrangement]qAndSize{ 2980 VectorArrangement16B: {size: 0b10, q: 0b1}, 2981 VectorArrangement8B: {size: 0b10, q: 0b0}, 2982 }, 2983 }, 2984 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIC--vector--register---Bitwise-bit-Clear--vector--register--?lang=en 2985 BIC: { 2986 u: 0b0, opcode: 0b00011, 2987 qAndSize: map[VectorArrangement]qAndSize{ 2988 VectorArrangement16B: {size: 0b01, q: 0b1}, 2989 VectorArrangement8B: {size: 0b01, q: 0b0}, 2990 }, 2991 }, 2992 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en 2993 VFADDS: { 2994 u: 0b0, opcode: 0b11010, 2995 qAndSize: map[VectorArrangement]qAndSize{ 2996 VectorArrangement4S: {size: 0b00, q: 0b1}, 2997 VectorArrangement2S: {size: 0b00, q: 0b0}, 2998 }, 2999 }, 3000 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en 3001 VFADDD: { 3002 u: 0b0, opcode: 0b11010, 3003 qAndSize: map[VectorArrangement]qAndSize{ 3004 VectorArrangement2D: {size: 0b01, q: 0b1}, 3005 }, 3006 }, 3007 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en 3008 VFSUBS: { 3009 u: 0b0, opcode: 0b11010, 3010 qAndSize: map[VectorArrangement]qAndSize{ 3011 VectorArrangement4S: {size: 0b10, q: 0b1}, 3012 VectorArrangement2S: {size: 0b10, q: 0b0}, 3013 }, 3014 }, 3015 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en 3016 VFSUBD: { 3017 u: 0b0, opcode: 0b11010, 3018 qAndSize: map[VectorArrangement]qAndSize{ 3019 VectorArrangement2D: {size: 0b11, q: 0b1}, 3020 }, 3021 }, 3022 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAXP--Unsigned-Maximum-Pairwise-?lang=en 3023 UMAXP: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize}, 3024 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--register---Compare-bitwise-Equal--vector--?lang=en 3025 CMEQ: {u: 0b1, opcode: 0b10001, qAndSize: defaultQAndSize}, 3026 // https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/ADDP--vector- 3027 VADDP: {u: 0b0, opcode: 0b10111, qAndSize: defaultQAndSize}, 3028 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector---Add--vector--?lang=en 3029 VADD: 
{u: 0, opcode: 0b10000, qAndSize: defaultQAndSize}, 3030 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SUB--vector---Subtract--vector--?lang=en 3031 VSUB: {u: 1, opcode: 0b10000, qAndSize: defaultQAndSize}, 3032 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en 3033 SSHL: {u: 0, opcode: 0b01000, qAndSize: defaultQAndSize}, 3034 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en 3035 USHL: {u: 0b1, opcode: 0b01000, qAndSize: defaultQAndSize}, 3036 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGT--register---Compare-signed-Greater-than--vector--?lang=en 3037 CMGT: {u: 0b0, opcode: 0b00110, qAndSize: defaultQAndSize}, 3038 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHI--register---Compare-unsigned-Higher--vector--?lang=en 3039 CMHI: {u: 0b1, opcode: 0b00110, qAndSize: defaultQAndSize}, 3040 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGE--register---Compare-signed-Greater-than-or-Equal--vector--?lang=en 3041 CMGE: {u: 0b0, opcode: 0b00111, qAndSize: defaultQAndSize}, 3042 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHS--register---Compare-unsigned-Higher-or-Same--vector--?lang=en 3043 CMHS: {u: 0b1, opcode: 0b00111, qAndSize: defaultQAndSize}, 3044 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMEQ--register---Floating-point-Compare-Equal--vector--?lang=en 3045 FCMEQ: { 3046 u: 0b0, opcode: 0b11100, 3047 qAndSize: map[VectorArrangement]qAndSize{ 3048 VectorArrangement4S: {size: 0b00, q: 0b1}, 3049 VectorArrangement2S: {size: 0b00, q: 0b0}, 3050 VectorArrangement2D: {size: 0b01, q: 0b1}, 3051 }, 3052 }, 3053 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGT--register---Floating-point-Compare-Greater-than--vector--?lang=en 3054 FCMGT: { 3055 u: 0b1, opcode: 0b11100, 3056 qAndSize: map[VectorArrangement]qAndSize{ 3057 VectorArrangement4S: {size: 0b10, q: 0b1}, 3058 VectorArrangement2S: {size: 0b10, q: 0b0}, 3059 VectorArrangement2D: {size: 0b11, q: 0b1}, 3060 }, 3061 }, 3062 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en 3063 FCMGE: { 3064 u: 0b1, opcode: 0b11100, 3065 qAndSize: map[VectorArrangement]qAndSize{ 3066 VectorArrangement4S: {size: 0b00, q: 0b1}, 3067 VectorArrangement2S: {size: 0b00, q: 0b0}, 3068 VectorArrangement2D: {size: 0b01, q: 0b1}, 3069 }, 3070 }, 3071 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMIN--vector---Floating-point-minimum--vector--?lang=en 3072 VFMIN: { 3073 u: 0b0, opcode: 0b11110, 3074 qAndSize: map[VectorArrangement]qAndSize{ 3075 VectorArrangement4S: {size: 0b10, q: 0b1}, 3076 VectorArrangement2S: {size: 0b10, q: 0b0}, 3077 VectorArrangement2D: {size: 0b11, q: 0b1}, 3078 }, 3079 }, 3080 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMAX--vector---Floating-point-Maximum--vector--?lang=en 3081 VFMAX: { 3082 u: 0b0, opcode: 0b11110, 3083 qAndSize: map[VectorArrangement]qAndSize{ 3084 VectorArrangement4S: {size: 0b00, q: 0b1}, 3085 VectorArrangement2S: {size: 0b00, q: 0b0}, 3086 VectorArrangement2D: {size: 0b01, q: 0b1}, 3087 }, 3088 }, 3089 // 
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMUL--vector---Floating-point-Multiply--vector--?lang=en
	VFMUL: {
		u: 0b1, opcode: 0b11011,
		qAndSize: map[VectorArrangement]qAndSize{
			VectorArrangement4S: {size: 0b00, q: 0b1},
			VectorArrangement2S: {size: 0b00, q: 0b0},
			VectorArrangement2D: {size: 0b01, q: 0b1},
		},
	},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FDIV--vector---Floating-point-Divide--vector--?lang=en
	VFDIV: {
		u: 0b1, opcode: 0b11111,
		qAndSize: map[VectorArrangement]qAndSize{
			VectorArrangement4S: {size: 0b00, q: 0b1},
			VectorArrangement2S: {size: 0b00, q: 0b0},
			VectorArrangement2D: {size: 0b01, q: 0b1},
		},
	},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MUL--vector---Multiply--vector--?lang=en
	VMUL: {u: 0b0, opcode: 0b10011, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQADD--Signed-saturating-Add-?lang=en
	VSQADD: {u: 0b0, opcode: 0b00001, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQADD--Unsigned-saturating-Add-?lang=en
	VUQADD: {u: 0b1, opcode: 0b00001, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMIN--Signed-Minimum--vector--?lang=en
	SMIN: {u: 0b0, opcode: 0b01101, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMAX--Signed-Maximum--vector--?lang=en
	SMAX: {u: 0b0, opcode: 0b01100, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMIN--Unsigned-Minimum--vector--?lang=en
	UMIN: {u: 0b1, opcode: 0b01101, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAX--Unsigned-Maximum--vector--?lang=en
	UMAX: {u: 0b1, opcode: 0b01100, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/URHADD--Unsigned-Rounding-Halving-Add-?lang=en
	URHADD: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQSUB--Signed-saturating-Subtract-?lang=en
	VSQSUB: {u: 0b0, opcode: 0b00101, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQSUB--Unsigned-saturating-Subtract-?lang=en
	VUQSUB: {u: 0b1, opcode: 0b00101, qAndSize: defaultQAndSize},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIT--Bitwise-Insert-if-True-?lang=en
	VBIT: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{
		VectorArrangement8B:  {q: 0b0, size: 0b10},
		VectorArrangement16B: {q: 0b1, size: 0b10},
	}},
	// SQRDMULH: Signed saturating Rounding Doubling Multiply returning High half (vector).
	SQRDMULH: {u: 0b1, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{
		VectorArrangement4H: {q: 0b0, size: 0b01},
		VectorArrangement8H: {q: 0b1, size: 0b01},
		VectorArrangement2S: {q: 0b0, size: 0b10},
		VectorArrangement4S: {q: 0b1, size: 0b10},
	}},
}

// qAndSize is the pair of "Q" and "size" fields that appear in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
type qAndSize struct{ q, size byte }
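// Illustrative note (editorial addition, not in the original source): in the Advanced SIMD
// encoding classes referenced above, "Q" is bit 30 of the instruction word and selects the
// 64-bit (Q=0) or 128-bit (Q=1) form of the vector registers, while "size" (bits 23-22 in
// the "three same" class) selects the lane width. For example, VectorArrangement8H is the
// 128-bit form with 16-bit lanes, hence {size: 0b01, q: 0b1} in defaultQAndSize below.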
// defaultQAndSize maps a vector arrangement to the default qAndSize pair encoded by many instructions.
var defaultQAndSize = map[VectorArrangement]qAndSize{
	VectorArrangement8B:  {size: 0b00, q: 0b0},
	VectorArrangement16B: {size: 0b00, q: 0b1},
	VectorArrangement4H:  {size: 0b01, q: 0b0},
	VectorArrangement8H:  {size: 0b01, q: 0b1},
	VectorArrangement2S:  {size: 0b10, q: 0b0},
	VectorArrangement4S:  {size: 0b10, q: 0b1},
	VectorArrangement1D:  {size: 0b11, q: 0b0},
	VectorArrangement2D:  {size: 0b11, q: 0b1},
}

// advancedSIMDAcrossLanes holds information to encode instructions as "Advanced SIMD across lanes" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDAcrossLanes = map[asm.Instruction]struct {
	u, opcode byte
	qAndSize  map[VectorArrangement]qAndSize
}{
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDV--Add-across-Vector-?lang=en
	ADDV: {
		u: 0b0, opcode: 0b11011,
		qAndSize: map[VectorArrangement]qAndSize{
			VectorArrangement16B: {size: 0b00, q: 0b1},
			VectorArrangement8B:  {size: 0b00, q: 0b0},
			VectorArrangement8H:  {size: 0b01, q: 0b1},
			VectorArrangement4H:  {size: 0b01, q: 0b0},
			VectorArrangement4S:  {size: 0b10, q: 0b1},
		},
	},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMINV--Unsigned-Minimum-across-Vector-?lang=en
	UMINV: {
		u: 0b1, opcode: 0b11010,
		qAndSize: map[VectorArrangement]qAndSize{
			VectorArrangement16B: {size: 0b00, q: 0b1},
			VectorArrangement8B:  {size: 0b00, q: 0b0},
			VectorArrangement8H:  {size: 0b01, q: 0b1},
			VectorArrangement4H:  {size: 0b01, q: 0b0},
			VectorArrangement4S:  {size: 0b10, q: 0b1},
		},
	},
	// UADDLV: Unsigned sum Long across Vector.
	UADDLV: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{
		VectorArrangement16B: {size: 0b00, q: 0b1},
		VectorArrangement8B:  {size: 0b00, q: 0b0},
		VectorArrangement8H:  {size: 0b01, q: 0b1},
		VectorArrangement4H:  {size: 0b01, q: 0b0},
		VectorArrangement4S:  {size: 0b10, q: 0b1},
	}},
}

// advancedSIMDScalarPairwise holds information to encode instructions as "Advanced SIMD scalar pairwise" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDScalarPairwise = map[asm.Instruction]struct {
	u, opcode byte
	size      map[VectorArrangement]byte
}{
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDP--scalar---Add-Pair-of-elements--scalar--?lang=en
	ADDP: {u: 0b0, opcode: 0b11011, size: map[VectorArrangement]byte{VectorArrangement2D: 0b11}},
}
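// Illustrative note (editorial addition, not in the original source): in the "Advanced SIMD
// copy" class targeted by the next table, imm5 encodes both the element size and the element
// index. The position of the lowest set bit of imm5 selects the size (xxxx1 = byte,
// xxx10 = halfword, xx100 = word, x1000 = doubleword), and the bits above that marker hold
// the index. That is why each resolver below ORs in a size marker and then shifts the index
// past it, e.g. for VectorArrangementS: imm5 = 0b100 | byte(index)<<3.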
// advancedSIMDCopy holds information to encode instructions as "Advanced SIMD copy" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDCopy = map[asm.Instruction]struct {
	op byte
	// TODO: extract common implementation of resolver.
	resolver func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error)
}{
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar-?lang=en
	DUPELEM: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
		imm4 = 0b0000
		q = 0b1

		switch arr {
		case VectorArrangementB:
			imm5 |= 0b1
			imm5 |= byte(srcIndex) << 1
		case VectorArrangementH:
			imm5 |= 0b10
			imm5 |= byte(srcIndex) << 2
		case VectorArrangementS:
			imm5 |= 0b100
			imm5 |= byte(srcIndex) << 3
		case VectorArrangementD:
			imm5 |= 0b1000
			imm5 |= byte(srcIndex) << 4
		default:
			err = fmt.Errorf("unsupported arrangement for DUPELEM: %s", arr)
		}

		return
	}},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en
	DUPGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
		imm4 = 0b0001
		switch arr {
		case VectorArrangement8B:
			imm5 = 0b1
		case VectorArrangement16B:
			imm5 = 0b1
			q = 0b1
		case VectorArrangement4H:
			imm5 = 0b10
		case VectorArrangement8H:
			imm5 = 0b10
			q = 0b1
		case VectorArrangement2S:
			imm5 = 0b100
		case VectorArrangement4S:
			imm5 = 0b100
			q = 0b1
		case VectorArrangement2D:
			imm5 = 0b1000
			q = 0b1
		default:
			err = fmt.Errorf("unsupported arrangement for DUPGEN: %s", arr)
		}
		return
	}},
	// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--general---Insert-vector-element-from-general-purpose-register-?lang=en
	INSGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
		imm4, q = 0b0011, 0b1
		switch arr {
		case VectorArrangementB:
			imm5 |= 0b1
			imm5 |= byte(dstIndex) << 1
		case VectorArrangementH:
			imm5 |= 0b10
			imm5 |= byte(dstIndex) << 2
		case VectorArrangementS:
			imm5 |= 0b100
			imm5 |= byte(dstIndex) << 3
		case VectorArrangementD:
			imm5 |= 0b1000
			imm5 |= byte(dstIndex) << 4
		default:
			err = fmt.Errorf("unsupported arrangement for INSGEN: %s", arr)
		}
		return
	}},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en
	UMOV: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
		imm4 = 0b0111
		switch arr {
		case VectorArrangementB:
			imm5 |= 0b1
			imm5 |= byte(srcIndex) << 1
		case VectorArrangementH:
			imm5 |= 0b10
			imm5 |= byte(srcIndex) << 2
		case VectorArrangementS:
			imm5 |= 0b100
			imm5 |= byte(srcIndex) << 3
		case VectorArrangementD:
			imm5 |= 0b1000
			imm5 |= byte(srcIndex) << 4
			q = 0b1
		default:
			err = fmt.Errorf("unsupported arrangement for UMOV: %s", arr)
		}
		return
	}},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMOV--Signed-Move-vector-element-to-general-purpose-register-?lang=en
	SMOV32: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
		imm4 = 0b0101
		switch arr {
		case VectorArrangementB:
			imm5 |= 0b1
			imm5 |= byte(srcIndex) << 1
		case VectorArrangementH:
			imm5 |= 0b10
			imm5 |= byte(srcIndex) << 2
		default:
			err = fmt.Errorf("unsupported arrangement for SMOV32: %s", arr)
		}
		return
	}},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en
	INSELEM: {op: 0b1, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) {
		q = 0b1
		switch arr {
		case VectorArrangementB:
			imm5 |= 0b1
			imm5 |= byte(dstIndex) << 1
			imm4 = byte(srcIndex)
		case VectorArrangementH:
			imm5 |= 0b10
			imm5 |= byte(dstIndex) << 2
			imm4 = byte(srcIndex) << 1
		case VectorArrangementS:
			imm5 |= 0b100
			imm5 |= byte(dstIndex) << 3
			imm4 = byte(srcIndex) << 2
		case VectorArrangementD:
			imm5 |= 0b1000
			imm5 |= byte(dstIndex) << 4
			imm4 = byte(srcIndex) << 3
		default:
			err = fmt.Errorf("unsupported arrangement for INSELEM: %s", arr)
		}
		return
	}},
}

// advancedSIMDTableLookup holds information to encode instructions as "Advanced SIMD table lookup" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDTableLookup = map[asm.Instruction]struct {
	op, op2, Len byte
	q            map[VectorArrangement]byte
}{
	TBL1: {op: 0b0, op2: 0b0, Len: 0b00, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}},
	TBL2: {op: 0b0, op2: 0b0, Len: 0b01, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}},
}
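// Illustrative note (editorial addition, not in the original source): in the "Advanced SIMD
// shift by immediate" class used by the next table, the 7-bit field immh:immb does not store
// the shift amount directly. For the shift-left-long family (SSHLL/USHLL), immh:immb is
// esize + shift, where esize is the source element size in bits: SSHLL from 16-bit lanes by 3
// encodes 16+3 = 19 = 0b0010011 (immh=0b0010, immb=0b011). For right shifts (SSHR),
// immh:immb is 2*esize - shift: SSHR on 32-bit lanes by 3 encodes 64-3 = 61 = 0b0111101
// (immh=0b0111, immb=0b101). The resolvers below implement exactly these two mappings.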
// advancedSIMDShiftByImmediate holds information to encode instructions as "Advanced SIMD shift by immediate" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDShiftByImmediate = map[asm.Instruction]struct {
	U, opcode   byte
	q           map[VectorArrangement]byte
	immResolver func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error)
}{
	// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/SSHLL--SSHLL2--Signed-Shift-Left-Long--immediate--
	SSHLL: {
		U: 0b0, opcode: 0b10100,
		q:           map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0},
		immResolver: immResolverForSIMDShiftLeftByImmediate,
	},
	// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/SSHLL--SSHLL2--Signed-Shift-Left-Long--immediate--
	SSHLL2: {
		U: 0b0, opcode: 0b10100,
		q:           map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1},
		immResolver: immResolverForSIMDShiftLeftByImmediate,
	},
	// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/USHLL--USHLL2--Unsigned-Shift-Left-Long--immediate--
	USHLL: {
		U: 0b1, opcode: 0b10100,
		q:           map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0},
		immResolver: immResolverForSIMDShiftLeftByImmediate,
	},
	// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/USHLL--USHLL2--Unsigned-Shift-Left-Long--immediate--
	USHLL2: {
		U: 0b1, opcode: 0b10100,
		q:           map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1},
		immResolver: immResolverForSIMDShiftLeftByImmediate,
	},
	// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHR--Signed-Shift-Right--immediate--?lang=en
	SSHR: {
		U: 0b0, opcode: 0b00000,
		q: map[VectorArrangement]byte{
			VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1, VectorArrangement2D: 0b1,
			VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0,
		},
		immResolver: func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) {
			switch arr {
			case VectorArrangement16B, VectorArrangement8B:
				immh = 0b0001
				immb = 8 - byte(shiftAmount&0b111)
			case VectorArrangement8H, VectorArrangement4H:
				v := 16 - byte(shiftAmount&0b1111)
				immb = v & 0b111
				immh = 0b0010 | (v >> 3)
			case VectorArrangement4S, VectorArrangement2S:
				v := 32 - byte(shiftAmount&0b11111)
				immb = v & 0b111
				immh = 0b0100 | (v >> 3)
			case VectorArrangement2D:
				v := 64 - byte(shiftAmount&0b111111)
				immb = v & 0b111
				immh = 0b1000 | (v >> 3)
			default:
				err = fmt.Errorf("unsupported arrangement %s", arr)
			}
			return
		},
	},
}

// advancedSIMDPermute holds information to encode instructions as "Advanced SIMD permute" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDPermute = map[asm.Instruction]struct {
	opcode byte
}{
	ZIP1: {opcode: 0b011},
}

func immResolverForSIMDShiftLeftByImmediate(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) {
	switch arr {
	case VectorArrangement16B, VectorArrangement8B:
		immb = byte(shiftAmount)
		immh = 0b0001
	case VectorArrangement8H, VectorArrangement4H:
		immb = byte(shiftAmount) & 0b111
		immh = 0b0010 | byte(shiftAmount>>3)
	case VectorArrangement4S, VectorArrangement2S:
		immb = byte(shiftAmount) & 0b111
		immh = 0b0100 | byte(shiftAmount>>3)
	default:
		err = fmt.Errorf("unsupported arrangement %s", arr)
	}
	return
}

// encodeAdvancedSIMDCopy encodes an instruction as "Advanced SIMD copy" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDCopy(srcRegBits, dstRegBits, op, imm5, imm4, q byte) {
	a.Buf.Write([]byte{
		(srcRegBits << 5) | dstRegBits,
		imm4<<3 | 0b1<<2 | srcRegBits>>3,
		imm5,
		q<<6 | op<<5 | 0b1110,
	})
}

// encodeAdvancedSIMDThreeSame encodes an instruction as "Advanced SIMD three same" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDThreeSame(src1, src2, dst, opcode, size, q, u byte) {
	a.Buf.Write([]byte{
		(src2 << 5) | dst,
		opcode<<3 | 1<<2 | src2>>3,
		size<<6 | 0b1<<5 | src1,
		q<<6 | u<<5 | 0b1110,
	})
}
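// Worked example (editorial addition, not in the original source): in the helper above,
// src1 supplies Rm, src2 supplies Rn, and dst supplies Rd. For "ADD V2.16B, V0.16B, V1.16B"
// (Rd=2, Rn=0, Rm=1), VADD gives u=0b0 and opcode=0b10000, and VectorArrangement16B gives
// size=0b00, q=0b1, so encodeAdvancedSIMDThreeSame(1, 0, 2, 0b10000, 0b00, 0b1, 0b0) emits
// the little-endian bytes 0x02, 0x84, 0x21, 0x4e, i.e. the A64 word 0x4e218402, which
// disassembles back to the same ADD.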
// encodeAdvancedSIMDThreeDifferent encodes an instruction as "Advanced SIMD three different" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDThreeDifferent(src1, src2, dst, opcode, size, q, u byte) {
	a.Buf.Write([]byte{
		(src2 << 5) | dst,
		opcode<<4 | src2>>3,
		size<<6 | 0b1<<5 | src1,
		q<<6 | u<<5 | 0b1110,
	})
}

// encodeAdvancedSIMDPermute encodes an instruction as "Advanced SIMD permute" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func (a *AssemblerImpl) encodeAdvancedSIMDPermute(src1, src2, dst, opcode, size, q byte) {
	a.Buf.Write([]byte{
		(src2 << 5) | dst,
		opcode<<4 | 0b1<<3 | src2>>3,
		size<<6 | src1,
		q<<6 | 0b1110,
	})
}
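// Worked example (editorial addition, not in the original source): the encodeAdvancedSIMDCopy
// helper above encodes "DUP V0.16B, W1" (DUPGEN) with op=0b0, imm5=0b00001, imm4=0b0001 and
// q=0b1. encodeAdvancedSIMDCopy(1, 0, 0b0, 0b00001, 0b0001, 0b1) therefore writes the bytes
// 0x20, 0x0c, 0x01, 0x4e, i.e. the A64 word 0x4e010c20.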
func (a *AssemblerImpl) encodeVectorRegisterToVectorRegister(n *nodeImpl) (err error) {
	var srcVectorRegBits byte
	if n.srcReg != RegRZR {
		srcVectorRegBits, err = vectorRegisterBits(n.srcReg)
	} else if n.instruction == CMEQZERO {
		// CMEQZERO lists RegRZR as its source, but the instruction compares the destination
		// register against zero, so the destination register bits are encoded as the source.
		srcVectorRegBits, err = vectorRegisterBits(n.dstReg)
	}

	if err != nil {
		return err
	}

	dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
	if err != nil {
		return err
	}

	if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
		imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
		if err != nil {
			return err
		}
		a.encodeAdvancedSIMDCopy(srcVectorRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q)
		return nil
	}

	if scalarPairwise, ok := advancedSIMDScalarPairwise[n.instruction]; ok {
		// See "Advanced SIMD scalar pairwise" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
		size, ok := scalarPairwise.size[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.Buf.Write([]byte{
			(srcVectorRegBits << 5) | dstVectorRegBits,
			scalarPairwise.opcode<<4 | 1<<3 | srcVectorRegBits>>3,
			size<<6 | 0b11<<4 | scalarPairwise.opcode>>4,
			0b1<<6 | scalarPairwise.u<<5 | 0b11110,
		})
		return nil
	}

	if twoRegMisc, ok := advancedSIMDTwoRegisterMisc[n.instruction]; ok {
		// See "Advanced SIMD two-register miscellaneous" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
		qs, ok := twoRegMisc.qAndSize[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.Buf.Write([]byte{
			(srcVectorRegBits << 5) | dstVectorRegBits,
			twoRegMisc.opcode<<4 | 0b1<<3 | srcVectorRegBits>>3,
			qs.size<<6 | 0b1<<5 | twoRegMisc.opcode>>4,
			qs.q<<6 | twoRegMisc.u<<5 | 0b01110,
		})
		return nil
	}

	if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok {
		qs, ok := threeSame.qAndSize[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.encodeAdvancedSIMDThreeSame(srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u)
		return nil
	}

	if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok {
		qs, ok := threeDifferent.qAndSize[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.encodeAdvancedSIMDThreeDifferent(srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u)
		return nil
	}

	if acrossLanes, ok := advancedSIMDAcrossLanes[n.instruction]; ok {
		// See "Advanced SIMD across lanes" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
		qs, ok := acrossLanes.qAndSize[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.Buf.Write([]byte{
			(srcVectorRegBits << 5) | dstVectorRegBits,
			acrossLanes.opcode<<4 | 0b1<<3 | srcVectorRegBits>>3,
			qs.size<<6 | 0b11000<<1 | acrossLanes.opcode>>4,
			qs.q<<6 | acrossLanes.u<<5 | 0b01110,
		})
		return nil
	}

	if lookup, ok := advancedSIMDTableLookup[n.instruction]; ok {
		q, ok := lookup.q[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.Buf.Write([]byte{
			(srcVectorRegBits << 5) | dstVectorRegBits,
			lookup.Len<<5 | lookup.op<<4 | srcVectorRegBits>>3,
			lookup.op2<<6 | dstVectorRegBits,
			q<<6 | 0b1110,
		})
		return nil
	}

	if shiftByImmediate, ok := advancedSIMDShiftByImmediate[n.instruction]; ok {
		immh, immb, err := shiftByImmediate.immResolver(n.srcConst, n.vectorArrangement)
		if err != nil {
			return err
		}

		q, ok := shiftByImmediate.q[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}

		a.Buf.Write([]byte{
			(srcVectorRegBits << 5) | dstVectorRegBits,
			shiftByImmediate.opcode<<3 | 0b1<<2 | srcVectorRegBits>>3,
			immh<<3 | immb,
			q<<6 | shiftByImmediate.U<<5 | 0b1111,
		})
		return nil
	}

	if permute, ok := advancedSIMDPermute[n.instruction]; ok {
		size, q := arrangementSizeQ(n.vectorArrangement)
		a.encodeAdvancedSIMDPermute(srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, permute.opcode, size, q)
		return nil
	}
	return errorEncodingUnsupported(n)
}
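// Worked example (editorial addition, not in the original source): for "ADDV B0, V1.16B",
// the across-lanes branch in encodeVectorRegisterToVectorRegister above uses u=0b0 and
// opcode=0b11011, and VectorArrangement16B gives size=0b00, q=0b1. With srcVectorRegBits=1
// and dstVectorRegBits=0 it writes the bytes 0x20, 0xb8, 0x31, 0x4e, i.e. the A64 word
// 0x4e31b820.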
func (a *AssemblerImpl) encodeTwoVectorRegistersToVectorRegister(n *nodeImpl) (err error) {
	var srcRegBits, srcRegBits2, dstRegBits byte
	srcRegBits, err = vectorRegisterBits(n.srcReg)
	if err != nil {
		return err
	}

	srcRegBits2, err = vectorRegisterBits(n.srcReg2)
	if err != nil {
		return err
	}

	dstRegBits, err = vectorRegisterBits(n.dstReg)
	if err != nil {
		return err
	}

	if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok {
		qs, ok := threeSame.qAndSize[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.encodeAdvancedSIMDThreeSame(srcRegBits, srcRegBits2, dstRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u)
		return nil
	}

	if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok {
		qs, ok := threeDifferent.qAndSize[n.vectorArrangement]
		if !ok {
			return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction))
		}
		a.encodeAdvancedSIMDThreeDifferent(srcRegBits, srcRegBits2, dstRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u)
		return nil
	}

	if permute, ok := advancedSIMDPermute[n.instruction]; ok {
		size, q := arrangementSizeQ(n.vectorArrangement)
		a.encodeAdvancedSIMDPermute(srcRegBits, srcRegBits2, dstRegBits, permute.opcode, size, q)
		return nil
	}

	if n.instruction == EXT {
		// EXT is the only instruction in "Advanced SIMD extract", so inline the encoding here.
		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en
		var q, imm4 byte
		switch n.vectorArrangement {
		case VectorArrangement16B:
			imm4 = 0b1111 & byte(n.srcConst)
			q = 0b1
		case VectorArrangement8B:
			imm4 = 0b111 & byte(n.srcConst)
		default:
			return fmt.Errorf("invalid arrangement %s for EXT", n.vectorArrangement)
		}
		a.Buf.Write([]byte{
			(srcRegBits2 << 5) | dstRegBits,
			imm4<<3 | srcRegBits2>>3,
			srcRegBits,
			q<<6 | 0b101110,
		})
		return nil
	}
	return errorEncodingUnsupported(n)
}

func (a *AssemblerImpl) encodeVectorRegisterToRegister(n *nodeImpl) (err error) {
	if err = checkArrangementIndexPair(n.vectorArrangement, n.srcVectorIndex); err != nil {
		return
	}

	srcVecRegBits, err := vectorRegisterBits(n.srcReg)
	if err != nil {
		return err
	}

	dstRegBits, err := intRegisterBits(n.dstReg)
	if err != nil {
		return err
	}

	if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
		imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
		if err != nil {
			return err
		}
		a.encodeAdvancedSIMDCopy(srcVecRegBits, dstRegBits, simdCopy.op, imm5, imm4, q)
		return nil
	}
	return errorEncodingUnsupported(n)
}

func (a *AssemblerImpl) encodeRegisterToVectorRegister(n *nodeImpl) (err error) {
	srcRegBits, err := intRegisterBits(n.srcReg)
	if err != nil {
		return err
	}

	dstVectorRegBits, err := vectorRegisterBits(n.dstReg)
	if err != nil {
		return err
	}

	if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok {
		imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement)
		if err != nil {
			return err
		}
		a.encodeAdvancedSIMDCopy(srcRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q)
		return nil
	}
	return errorEncodingUnsupported(n)
}

var zeroRegisterBits byte = 0b11111

func isIntRegister(r asm.Register) bool {
	return RegR0 <= r && r <= RegRZR
}

func isVectorRegister(r asm.Register) bool {
	return RegV0 <= r && r <= RegV31
}

func isConditionalRegister(r asm.Register) bool {
	return RegCondEQ <= r && r <= RegCondNV
}

func intRegisterBits(r asm.Register) (ret byte, err error) {
	if !isIntRegister(r) {
		err = fmt.Errorf("%s is not an integer register", RegisterName(r))
	} else {
		ret = byte(r - RegR0)
	}
	return
}
func vectorRegisterBits(r asm.Register) (ret byte, err error) {
	if !isVectorRegister(r) {
		err = fmt.Errorf("%s is not a vector register", RegisterName(r))
	} else {
		ret = byte(r - RegV0)
	}
	return
}

// registerBits returns the 5-bit register number without validation: callers must ensure
// beforehand that r is either an integer or a vector register, since anything outside the
// integer range is treated as a vector register.
func registerBits(r asm.Register) (ret byte) {
	if isIntRegister(r) {
		ret = byte(r - RegR0)
	} else {
		ret = byte(r - RegV0)
	}
	return
}
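// Illustrative usage sketch (editorial addition, not in the original source). It assumes an
// AssemblerImpl can be constructed with just its Buf set, which is the only field the
// encoders above touch; real callers go through the asm.Assembler interface instead.
//
//	a := &AssemblerImpl{Buf: new(bytes.Buffer)}
//	n := &nodeImpl{
//		instruction:       VADD,
//		srcReg:            RegV1, // Rm
//		srcReg2:           RegV0, // Rn
//		dstReg:            RegV2, // Rd
//		vectorArrangement: VectorArrangement16B,
//	}
//	if err := a.encodeTwoVectorRegistersToVectorRegister(n); err == nil {
//		fmt.Printf("% x\n", a.Buf.Bytes()) // expected: 02 84 21 4e (ADD V2.16B, V0.16B, V1.16B)
//	}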