github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/asm/arm64/impl.go

package arm64

import (
	"encoding/binary"
	"errors"
	"fmt"

	"github.com/wasilibs/wazerox/internal/asm"
)

type nodeImpl struct {
	// jumpTarget holds the target node in the linked list for a jump-kind instruction.
	jumpTarget *nodeImpl
	// next holds the next node from this node in the assembled linked list.
	next        *nodeImpl
	staticConst *asm.StaticConst

	instruction asm.Instruction

	types                            operandTypes
	srcReg, srcReg2, dstReg, dstReg2 asm.Register
	srcConst, dstConst               asm.ConstantValue

	offsetInBinary asm.NodeOffsetInBinary

	// readInstructionAddressBeforeTargetInstruction holds the instruction right before the target of
	// the read-instruction-address instruction. See asm.AssemblerBase.CompileReadInstructionAddress.
	readInstructionAddressBeforeTargetInstruction asm.Instruction

	vectorArrangement              VectorArrangement
	srcVectorIndex, dstVectorIndex VectorIndex
}

// AssignJumpTarget implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignJumpTarget(target asm.Node) {
	n.jumpTarget = target.(*nodeImpl)
}

// AssignDestinationConstant implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignDestinationConstant(value asm.ConstantValue) {
	n.dstConst = value
}

// AssignSourceConstant implements the same method as documented on asm.Node.
func (n *nodeImpl) AssignSourceConstant(value asm.ConstantValue) {
	n.srcConst = value
}

// OffsetInBinary implements the same method as documented on asm.Node.
func (n *nodeImpl) OffsetInBinary() asm.NodeOffsetInBinary {
	return n.offsetInBinary
}
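// Note that nodeImpl deliberately reuses fields across operand types (for
// example, CompileThreeRegistersToRegister stores its third source operand
// in dstReg) so that the struct, and therefore the node pool below, stays small.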
// String implements fmt.Stringer.
//
// This is for debugging purposes, and the format is similar to AT&T assembly
// syntax, meaning that this should look like "INSTRUCTION ${from}, ${to}", where
// an operand may be enclosed in '[]' to represent a memory location, and
// multiple operands are enclosed in '()'.
func (n *nodeImpl) String() (ret string) {
	instName := InstructionName(n.instruction)
	switch n.types {
	case operandTypesNoneToNone:
		ret = instName
	case operandTypesNoneToRegister:
		ret = fmt.Sprintf("%s %s", instName, RegisterName(n.dstReg))
	case operandTypesNoneToBranch:
		ret = fmt.Sprintf("%s {%v}", instName, n.jumpTarget)
	case operandTypesRegisterToRegister:
		ret = fmt.Sprintf("%s %s, %s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg))
	case operandTypesLeftShiftedRegisterToRegister:
		ret = fmt.Sprintf("%s (%s, %s << %d), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), n.srcConst, RegisterName(n.dstReg))
	case operandTypesTwoRegistersToRegister:
		ret = fmt.Sprintf("%s (%s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
	case operandTypesThreeRegistersToRegister:
		ret = fmt.Sprintf("%s (%s, %s, %s), %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), RegisterName(n.dstReg2))
	case operandTypesTwoRegistersToNone:
		ret = fmt.Sprintf("%s (%s, %s)", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2))
	case operandTypesRegisterAndConstToNone:
		ret = fmt.Sprintf("%s (%s, 0x%x)", instName, RegisterName(n.srcReg), n.srcConst)
	case operandTypesRegisterAndConstToRegister:
		ret = fmt.Sprintf("%s (%s, 0x%x), %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
	case operandTypesRegisterToMemory:
		if n.dstReg2 != asm.NilRegister {
			ret = fmt.Sprintf("%s %s, [%s + %s]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), RegisterName(n.dstReg2))
		} else {
			ret = fmt.Sprintf("%s %s, [%s + 0x%x]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesMemoryToRegister:
		if n.srcReg2 != asm.NilRegister {
			ret = fmt.Sprintf("%s [%s + %s], %s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg))
		} else {
			ret = fmt.Sprintf("%s [%s + 0x%x], %s", instName, RegisterName(n.srcReg), n.srcConst, RegisterName(n.dstReg))
		}
	case operandTypesConstToRegister:
		ret = fmt.Sprintf("%s 0x%x, %s", instName, n.srcConst, RegisterName(n.dstReg))
	case operandTypesRegisterToVectorRegister:
		ret = fmt.Sprintf("%s %s, %s.%s[%d]", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement, n.dstVectorIndex)
	case operandTypesVectorRegisterToRegister:
		ret = fmt.Sprintf("%s %s.%s[%d], %s", instName, RegisterName(n.srcReg), n.vectorArrangement, n.srcVectorIndex, RegisterName(n.dstReg))
	case operandTypesVectorRegisterToMemory:
		if n.dstReg2 != asm.NilRegister {
			ret = fmt.Sprintf("%s %s.%s, [%s + %s]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), RegisterName(n.dstReg2))
		} else {
			ret = fmt.Sprintf("%s %s.%s, [%s + 0x%x]", instName, RegisterName(n.srcReg), n.vectorArrangement, RegisterName(n.dstReg), n.dstConst)
		}
	case operandTypesMemoryToVectorRegister:
		ret = fmt.Sprintf("%s [%s], %s.%s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
	case operandTypesVectorRegisterToVectorRegister:
		ret = fmt.Sprintf("%s %[2]s.%[4]s, %[3]s.%[4]s", instName, RegisterName(n.srcReg), RegisterName(n.dstReg), n.vectorArrangement)
	case operandTypesStaticConstToVectorRegister:
		ret = fmt.Sprintf("%s $%#x %s.%s", instName, n.staticConst.Raw, RegisterName(n.dstReg), n.vectorArrangement)
	case operandTypesTwoVectorRegistersToVectorRegister:
		ret = fmt.Sprintf("%s (%s.%[5]s, %[3]s.%[5]s), %[4]s.%[5]s", instName, RegisterName(n.srcReg), RegisterName(n.srcReg2), RegisterName(n.dstReg), n.vectorArrangement)
	}
	return
}
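// For example, a node compiled via CompileRegisterToRegister(ADD, RegR1, RegR2)
// prints as "ADD R1, R2" (assuming RegisterName renders RegR1 as "R1").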
// operandTypes represents the types of operands of a node.
type operandTypes byte

const (
	operandTypesNoneToNone operandTypes = iota
	operandTypesNoneToRegister
	operandTypesNoneToBranch
	operandTypesRegisterToRegister
	operandTypesLeftShiftedRegisterToRegister
	operandTypesTwoRegistersToRegister
	operandTypesThreeRegistersToRegister
	operandTypesTwoRegistersToNone
	operandTypesRegisterAndConstToNone
	operandTypesRegisterAndConstToRegister
	operandTypesRegisterToMemory
	operandTypesMemoryToRegister
	operandTypesConstToRegister
	operandTypesRegisterToVectorRegister
	operandTypesVectorRegisterToRegister
	operandTypesMemoryToVectorRegister
	operandTypesVectorRegisterToMemory
	operandTypesVectorRegisterToVectorRegister
	operandTypesTwoVectorRegistersToVectorRegister
	operandTypesStaticConstToVectorRegister
)

// String implements fmt.Stringer.
func (o operandTypes) String() (ret string) {
	switch o {
	case operandTypesNoneToNone:
		ret = "NoneToNone"
	case operandTypesNoneToRegister:
		ret = "NoneToRegister"
	case operandTypesNoneToBranch:
		ret = "NoneToBranch"
	case operandTypesRegisterToRegister:
		ret = "RegisterToRegister"
	case operandTypesLeftShiftedRegisterToRegister:
		ret = "LeftShiftedRegisterToRegister"
	case operandTypesTwoRegistersToRegister:
		ret = "TwoRegistersToRegister"
	case operandTypesThreeRegistersToRegister:
		ret = "ThreeRegistersToRegister"
	case operandTypesTwoRegistersToNone:
		ret = "TwoRegistersToNone"
	case operandTypesRegisterAndConstToNone:
		ret = "RegisterAndConstToNone"
	case operandTypesRegisterAndConstToRegister:
		ret = "RegisterAndConstToRegister"
	case operandTypesRegisterToMemory:
		ret = "RegisterToMemory"
	case operandTypesMemoryToRegister:
		ret = "MemoryToRegister"
	case operandTypesConstToRegister:
		ret = "ConstToRegister"
	case operandTypesRegisterToVectorRegister:
		ret = "RegisterToVectorRegister"
	case operandTypesVectorRegisterToRegister:
		ret = "VectorRegisterToRegister"
	case operandTypesMemoryToVectorRegister:
		ret = "MemoryToVectorRegister"
	case operandTypesVectorRegisterToMemory:
		ret = "VectorRegisterToMemory"
	case operandTypesVectorRegisterToVectorRegister:
		ret = "VectorRegisterToVectorRegister"
	case operandTypesTwoVectorRegistersToVectorRegister:
		ret = "TwoVectorRegistersToVectorRegister"
	case operandTypesStaticConstToVectorRegister:
		ret = "StaticConstToVectorRegister"
	}
	return
}

const (
	maxSignedInt26 int64 = 1<<25 - 1
	minSignedInt26 int64 = -(1 << 25)

	// A signed 19-bit immediate ranges over [-2^18, 2^18-1].
	maxSignedInt19 int64 = 1<<18 - 1
	minSignedInt19 int64 = -(1 << 18)
)
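// The imm26 field of B and the imm19 field of conditional branches hold word
// (4-byte) offsets, so the bounds above correspond to byte displacements of
// about ±128MiB and ±1MiB respectively.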
// AssemblerImpl implements Assembler.
type AssemblerImpl struct {
	root    *nodeImpl
	current *nodeImpl
	asm.BaseAssemblerImpl
	relativeJumpNodes   []*nodeImpl
	adrInstructionNodes []*nodeImpl
	nodePool            nodePool
	pool                asm.StaticConstPool
	nodeCount           int

	// MaxDisplacementForConstantPool is fixed to defaultMaxDisplacementForConstPool,
	// but is declared as a field here for testability.
	MaxDisplacementForConstantPool int

	temporaryRegister asm.Register
}

const nodePageSize = 128

type nodePage = [nodePageSize]nodeImpl

// nodePool is the central allocation pool for nodeImpl used by a single AssemblerImpl.
// This reduces allocations across compilations by reusing the AssemblerImpl.
type nodePool struct {
	pages []*nodePage
	index int
}

// allocNode allocates a new nodeImpl for use from the pool.
// This expands the pool if there is no space left for it.
func (n *nodePool) allocNode() *nodeImpl {
	if n.index == nodePageSize {
		if len(n.pages) == cap(n.pages) {
			n.pages = append(n.pages, new(nodePage))
		} else {
			i := len(n.pages)
			n.pages = n.pages[:i+1]
			if n.pages[i] == nil {
				n.pages[i] = new(nodePage)
			}
		}
		n.index = 0
	}
	ret := &n.pages[len(n.pages)-1][n.index]
	n.index++
	return ret
}

func (n *nodePool) reset() {
	for _, ns := range n.pages {
		pages := ns[:]
		for i := range pages {
			pages[i] = nodeImpl{}
		}
	}
	n.pages = n.pages[:0]
	n.index = nodePageSize
}

func NewAssembler(temporaryRegister asm.Register) *AssemblerImpl {
	return &AssemblerImpl{
		nodePool:                       nodePool{index: nodePageSize},
		temporaryRegister:              temporaryRegister,
		pool:                           asm.NewStaticConstPool(),
		MaxDisplacementForConstantPool: defaultMaxDisplacementForConstPool,
	}
}

// AllocateNOP implements asm.AssemblerBase.
func (a *AssemblerImpl) AllocateNOP() asm.Node {
	n := a.nodePool.allocNode()
	n.instruction = NOP
	n.types = operandTypesNoneToNone
	return n
}

// Add implements asm.AssemblerBase.
func (a *AssemblerImpl) Add(n asm.Node) {
	a.addNode(n.(*nodeImpl))
}

// Reset implements asm.AssemblerBase.
func (a *AssemblerImpl) Reset() {
	pool := a.pool
	pool.Reset()
	*a = AssemblerImpl{
		nodePool:            a.nodePool,
		pool:                pool,
		temporaryRegister:   a.temporaryRegister,
		adrInstructionNodes: a.adrInstructionNodes[:0],
		relativeJumpNodes:   a.relativeJumpNodes[:0],
		BaseAssemblerImpl: asm.BaseAssemblerImpl{
			SetBranchTargetOnNextNodes: a.SetBranchTargetOnNextNodes[:0],
			JumpTableEntries:           a.JumpTableEntries[:0],
		},
	}
	a.nodePool.reset()
}

// newNode creates a new Node and appends it into the linked list.
func (a *AssemblerImpl) newNode(instruction asm.Instruction, types operandTypes) *nodeImpl {
	n := a.nodePool.allocNode()
	n.instruction = instruction
	n.types = types

	a.addNode(n)
	return n
}

// addNode appends the new node into the linked list.
func (a *AssemblerImpl) addNode(node *nodeImpl) {
	a.nodeCount++

	if a.root == nil {
		a.root = node
		a.current = node
	} else {
		parent := a.current
		parent.next = node
		a.current = node
	}

	for _, o := range a.SetBranchTargetOnNextNodes {
		origin := o.(*nodeImpl)
		origin.jumpTarget = node
	}
	// Reuse the underlying slice to avoid re-allocations.
	a.SetBranchTargetOnNextNodes = a.SetBranchTargetOnNextNodes[:0]
}
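// Typical lifecycle (a sketch, not from the original source): append nodes via
// the Compile* methods, call Assemble exactly once, then Reset to reuse the
// node and constant pools for the next function:
//
//	a.CompileConstToRegister(MOVD, 1, RegR0)
//	if err := a.Assemble(buf); err != nil { /* handle the error */ }
//	a.Reset()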
// Assemble implements asm.AssemblerBase.
func (a *AssemblerImpl) Assemble(buf asm.Buffer) error {
	// arm64 instructions are a fixed 4 bytes, but some nodes are encoded as
	// multiple instructions, so we reserve nodeCount*8 bytes as a hint; the
	// resulting binary is not necessarily that exact size.
	buf.Grow(a.nodeCount * 8)

	for n := a.root; n != nil; n = n.next {
		n.offsetInBinary = uint64(buf.Len())
		if err := a.encodeNode(buf, n); err != nil {
			return err
		}
		a.maybeFlushConstPool(buf, n.next == nil)
	}

	code := buf.Bytes()

	if err := a.FinalizeJumpTableEntry(code); err != nil {
		return err
	}

	for _, rel := range a.relativeJumpNodes {
		if err := a.relativeBranchFinalize(code, rel); err != nil {
			return err
		}
	}

	for _, adr := range a.adrInstructionNodes {
		if err := a.finalizeADRInstructionNode(code, adr); err != nil {
			return err
		}
	}
	return nil
}

const defaultMaxDisplacementForConstPool = (1 << 20) - 1 - 4 // -4 for the unconditional branch emitted to skip the constants.

// maybeFlushConstPool flushes the constant pool if endOfBinary or a boundary condition was met.
func (a *AssemblerImpl) maybeFlushConstPool(buf asm.Buffer, endOfBinary bool) {
	if a.pool.Empty() {
		return
	}

	// If endOfBinary = true, we no longer emit instructions, therefore
	// flush all the constants.
	if endOfBinary ||
		// Also, if the displacement between the first usage of the constant pool and
		// the first constant would exceed 2^20 - 1 (= 1MiB - 1), which is the maximum
		// displacement for LDR (literal)/ADR instructions, flush all the constants in the pool.
		(buf.Len()+a.pool.PoolSizeInBytes-int(a.pool.FirstUseOffsetInBinary)) >= a.MaxDisplacementForConstantPool {

		// Before emitting the consts, we have to add a B instruction to skip over the const pool.
		// https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1123-L1129
		skipOffset := a.pool.PoolSizeInBytes/4 + 1
		if a.pool.PoolSizeInBytes%4 != 0 {
			skipOffset++
		}
		if endOfBinary {
			// If this is the end of the binary, the skip branch is never executed,
			// so the offset can be zero (which is the behavior of Go's assembler).
			skipOffset = 0
		}

		buf.Append4Bytes(
			byte(skipOffset),
			byte(skipOffset>>8),
			byte(skipOffset>>16),
			0x14,
		)

		// Then add the consts into the binary.
		for _, c := range a.pool.Consts {
			c.SetOffsetInBinary(uint64(buf.Len()))
			buf.AppendBytes(c.Raw)
		}

		// arm64 instructions are 4-byte (32-bit) aligned, so we must pad the consts with zeros here.
		if pad := buf.Len() % 4; pad != 0 {
			buf.AppendBytes(make([]byte, 4-pad))
		}

		// After the flush, reset the constant pool.
		a.pool.Reset()
	}
}
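// A worked example for the skip branch above (not in the original source):
// with PoolSizeInBytes = 6, skipOffset = 6/4 + 1 = 2, plus 1 for the 6%4
// remainder, giving 3. The emitted word is 0x14000003 ("B .+12"), which jumps
// over the branch itself plus the 8 bytes of padded constants.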
// encodeNode encodes the given node into the given buffer.
func (a *AssemblerImpl) encodeNode(buf asm.Buffer, n *nodeImpl) (err error) {
	switch n.types {
	case operandTypesNoneToNone:
		err = a.encodeNoneToNone(buf, n)
	case operandTypesNoneToRegister:
		err = a.encodeJumpToRegister(buf, n)
	case operandTypesNoneToBranch:
		err = a.encodeRelativeBranch(buf, n)
	case operandTypesRegisterToRegister:
		err = a.encodeRegisterToRegister(buf, n)
	case operandTypesLeftShiftedRegisterToRegister:
		err = a.encodeLeftShiftedRegisterToRegister(buf, n)
	case operandTypesTwoRegistersToRegister:
		err = a.encodeTwoRegistersToRegister(buf, n)
	case operandTypesThreeRegistersToRegister:
		err = a.encodeThreeRegistersToRegister(buf, n)
	case operandTypesTwoRegistersToNone:
		err = a.encodeTwoRegistersToNone(buf, n)
	case operandTypesRegisterAndConstToNone:
		err = a.encodeRegisterAndConstToNone(buf, n)
	case operandTypesRegisterToMemory:
		err = a.encodeRegisterToMemory(buf, n)
	case operandTypesMemoryToRegister:
		err = a.encodeMemoryToRegister(buf, n)
	case operandTypesRegisterAndConstToRegister, operandTypesConstToRegister:
		err = a.encodeConstToRegister(buf, n)
	case operandTypesRegisterToVectorRegister:
		err = a.encodeRegisterToVectorRegister(buf, n)
	case operandTypesVectorRegisterToRegister:
		err = a.encodeVectorRegisterToRegister(buf, n)
	case operandTypesMemoryToVectorRegister:
		err = a.encodeMemoryToVectorRegister(buf, n)
	case operandTypesVectorRegisterToMemory:
		err = a.encodeVectorRegisterToMemory(buf, n)
	case operandTypesVectorRegisterToVectorRegister:
		err = a.encodeVectorRegisterToVectorRegister(buf, n)
	case operandTypesStaticConstToVectorRegister:
		err = a.encodeStaticConstToVectorRegister(buf, n)
	case operandTypesTwoVectorRegistersToVectorRegister:
		err = a.encodeTwoVectorRegistersToVectorRegister(buf, n)
	default:
		err = fmt.Errorf("encoder undefined for [%s] operand type", n.types)
	}
	if err != nil {
		err = fmt.Errorf("%w: %s", err, n) // Ensure the error is debuggable by including the string form of the node.
	}
	return
}

// CompileStandAlone implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileStandAlone(instruction asm.Instruction) asm.Node {
	return a.newNode(instruction, operandTypesNoneToNone)
}

// CompileConstToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileConstToRegister(
	instruction asm.Instruction,
	value asm.ConstantValue,
	destinationReg asm.Register,
) (inst asm.Node) {
	n := a.newNode(instruction, operandTypesConstToRegister)
	n.srcConst = value
	n.dstReg = destinationReg
	return n
}

// CompileRegisterToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToRegister(instruction asm.Instruction, from, to asm.Register) {
	n := a.newNode(instruction, operandTypesRegisterToRegister)
	n.srcReg = from
	n.dstReg = to
}
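// For example (a sketch): CompileRegisterToRegister(ADD, RegR1, RegR2) appends
// a node that encodeRegisterToRegister later emits as "add x2, x2, x1", i.e.
// dst = dst + src.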
// CompileMemoryToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileMemoryToRegister(
	instruction asm.Instruction,
	sourceBaseReg asm.Register,
	sourceOffsetConst asm.ConstantValue,
	destinationReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.srcReg = sourceBaseReg
	n.srcConst = sourceOffsetConst
	n.dstReg = destinationReg
}

// CompileRegisterToMemory implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileRegisterToMemory(
	instruction asm.Instruction,
	sourceRegister, destinationBaseRegister asm.Register,
	destinationOffsetConst asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = sourceRegister
	n.dstReg = destinationBaseRegister
	n.dstConst = destinationOffsetConst
}

// CompileJump implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJump(jmpInstruction asm.Instruction) asm.Node {
	return a.newNode(jmpInstruction, operandTypesNoneToBranch)
}

// CompileJumpToRegister implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileJumpToRegister(jmpInstruction asm.Instruction, reg asm.Register) {
	n := a.newNode(jmpInstruction, operandTypesNoneToRegister)
	n.dstReg = reg
}

// CompileReadInstructionAddress implements the same method as documented on asm.AssemblerBase.
func (a *AssemblerImpl) CompileReadInstructionAddress(
	destinationRegister asm.Register,
	beforeAcquisitionTargetInstruction asm.Instruction,
) {
	n := a.newNode(ADR, operandTypesMemoryToRegister)
	n.dstReg = destinationRegister
	n.readInstructionAddressBeforeTargetInstruction = beforeAcquisitionTargetInstruction
}

// CompileMemoryWithRegisterOffsetToRegister implements Assembler.CompileMemoryWithRegisterOffsetToRegister.
func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToRegister(
	instruction asm.Instruction,
	srcBaseReg, srcOffsetReg, dstReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.dstReg = dstReg
	n.srcReg = srcBaseReg
	n.srcReg2 = srcOffsetReg
}

// CompileMemoryWithRegisterSourceToRegister implements Assembler.CompileMemoryWithRegisterSourceToRegister.
func (a *AssemblerImpl) CompileMemoryWithRegisterSourceToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.dstReg = dstReg
	n.srcReg = srcReg
}

// CompileRegisterToMemoryWithRegisterOffset implements Assembler.CompileRegisterToMemoryWithRegisterOffset.
func (a *AssemblerImpl) CompileRegisterToMemoryWithRegisterOffset(
	instruction asm.Instruction,
	srcReg, dstBaseReg, dstOffsetReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstReg2 = dstOffsetReg
}

// CompileRegisterToMemoryWithRegisterDest implements Assembler.CompileRegisterToMemoryWithRegisterDest.
func (a *AssemblerImpl) CompileRegisterToMemoryWithRegisterDest(instruction asm.Instruction, srcReg, dstReg asm.Register) {
	n := a.newNode(instruction, operandTypesRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstReg
}
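// Sketch of the two load addressing modes above (register names assumed):
// CompileMemoryToRegister(MOVD, RegR0, 16, RegR2) loads from [R0 + 16], while
// CompileMemoryWithRegisterOffsetToRegister(MOVD, RegR0, RegR1, RegR2) loads
// from [R0 + R1].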
// CompileTwoRegistersToRegister implements Assembler.CompileTwoRegistersToRegister.
func (a *AssemblerImpl) CompileTwoRegistersToRegister(instruction asm.Instruction, src1, src2, dst asm.Register) {
	n := a.newNode(instruction, operandTypesTwoRegistersToRegister)
	n.srcReg = src1
	n.srcReg2 = src2
	n.dstReg = dst
}

// CompileThreeRegistersToRegister implements Assembler.CompileThreeRegistersToRegister.
func (a *AssemblerImpl) CompileThreeRegistersToRegister(
	instruction asm.Instruction,
	src1, src2, src3, dst asm.Register,
) {
	n := a.newNode(instruction, operandTypesThreeRegistersToRegister)
	n.srcReg = src1
	n.srcReg2 = src2
	n.dstReg = src3 // To minimize the size of the nodeImpl struct, we reuse dstReg for the third source operand.
	n.dstReg2 = dst
}

// CompileTwoRegistersToNone implements Assembler.CompileTwoRegistersToNone.
func (a *AssemblerImpl) CompileTwoRegistersToNone(instruction asm.Instruction, src1, src2 asm.Register) {
	n := a.newNode(instruction, operandTypesTwoRegistersToNone)
	n.srcReg = src1
	n.srcReg2 = src2
}

// CompileRegisterAndConstToNone implements Assembler.CompileRegisterAndConstToNone.
func (a *AssemblerImpl) CompileRegisterAndConstToNone(
	instruction asm.Instruction,
	src asm.Register,
	srcConst asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesRegisterAndConstToNone)
	n.srcReg = src
	n.srcConst = srcConst
}

// CompileRegisterAndConstToRegister implements Assembler.CompileRegisterAndConstToRegister.
func (a *AssemblerImpl) CompileRegisterAndConstToRegister(
	instruction asm.Instruction,
	src asm.Register,
	srcConst asm.ConstantValue,
	dst asm.Register,
) {
	n := a.newNode(instruction, operandTypesRegisterAndConstToRegister)
	n.srcReg = src
	n.srcConst = srcConst
	n.dstReg = dst
}

// CompileLeftShiftedRegisterToRegister implements Assembler.CompileLeftShiftedRegisterToRegister.
func (a *AssemblerImpl) CompileLeftShiftedRegisterToRegister(
	instruction asm.Instruction,
	shiftedSourceReg asm.Register,
	shiftNum asm.ConstantValue,
	srcReg, dstReg asm.Register,
) {
	n := a.newNode(instruction, operandTypesLeftShiftedRegisterToRegister)
	n.srcReg = srcReg
	n.srcReg2 = shiftedSourceReg
	n.srcConst = shiftNum
	n.dstReg = dstReg
}

// CompileConditionalRegisterSet implements Assembler.CompileConditionalRegisterSet.
func (a *AssemblerImpl) CompileConditionalRegisterSet(cond asm.ConditionalRegisterState, dstReg asm.Register) {
	n := a.newNode(CSET, operandTypesRegisterToRegister)
	n.srcReg = conditionalRegisterStateToRegister(cond)
	n.dstReg = dstReg
}

// CompileMemoryToVectorRegister implements Assembler.CompileMemoryToVectorRegister.
func (a *AssemblerImpl) CompileMemoryToVectorRegister(
	instruction asm.Instruction, srcBaseReg asm.Register, srcOffset asm.ConstantValue, dstReg asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
	n.srcReg = srcBaseReg
	n.srcConst = srcOffset
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}
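// Sketch (instruction and arrangement assumed, not from the original source):
// CompileMemoryToVectorRegister(VMOV, RegR0, 0, RegV0, VectorArrangementQ)
// appends a node intended to load the 16 bytes at [R0] into V0.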
// CompileMemoryWithRegisterOffsetToVectorRegister implements Assembler.CompileMemoryWithRegisterOffsetToVectorRegister.
func (a *AssemblerImpl) CompileMemoryWithRegisterOffsetToVectorRegister(instruction asm.Instruction,
	srcBaseReg, srcOffsetRegister asm.Register, dstReg asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesMemoryToVectorRegister)
	n.srcReg = srcBaseReg
	n.srcReg2 = srcOffsetRegister
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileVectorRegisterToMemory implements Assembler.CompileVectorRegisterToMemory.
func (a *AssemblerImpl) CompileVectorRegisterToMemory(
	instruction asm.Instruction, srcReg, dstBaseReg asm.Register, dstOffset asm.ConstantValue, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstConst = dstOffset
	n.vectorArrangement = arrangement
}

// CompileVectorRegisterToMemoryWithRegisterOffset implements Assembler.CompileVectorRegisterToMemoryWithRegisterOffset.
func (a *AssemblerImpl) CompileVectorRegisterToMemoryWithRegisterOffset(instruction asm.Instruction,
	srcReg, dstBaseReg, dstOffsetRegister asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToMemory)
	n.srcReg = srcReg
	n.dstReg = dstBaseReg
	n.dstReg2 = dstOffsetRegister
	n.vectorArrangement = arrangement
}

// CompileRegisterToVectorRegister implements Assembler.CompileRegisterToVectorRegister.
func (a *AssemblerImpl) CompileRegisterToVectorRegister(
	instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, index VectorIndex,
) {
	n := a.newNode(instruction, operandTypesRegisterToVectorRegister)
	n.srcReg = srcReg
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
	n.dstVectorIndex = index
}

// CompileVectorRegisterToRegister implements Assembler.CompileVectorRegisterToRegister.
func (a *AssemblerImpl) CompileVectorRegisterToRegister(instruction asm.Instruction, srcReg, dstReg asm.Register,
	arrangement VectorArrangement, index VectorIndex,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToRegister)
	n.srcReg = srcReg
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
	n.srcVectorIndex = index
}

// CompileVectorRegisterToVectorRegister implements Assembler.CompileVectorRegisterToVectorRegister.
func (a *AssemblerImpl) CompileVectorRegisterToVectorRegister(
	instruction asm.Instruction, srcReg, dstReg asm.Register, arrangement VectorArrangement, srcIndex, dstIndex VectorIndex,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
	n.srcReg = srcReg
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
	n.srcVectorIndex = srcIndex
	n.dstVectorIndex = dstIndex
}

// CompileVectorRegisterToVectorRegisterWithConst implements Assembler.CompileVectorRegisterToVectorRegisterWithConst.
func (a *AssemblerImpl) CompileVectorRegisterToVectorRegisterWithConst(instruction asm.Instruction,
	srcReg, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesVectorRegisterToVectorRegister)
	n.srcReg = srcReg
	n.srcConst = c
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileStaticConstToRegister implements Assembler.CompileStaticConstToRegister.
func (a *AssemblerImpl) CompileStaticConstToRegister(instruction asm.Instruction, c *asm.StaticConst, dstReg asm.Register) {
	n := a.newNode(instruction, operandTypesMemoryToRegister)
	n.staticConst = c
	n.dstReg = dstReg
}
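// Note that CompileStaticConstToRegister reuses operandTypesMemoryToRegister:
// the constant is placed in the constant pool and later loaded PC-relative,
// so encoding-wise it is handled like a memory load.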
// CompileStaticConstToVectorRegister implements Assembler.CompileStaticConstToVectorRegister.
func (a *AssemblerImpl) CompileStaticConstToVectorRegister(instruction asm.Instruction,
	c *asm.StaticConst, dstReg asm.Register, arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesStaticConstToVectorRegister)
	n.staticConst = c
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileTwoVectorRegistersToVectorRegister implements Assembler.CompileTwoVectorRegistersToVectorRegister.
func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegister(instruction asm.Instruction, srcReg, srcReg2, dstReg asm.Register,
	arrangement VectorArrangement,
) {
	n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
	n.srcReg = srcReg
	n.srcReg2 = srcReg2
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

// CompileTwoVectorRegistersToVectorRegisterWithConst implements Assembler.CompileTwoVectorRegistersToVectorRegisterWithConst.
func (a *AssemblerImpl) CompileTwoVectorRegistersToVectorRegisterWithConst(instruction asm.Instruction,
	srcReg, srcReg2, dstReg asm.Register, arrangement VectorArrangement, c asm.ConstantValue,
) {
	n := a.newNode(instruction, operandTypesTwoVectorRegistersToVectorRegister)
	n.srcReg = srcReg
	n.srcReg2 = srcReg2
	n.srcConst = c
	n.dstReg = dstReg
	n.vectorArrangement = arrangement
}

func errorEncodingUnsupported(n *nodeImpl) error {
	return fmt.Errorf("%s is unsupported for %s type", InstructionName(n.instruction), n.types)
}

func (a *AssemblerImpl) encodeNoneToNone(buf asm.Buffer, n *nodeImpl) error {
	switch n.instruction {
	case UDF:
		buf.Append4Bytes(0, 0, 0, 0)
		return nil
	case DMB:
		buf.Append4Bytes(
			0b10111111,
			0b00111011,
			0b00000011,
			0b11010101,
		)
		return nil
	case NOP:
		return nil
	default:
		return errorEncodingUnsupported(n)
	}
}

func (a *AssemblerImpl) encodeJumpToRegister(buf asm.Buffer, n *nodeImpl) error {
	// "Unconditional branch (register)" in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Branches--Exception-Generating-and-System-instructions
	var opc byte
	switch n.instruction {
	case RET:
		opc = 0b0010
	case B:
		opc = 0b0000
	default:
		return errorEncodingUnsupported(n)
	}

	regBits, err := intRegisterBits(n.dstReg)
	if err != nil {
		return fmt.Errorf("invalid destination register: %w", err)
	}

	buf.Append4Bytes(
		0x00|(regBits<<5),
		0x00|(regBits>>3),
		0b000_11111|(opc<<5),
		0b1101011_0|(opc>>3),
	)
	return nil
}
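// A worked example for encodeJumpToRegister (not in the original source):
// RET with the link register (regBits 0b11110) and opc 0b0010 yields the
// bytes c0 03 5f d6, i.e. the little-endian word 0xd65f03c0 ("ret x30").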
func (a *AssemblerImpl) relativeBranchFinalize(code []byte, n *nodeImpl) error {
	var condBits byte
	const condBitsUnconditional = 0xff // Indicates that this is not a conditional jump.

	// https://developer.arm.com/documentation/den0024/a/CHDEEABE
	switch n.instruction {
	case B:
		condBits = condBitsUnconditional
	case BCONDEQ:
		condBits = 0b0000
	case BCONDGE:
		condBits = 0b1010
	case BCONDGT:
		condBits = 0b1100
	case BCONDHI:
		condBits = 0b1000
	case BCONDHS:
		condBits = 0b0010
	case BCONDLE:
		condBits = 0b1101
	case BCONDLO:
		condBits = 0b0011
	case BCONDLS:
		condBits = 0b1001
	case BCONDLT:
		condBits = 0b1011
	case BCONDMI:
		condBits = 0b0100
	case BCONDPL:
		condBits = 0b0101
	case BCONDNE:
		condBits = 0b0001
	case BCONDVS:
		condBits = 0b0110
	case BCONDVC:
		condBits = 0b0111
	}

	branchInstOffset := int64(n.OffsetInBinary())
	offset := int64(n.jumpTarget.OffsetInBinary()) - branchInstOffset
	if offset%4 != 0 {
		return errors.New("BUG: relative jump offset must be 4-byte aligned")
	}

	branchInst := code[branchInstOffset : branchInstOffset+4]
	if condBits == condBitsUnconditional {
		imm26 := offset >> 2 // divide by 4.
		if imm26 < minSignedInt26 || imm26 > maxSignedInt26 {
			// In theory this could happen if a Wasm binary has a huge single label (more than 128MiB
			// for a single block). In that case we would have to load the offset into a register and
			// branch via the register, but to avoid that complexity we impose this limit for now, as
			// it is unlikely to happen in practice.
			return fmt.Errorf("relative jump offset %d/4 must be within %d and %d", offset, minSignedInt26, maxSignedInt26)
		}
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B--Branch-?lang=en
		branchInst[0] = byte(imm26)
		branchInst[1] = byte(imm26 >> 8)
		branchInst[2] = byte(imm26 >> 16)
		branchInst[3] = (byte(imm26 >> 24 & 0b000000_11)) | 0b000101_00
	} else {
		imm19 := offset >> 2 // divide by 4.
		if imm19 < minSignedInt19 || imm19 > maxSignedInt19 {
			// This would be a bug in our compiler, as conditional jumps are only used for small
			// offsets (a few bytes); if it ever happens, the compiler can be fixed.
			return fmt.Errorf("BUG: relative jump offset %d/4(=%d) must be within %d and %d", offset, imm19, minSignedInt19, maxSignedInt19)
		}
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/B-cond--Branch-conditionally-?lang=en
		branchInst[0] = (byte(imm19<<5) & 0b111_0_0000) | condBits
		branchInst[1] = byte(imm19 >> 3)
		branchInst[2] = byte(imm19 >> 11)
		branchInst[3] = 0b01010100
	}
	return nil
}
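// Worked example (not in the original source): a BCONDEQ node whose target
// sits 8 bytes ahead has imm19 = 2 and condBits = 0b0000, so the patched
// word is 0x54000040, i.e. "b.eq .+8".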
func (a *AssemblerImpl) encodeRelativeBranch(buf asm.Buffer, n *nodeImpl) error {
	switch n.instruction {
	case B, BCONDEQ, BCONDGE, BCONDGT, BCONDHI, BCONDHS, BCONDLE, BCONDLO, BCONDLS, BCONDLT, BCONDMI, BCONDNE, BCONDVS, BCONDVC, BCONDPL:
	default:
		return errorEncodingUnsupported(n)
	}

	if n.jumpTarget == nil {
		return fmt.Errorf("branch target must be set for %s", InstructionName(n.instruction))
	}

	// At this point we don't yet know the target's offset, so emit a placeholder (4 bytes)
	// to be patched by relativeBranchFinalize.
	buf.Append4Bytes(0, 0, 0, 0)
	a.relativeJumpNodes = append(a.relativeJumpNodes, n)
	return nil
}

func checkRegisterToRegisterType(src, dst asm.Register, requireSrcInt, requireDstInt bool) (err error) {
	isSrcInt, isDstInt := isIntRegister(src), isIntRegister(dst)
	if isSrcInt && !requireSrcInt {
		err = fmt.Errorf("src requires float register but got %s", RegisterName(src))
	} else if !isSrcInt && requireSrcInt {
		err = fmt.Errorf("src requires int register but got %s", RegisterName(src))
	} else if isDstInt && !requireDstInt {
		err = fmt.Errorf("dst requires float register but got %s", RegisterName(dst))
	} else if !isDstInt && requireDstInt {
		err = fmt.Errorf("dst requires int register but got %s", RegisterName(dst))
	}
	return
}

func (a *AssemblerImpl) encodeRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
	switch inst := n.instruction; inst {
	case ADD, ADDW, SUB:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		var sfops byte
		switch inst {
		case ADD:
			sfops = 0b100
		case ADDW: // sfops stays 0b000.
		case SUB:
			sfops = 0b110
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		buf.Append4Bytes(
			(dstRegBits<<5)|dstRegBits,
			dstRegBits>>3,
			srcRegBits,
			(sfops<<5)|0b01011,
		)
	case CLZ, CLZW, RBIT, RBITW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		var sf, opcode byte
		switch inst {
		case CLZ:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
			sf, opcode = 0b1, 0b000_100
		case CLZW:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CLZ--Count-Leading-Zeros-?lang=en
			sf, opcode = 0b0, 0b000_100
		case RBIT:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
			sf, opcode = 0b1, 0b000_000
		case RBITW:
			// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/RBIT--Reverse-Bits-?lang=en
			sf, opcode = 0b0, 0b000_000
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		buf.Append4Bytes(
			(srcRegBits<<5)|dstRegBits,
			opcode<<2|(srcRegBits>>3),
			0b110_00000,
			(sf<<7)|0b0_1011010,
		)
	case CSET:
		if !isConditionalRegister(n.srcReg) {
			return fmt.Errorf("CSET requires conditional register but got %s", RegisterName(n.srcReg))
		}

		dstRegBits, err := intRegisterBits(n.dstReg)
		if err != nil {
			return err
		}

		// CSET encodes the conditional bits with the least significant bit inverted.
		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en
		//
		// https://developer.arm.com/documentation/den0024/a/CHDEEABE
		var conditionalBits byte
		switch n.srcReg {
		case RegCondEQ:
			conditionalBits = 0b0001
		case RegCondNE:
			conditionalBits = 0b0000
		case RegCondHS:
			conditionalBits = 0b0011
		case RegCondLO:
			conditionalBits = 0b0010
		case RegCondMI:
			conditionalBits = 0b0101
		case RegCondPL:
			conditionalBits = 0b0100
		case RegCondVS:
			conditionalBits = 0b0111
		case RegCondVC:
			conditionalBits = 0b0110
		case RegCondHI:
			conditionalBits = 0b1001
		case RegCondLS:
			conditionalBits = 0b1000
		case RegCondGE:
			conditionalBits = 0b1011
		case RegCondLT:
			conditionalBits = 0b1010
		case RegCondGT:
			conditionalBits = 0b1101
		case RegCondLE:
			conditionalBits = 0b1100
		case RegCondAL:
			conditionalBits = 0b1111
		case RegCondNV:
			conditionalBits = 0b1110
		}

		// https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CSET--Conditional-Set--an-alias-of-CSINC-?lang=en
		buf.Append4Bytes(
			0b111_00000|dstRegBits,
			(conditionalBits<<4)|0b0000_0111,
			0b100_11111,
			0b10011010,
		)

	case FABSD, FABSS, FNEGD, FNEGS, FSQRTD, FSQRTS, FCVTSD, FCVTDS, FRINTMD, FRINTMS,
		FRINTND, FRINTNS, FRINTPD, FRINTPS, FRINTZD, FRINTZS:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var tp, opcode byte
		switch inst {
		case FABSD:
			opcode, tp = 0b000001, 0b01
		case FABSS:
			opcode, tp = 0b000001, 0b00
		case FNEGD:
			opcode, tp = 0b000010, 0b01
		case FNEGS:
			opcode, tp = 0b000010, 0b00
		case FSQRTD:
			opcode, tp = 0b000011, 0b01
		case FSQRTS:
			opcode, tp = 0b000011, 0b00
		case FCVTSD:
			opcode, tp = 0b000101, 0b00
		case FCVTDS:
			opcode, tp = 0b000100, 0b01
		case FRINTMD:
			opcode, tp = 0b001010, 0b01
		case FRINTMS:
			opcode, tp = 0b001010, 0b00
		case FRINTND:
			opcode, tp = 0b001000, 0b01
		case FRINTNS:
			opcode, tp = 0b001000, 0b00
		case FRINTPD:
			opcode, tp = 0b001001, 0b01
		case FRINTPS:
			opcode, tp = 0b001001, 0b00
		case FRINTZD:
			opcode, tp = 0b001011, 0b01
		case FRINTZS:
			opcode, tp = 0b001011, 0b00
		}
		buf.Append4Bytes(
			(srcRegBits<<5)|dstRegBits,
			(opcode<<7)|0b0_10000_00|(srcRegBits>>3),
			tp<<6|0b00_1_00000|opcode>>1,
			0b0_00_11110,
		)

	case FADDD, FADDS, FDIVS, FDIVD, FMAXD, FMAXS, FMIND, FMINS, FMULS, FMULD:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, false); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		// "Floating-point data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var tp, opcode byte
		switch inst {
		case FADDD:
			opcode, tp = 0b0010, 0b01
		case FADDS:
			opcode, tp = 0b0010, 0b00
		case FDIVD:
			opcode, tp = 0b0001, 0b01
		case FDIVS:
			opcode, tp = 0b0001, 0b00
		case FMAXD:
			opcode, tp = 0b0100, 0b01
		case FMAXS:
			opcode, tp = 0b0100, 0b00
		case FMIND:
			opcode, tp = 0b0101, 0b01
		case FMINS:
			opcode, tp = 0b0101, 0b00
		case FMULS:
			opcode, tp = 0b0000, 0b00
		case FMULD:
			opcode, tp = 0b0000, 0b01
		}

		buf.Append4Bytes(
			(dstRegBits<<5)|dstRegBits,
			opcode<<4|0b0000_10_00|(dstRegBits>>3),
			tp<<6|0b00_1_00000|srcRegBits,
			0b0001_1110,
		)

	case FCVTZSD, FCVTZSDW, FCVTZSS, FCVTZSSW, FCVTZUD, FCVTZUDW, FCVTZUS, FCVTZUSW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, false, true); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		// "Conversion between floating-point and integer" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var sf, tp, opcode byte
		switch inst {
		case FCVTZSD: // Double to signed 64-bit.
			sf, tp, opcode = 0b1, 0b01, 0b000
		case FCVTZSDW: // Double to signed 32-bit.
			sf, tp, opcode = 0b0, 0b01, 0b000
		case FCVTZSS: // Single to signed 64-bit.
			sf, tp, opcode = 0b1, 0b00, 0b000
		case FCVTZSSW: // Single to signed 32-bit.
			sf, tp, opcode = 0b0, 0b00, 0b000
		case FCVTZUD: // Double to unsigned 64-bit.
			sf, tp, opcode = 0b1, 0b01, 0b001
		case FCVTZUDW: // Double to unsigned 32-bit.
			sf, tp, opcode = 0b0, 0b01, 0b001
		case FCVTZUS: // Single to unsigned 64-bit.
			sf, tp, opcode = 0b1, 0b00, 0b001
		case FCVTZUSW: // Single to unsigned 32-bit.
			sf, tp, opcode = 0b0, 0b00, 0b001
		}

		buf.Append4Bytes(
			(srcRegBits<<5)|dstRegBits,
			0|(srcRegBits>>3),
			tp<<6|0b00_1_11_000|opcode,
			sf<<7|0b0_0_0_11110,
		)

	case FMOVD, FMOVS:
		isSrcInt, isDstInt := isIntRegister(n.srcReg), isIntRegister(n.dstReg)
		if isSrcInt && isDstInt {
			return errors.New("FMOV requires at least one of the operands to be a float register")
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMOV--register---Floating-point-Move-register-without-conversion-?lang=en
		if !isSrcInt && !isDstInt { // Float to float.
			var tp byte
			if inst == FMOVD {
				tp = 0b01
			}
			buf.Append4Bytes(
				(srcRegBits<<5)|dstRegBits,
				0b0_10000_00|(srcRegBits>>3),
				tp<<6|0b00_1_00000,
				0b000_11110,
			)
		} else if isSrcInt && !isDstInt { // Int to float.
			var tp, sf byte
			if inst == FMOVD {
				tp, sf = 0b01, 0b1
			}
			buf.Append4Bytes(
				(srcRegBits<<5)|dstRegBits,
				srcRegBits>>3,
				tp<<6|0b00_1_00_111,
				sf<<7|0b0_00_11110,
			)
		} else { // Float to int.
			var tp, sf byte
			if inst == FMOVD {
				tp, sf = 0b01, 0b1
			}
			buf.Append4Bytes(
				(srcRegBits<<5)|dstRegBits,
				srcRegBits>>3,
				tp<<6|0b00_1_00_110,
				sf<<7|0b0_00_11110,
			)
		}

	case MOVD, MOVW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if n.srcReg == RegSP || n.dstReg == RegSP {
			// Moving to/from the stack pointer.
			// https://developer.arm.com/documentation/ddi0602/2021-12/Base-Instructions/MOV--to-from-SP---Move-between-register-and-stack-pointer--an-alias-of-ADD--immediate--
			buf.Append4Bytes(
				(srcRegBits<<5)|dstRegBits,
				srcRegBits>>3,
				0x0,
				0b1001_0001,
			)
			return
		}

		if n.srcReg == RegRZR && inst == MOVD {
			// If this is a 64-bit mov from the zero register, we encode it as MOVZ with immediate zero.
			// See "Move wide (immediate)" in
			// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Immediate
			buf.Append4Bytes(
				dstRegBits,
				0x0,
				0b1000_0000,
				0b1_10_10010,
			)
		} else {
			// MOV can be encoded as ORR (shifted register): "ORR Wd, WZR, Wm".
			// https://developer.arm.com/documentation/100069/0609/A64-General-Instructions/MOV--register-
			var sf byte
			if inst == MOVD {
				sf = 0b1
			}
			buf.Append4Bytes(
				(zeroRegisterBits<<5)|dstRegBits,
				zeroRegisterBits>>3,
				0b000_00000|srcRegBits,
				sf<<7|0b0_01_01010,
			)
		}

	case MRS:
		if n.srcReg != RegFPSR {
			return fmt.Errorf("MRS only supports the FPSR register as a src but got %s", RegisterName(n.srcReg))
		}

		// For how to specify the FPSR register, see "Accessing FPSR" in:
		// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
		dstRegBits := registerBits(n.dstReg)
		buf.Append4Bytes(
			0b001<<5|dstRegBits,
			0b0100<<4|0b0100,
			0b0011_0000|0b11<<3|0b011,
			0b1101_0101,
		)

	case MSR:
		if n.dstReg != RegFPSR {
			return fmt.Errorf("MSR only supports the FPSR register as a dst but got %s", RegisterName(n.dstReg))
		}

		// For how to specify the FPSR register, see "Accessing FPSR" in:
		// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/FPSR--Floating-point-Status-Register?lang=en
		srcRegBits := registerBits(n.srcReg)
		buf.Append4Bytes(
			0b001<<5|srcRegBits,
			0b0100<<4|0b0100,
			0b0001_0000|0b11<<3|0b011,
			0b1101_0101,
		)

	case MUL, MULW:
		// Multiplication is encoded as MADD with the zero register as the addend:
		// dst = XZR + (src * dst) = src * dst.
		// See "Data-processing (3 source)" in
		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		var sf byte
		if inst == MUL {
			sf = 0b1
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		buf.Append4Bytes(
			dstRegBits<<5|dstRegBits,
			zeroRegisterBits<<2|dstRegBits>>3,
			srcRegBits,
			sf<<7|0b11011,
		)

	case NEG, NEGW:
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		// NEG is encoded as "SUB dst, XZR, src" = "dst = 0 - src".
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		var sf byte
		if inst == NEG {
			sf = 0b1
		}

		buf.Append4Bytes(
			(zeroRegisterBits<<5)|dstRegBits,
			zeroRegisterBits>>3,
			srcRegBits,
			sf<<7|0b0_10_00000|0b0_00_01011,
		)

	case SDIV, SDIVW, UDIV, UDIVW:
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		// See "Data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
		var sf, opcode byte
		switch inst {
		case SDIV:
			sf, opcode = 0b1, 0b000011
		case SDIVW:
			sf, opcode = 0b0, 0b000011
		case UDIV:
			sf, opcode = 0b1, 0b000010
		case UDIVW:
			sf, opcode = 0b0, 0b000010
		}

		buf.Append4Bytes(
			(dstRegBits<<5)|dstRegBits,
			opcode<<2|(dstRegBits>>3),
			0b110_00000|srcRegBits,
			sf<<7|0b0_00_11010,
		)

	case SCVTFD, SCVTFWD, SCVTFS, SCVTFWS, UCVTFD, UCVTFS, UCVTFWD, UCVTFWS:
		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)

		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, false); err != nil {
			return
		}

		// "Conversion between floating-point and integer" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en#floatdp1
		var sf, tp, opcode byte
		switch inst {
		case SCVTFD: // 64-bit signed integer to double.
			sf, tp, opcode = 0b1, 0b01, 0b010
		case SCVTFWD: // 32-bit signed integer to double.
			sf, tp, opcode = 0b0, 0b01, 0b010
		case SCVTFS: // 64-bit signed integer to single.
			sf, tp, opcode = 0b1, 0b00, 0b010
		case SCVTFWS: // 32-bit signed integer to single.
			sf, tp, opcode = 0b0, 0b00, 0b010
		case UCVTFD: // 64-bit unsigned integer to double.
			sf, tp, opcode = 0b1, 0b01, 0b011
		case UCVTFWD: // 32-bit unsigned integer to double.
			sf, tp, opcode = 0b0, 0b01, 0b011
		case UCVTFS: // 64-bit unsigned integer to single.
			sf, tp, opcode = 0b1, 0b00, 0b011
		case UCVTFWS: // 32-bit unsigned integer to single.
			sf, tp, opcode = 0b0, 0b00, 0b011
		}

		buf.Append4Bytes(
			(srcRegBits<<5)|dstRegBits,
			srcRegBits>>3,
			tp<<6|0b00_1_00_000|opcode,
			sf<<7|0b0_0_0_11110,
		)

	case SXTB, SXTBW, SXTH, SXTHW, SXTW:
		if err = checkRegisterToRegisterType(n.srcReg, n.dstReg, true, true); err != nil {
			return
		}

		srcRegBits, dstRegBits := registerBits(n.srcReg), registerBits(n.dstReg)
		if n.srcReg == RegRZR {
			// If the source is the zero register, we encode this as MOV dst, zero
			// (ORR with the zero register), with sf=1 for the 64-bit variants.
			var sf byte
			if inst == SXTB || inst == SXTH || inst == SXTW {
				sf = 0b1
			}
			buf.Append4Bytes(
				(zeroRegisterBits<<5)|dstRegBits,
				zeroRegisterBits>>3,
				0b000_00000|srcRegBits,
				sf<<7|0b0_01_01010,
			)
			return
		}

		// SXTB is encoded as "SBFM Wd, Wn, #0, #7".
		// https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTB
		// SXTH is encoded as "SBFM Wd, Wn, #0, #15".
		// https://developer.arm.com/documentation/dui0801/g/A64-General-Instructions/SXTH
		// SXTW is encoded as "SBFM Xd, Xn, #0, #31".
		// https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/SXTW

		var nBit, sf, imms, opc byte
		switch inst {
		case SXTB:
			nBit, sf, imms = 0b1, 0b1, 0x7
		case SXTBW:
			nBit, sf, imms = 0b0, 0b0, 0x7
		case SXTH:
			nBit, sf, imms = 0b1, 0b1, 0xf
		case SXTHW:
			nBit, sf, imms = 0b0, 0b0, 0xf
		case SXTW:
			nBit, sf, imms = 0b1, 0b1, 0x1f
		}

		buf.Append4Bytes(
			(srcRegBits<<5)|dstRegBits,
			imms<<2|(srcRegBits>>3),
			nBit<<6,
			sf<<7|opc<<5|0b10011,
		)
	default:
		return errorEncodingUnsupported(n)
	}
	return
}

func (a *AssemblerImpl) encodeLeftShiftedRegisterToRegister(buf asm.Buffer, n *nodeImpl) error {
	baseRegBits, err := intRegisterBits(n.srcReg)
	if err != nil {
		return err
	}
	shiftTargetRegBits, err := intRegisterBits(n.srcReg2)
	if err != nil {
		return err
	}
	dstRegBits, err := intRegisterBits(n.dstReg)
	if err != nil {
		return err
	}

	switch n.instruction {
	case ADD:
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
		const logicalLeftShiftBits = 0b00
		if n.srcConst < 0 || n.srcConst > 63 {
			return fmt.Errorf("shift amount must fit in unsigned 6-bit integer (0-63) but got %d", n.srcConst)
		}
		shiftByte := byte(n.srcConst)
		buf.Append4Bytes(
			(baseRegBits<<5)|dstRegBits,
			(shiftByte<<2)|(baseRegBits>>3),
			(logicalLeftShiftBits<<6)|shiftTargetRegBits,
			0b1000_1011,
		)
		return nil
	default:
		return errorEncodingUnsupported(n)
	}
}

func (a *AssemblerImpl) encodeTwoRegistersToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
	switch inst := n.instruction; inst {
	case AND, ANDW, ORR, ORRW, ORN, ORNW, EOR, EORW:
		// See "Logical (shifted register)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)
		var sf, opc, nBit byte
		switch inst {
		case AND:
			sf, opc = 0b1, 0b00
		case ANDW:
			sf, opc = 0b0, 0b00
		case ORR:
			sf, opc = 0b1, 0b01
		case ORRW:
			sf, opc = 0b0, 0b01
		case ORN:
			sf, opc, nBit = 0b1, 0b01, 0b1
		case ORNW:
			sf, opc, nBit = 0b0, 0b01, 0b1
		case EOR:
			sf, opc = 0b1, 0b10
		case EORW:
			sf, opc = 0b0, 0b10
		}
		buf.Append4Bytes(
			(srcReg2Bits<<5)|dstRegBits,
			srcReg2Bits>>3,
			(nBit<<5)|srcRegBits,
			sf<<7|opc<<5|0b01010,
		)
	case ASR, ASRW, LSL, LSLW, LSR, LSRW, ROR, RORW:
		// See "Data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		var sf, opcode byte
		switch inst {
		case ASR:
			sf, opcode = 0b1, 0b001010
		case ASRW:
			sf, opcode = 0b0, 0b001010
		case LSL:
			sf, opcode = 0b1, 0b001000
		case LSLW:
			sf, opcode = 0b0, 0b001000
		case LSR:
			sf, opcode = 0b1, 0b001001
		case LSRW:
			sf, opcode = 0b0, 0b001001
		case ROR:
			sf, opcode = 0b1, 0b001011
		case RORW:
			sf, opcode = 0b0, 0b001011
		}
		buf.Append4Bytes(
			(srcReg2Bits<<5)|dstRegBits,
			opcode<<2|(srcReg2Bits>>3),
			0b110_00000|srcRegBits,
			sf<<7|0b0_00_11010,
		)
	case SDIV, SDIVW, UDIV, UDIVW:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// See "Data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0602/2021-06/Index-by-Encoding/Data-Processing----Register?lang=en
		var sf, opcode byte
		switch inst {
		case SDIV:
			sf, opcode = 0b1, 0b000011
		case SDIVW:
			sf, opcode = 0b0, 0b000011
		case UDIV:
			sf, opcode = 0b1, 0b000010
		case UDIVW:
			sf, opcode = 0b0, 0b000010
		}

		buf.Append4Bytes(
			(srcReg2Bits<<5)|dstRegBits,
			opcode<<2|(srcReg2Bits>>3),
			0b110_00000|srcRegBits,
			sf<<7|0b0_00_11010,
		)
	case SUB, SUBW:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// See "Add/subtract (shifted register)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en
		var sf byte
		if inst == SUB {
			sf = 0b1
		}

		buf.Append4Bytes(
			(srcReg2Bits<<5)|dstRegBits,
			srcReg2Bits>>3,
			srcRegBits,
			sf<<7|0b0_10_01011,
		)
	case FSUBD, FSUBS:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// See "Floating-point data-processing (2 source)" in
		// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
		var tp byte
		if inst == FSUBD {
			tp = 0b01
		}
		buf.Append4Bytes(
			(srcReg2Bits<<5)|dstRegBits,
			0b0011_10_00|(srcReg2Bits>>3),
			tp<<6|0b00_1_00000|srcRegBits,
			0b0_00_11110,
		)

	case LDADDALD, LDADDALW, LDADDALH, LDADDALB,
		LDCLRALD, LDCLRALW, LDCLRALH, LDCLRALB,
		LDSETALD, LDSETALW, LDSETALH, LDSETALB,
		LDEORALD, LDEORALW, LDEORALH, LDEORALB,
		SWPALD, SWPALW, SWPALH, SWPALB:
		srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg)

		// While Arm's docs don't group the opcodes for the arithmetic and swap instructions together,
		// they are actually identical except for the single bit before the opcode being 1 for swap,
		// so we encode them together.
1604 var size, opcode byte 1605 switch n.instruction { 1606 case LDADDALD: 1607 size, opcode = 0b11, 0b0000 1608 case LDADDALW: 1609 size, opcode = 0b10, 0b0000 1610 case LDADDALH: 1611 size, opcode = 0b01, 0b0000 1612 case LDADDALB: 1613 size, opcode = 0b00, 0b0000 1614 case LDCLRALD: 1615 size, opcode = 0b11, 0b0001 1616 case LDCLRALW: 1617 size, opcode = 0b10, 0b0001 1618 case LDCLRALH: 1619 size, opcode = 0b01, 0b0001 1620 case LDCLRALB: 1621 size, opcode = 0b00, 0b0001 1622 case LDSETALD: 1623 size, opcode = 0b11, 0b0011 1624 case LDSETALW: 1625 size, opcode = 0b10, 0b0011 1626 case LDSETALH: 1627 size, opcode = 0b01, 0b0011 1628 case LDSETALB: 1629 size, opcode = 0b00, 0b0011 1630 case LDEORALD: 1631 size, opcode = 0b11, 0b0010 1632 case LDEORALW: 1633 size, opcode = 0b10, 0b0010 1634 case LDEORALH: 1635 size, opcode = 0b01, 0b0010 1636 case LDEORALB: 1637 size, opcode = 0b00, 0b0010 1638 case SWPALD: 1639 size, opcode = 0b11, 0b1000 1640 case SWPALW: 1641 size, opcode = 0b10, 0b1000 1642 case SWPALH: 1643 size, opcode = 0b01, 0b1000 1644 case SWPALB: 1645 size, opcode = 0b00, 0b1000 1646 } 1647 1648 buf.Append4Bytes( 1649 (srcReg2Bits<<5)|dstRegBits, 1650 (opcode<<4)|(srcReg2Bits>>3), 1651 0b111_00000|srcRegBits, 1652 (size<<6)|0b00_111_000, 1653 ) 1654 1655 case CASALD, CASALW, CASALH, CASALB: 1656 srcRegBits, srcReg2Bits, dstRegBits := registerBits(n.srcReg), registerBits(n.srcReg2), registerBits(n.dstReg) 1657 1658 var size byte 1659 switch n.instruction { 1660 case CASALD: 1661 size = 0b11 1662 case CASALW: 1663 size = 0b10 1664 case CASALH: 1665 size = 0b01 1666 case CASALB: 1667 size = 0b00 1668 } 1669 1670 buf.Append4Bytes( 1671 (srcReg2Bits<<5)|dstRegBits, 1672 0b111111_00|(srcReg2Bits>>3), 1673 0b111_00000|srcRegBits, 1674 (size<<6)|0b00_001_000, 1675 ) 1676 1677 default: 1678 return errorEncodingUnsupported(n) 1679 } 1680 return 1681 } 1682 1683 func (a *AssemblerImpl) encodeThreeRegistersToRegister(buf asm.Buffer, n *nodeImpl) error { 1684 switch n.instruction { 1685 case MSUB, MSUBW: 1686 // Dst = Src2 - (Src1 * Src3) 1687 // "Data-processing (3 source)" in: 1688 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en 1689 src1RegBits, err := intRegisterBits(n.srcReg) 1690 if err != nil { 1691 return err 1692 } 1693 src2RegBits, err := intRegisterBits(n.srcReg2) 1694 if err != nil { 1695 return err 1696 } 1697 src3RegBits, err := intRegisterBits(n.dstReg) 1698 if err != nil { 1699 return err 1700 } 1701 dstRegBits, err := intRegisterBits(n.dstReg2) 1702 if err != nil { 1703 return err 1704 } 1705 1706 var sf byte // is zero for MSUBW (32-bit MSUB). 1707 if n.instruction == MSUB { 1708 sf = 0b1 1709 } 1710 1711 buf.Append4Bytes( 1712 (src3RegBits<<5)|dstRegBits, 1713 0b1_0000000|(src2RegBits<<2)|(src3RegBits>>3), 1714 src1RegBits, 1715 sf<<7|0b00_11011, 1716 ) 1717 return nil 1718 1719 default: 1720 return errorEncodingUnsupported(n) 1721 } 1722 } 1723 1724 func (a *AssemblerImpl) encodeTwoRegistersToNone(buf asm.Buffer, n *nodeImpl) error { 1725 switch n.instruction { 1726 case CMPW, CMP: 1727 // Compare on two registers is an alias for "SUBS (src1, src2) ZERO" 1728 // which can be encoded as SUBS (shifted registers) with zero shifting. 
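// For example, CMP X1, X2 is the same instruction as SUBS XZR, X1, X2.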
1729 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Register?lang=en#addsub_shift
1730 src1RegBits, err := intRegisterBits(n.srcReg)
1731 if err != nil {
1732 return err
1733 }
1734 src2RegBits, err := intRegisterBits(n.srcReg2)
1735 if err != nil {
1736 return err
1737 }
1738
1739 var op byte
1740 if n.instruction == CMP {
1741 op = 0b111
1742 } else {
1743 op = 0b011
1744 }
1745
1746 buf.Append4Bytes(
1747 (src2RegBits<<5)|zeroRegisterBits,
1748 src2RegBits>>3,
1749 src1RegBits,
1750 0b01011|(op<<5),
1751 )
1752 return nil
1753 case FCMPS, FCMPD:
1754 // "Floating-point compare" section in:
1755 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
1756 src1RegBits, err := vectorRegisterBits(n.srcReg)
1757 if err != nil {
1758 return err
1759 }
1760 src2RegBits, err := vectorRegisterBits(n.srcReg2)
1761 if err != nil {
1762 return err
1763 }
1764
1765 var ftype byte // is zero for FCMPS (single precision float compare).
1766 if n.instruction == FCMPD {
1767 ftype = 0b01
1768 }
1769 buf.Append4Bytes(
1770 src2RegBits<<5,
1771 0b001000_00|(src2RegBits>>3),
1772 ftype<<6|0b1_00000|src1RegBits,
1773 0b000_11110,
1774 )
1775 return nil
1776 default:
1777 return errorEncodingUnsupported(n)
1778 }
1779 }
1780
1781 func (a *AssemblerImpl) encodeRegisterAndConstToNone(buf asm.Buffer, n *nodeImpl) error {
1782 if n.instruction != CMP {
1783 return errorEncodingUnsupported(n)
1784 }
1785
1786 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/CMP--immediate---Compare--immediate---an-alias-of-SUBS--immediate--?lang=en
1787 if n.srcConst < 0 || n.srcConst > 4095 {
1788 return fmt.Errorf("immediate for CMP must fit in 0 to 4095 but got %d", n.srcConst)
1789 } else if n.srcReg == RegRZR {
1790 return errors.New("zero register is not supported for CMP (immediate)")
1791 }
1792
1793 srcRegBits, err := intRegisterBits(n.srcReg)
1794 if err != nil {
1795 return err
1796 }
1797
1798 buf.Append4Bytes(
1799 (srcRegBits<<5)|zeroRegisterBits,
1800 (byte(n.srcConst)<<2)|(srcRegBits>>3),
1801 byte(n.srcConst>>6),
1802 0b111_10001,
1803 )
1804 return nil
1805 }
1806
1807 func fitInSigned9Bits(v int64) bool {
1808 return v >= -256 && v <= 255
1809 }
1810
1811 func (a *AssemblerImpl) encodeLoadOrStoreWithRegisterOffset(
1812 buf asm.Buffer, baseRegBits, offsetRegBits, targetRegBits byte, opcode, size, v byte,
1813 ) {
1814 // See "Load/store register (register offset)".
1815 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
1816 buf.Append4Bytes(
1817 (baseRegBits<<5)|targetRegBits,
1818 0b011_010_00|(baseRegBits>>3),
1819 opcode<<6|0b00_1_00000|offsetRegBits,
1820 size<<6|v<<2|0b00_111_0_00,
1821 )
1822 }
1823
1824 func (a *AssemblerImpl) encodeLoadOrStoreWithAcquireRelease(
1825 buf asm.Buffer, baseRegBits, targetRegBits byte, l, size byte,
1826 ) {
1827 buf.Append4Bytes(
1828 (baseRegBits<<5)|targetRegBits,
1829 0b1_11111_00|(baseRegBits>>3),
1830 0b1_0_011111|l<<6,
1831 size<<6|0b00_001000,
1832 )
1833 }
1834
1835 // validateMemoryOffset checks whether the given memory offset can be encoded by this assembler.
1836 // In theory, the offset could be arbitrary, but for the simplicity of our homemade assembler,
1837 // we limit it to a range that is sufficient for the compiler.
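// For example: offsets 16 and -256 are accepted, offset 257 is rejected (>255 and not a multiple of 4),
// and offset 260 is accepted again since it is a multiple of 4.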
1838 func validateMemoryOffset(offset int64) error {
1839 if offset > 255 && offset%4 != 0 {
1840 // This is because we only need large offsets for load/store against the Wasm value stack or for reading type IDs, and such an offset
1841 // is always a multiple of 4 or 8 (== the size of uint32 or uint64 == the size of wasm.FunctionTypeID or a value stack entry in Go).
1842 return fmt.Errorf("large memory offset (>255) must be a multiple of 4 but got %d", offset)
1843 } else if offset < -256 { // 9-bit signed integer's minimum = -2^8.
1844 return fmt.Errorf("negative memory offset must be larger than or equal to -256 but got %d", offset)
1845 } else if offset > 1<<31-1 {
1846 return fmt.Errorf("large memory offset must be less than %d but got %d", 1<<31-1, offset)
1847 } else {
1848 return nil
1849 }
1850 }
1851
1852 // encodeLoadOrStoreWithConstOffset encodes load/store instructions with the constant offset.
1853 //
1854 // Note: Encoding strategy intentionally matches the Go assembler: https://go.dev/doc/asm
1855 func (a *AssemblerImpl) encodeLoadOrStoreWithConstOffset(
1856 buf asm.Buffer,
1857 baseRegBits, targetRegBits byte,
1858 offset int64,
1859 opcode, size, v byte,
1860 datasize, datasizeLog2 int64,
1861 ) (err error) {
1862 if err = validateMemoryOffset(offset); err != nil {
1863 return
1864 }
1865
1866 if fitInSigned9Bits(offset) {
1867 // See "LDAPR/STLR (unscaled immediate)"
1868 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldapstl_unscaled
1869 if offset < 0 || offset%datasize != 0 {
1870 // This case is encoded as a single "unscaled signed store".
1871 buf.Append4Bytes(
1872 (baseRegBits<<5)|targetRegBits,
1873 byte(offset<<4)|(baseRegBits>>3),
1874 opcode<<6|(0b00_00_11111&byte(offset>>4)),
1875 size<<6|v<<2|0b00_1_11_0_00,
1876 )
1877 return
1878 }
1879 }
1880
1881 // At this point, the offset is known to be non-negative.
1882 // If it is also a multiple of datasize, then it can be encoded as a single "unsigned immediate".
1883 if offset%datasize == 0 &&
1884 offset < (1<<12)<<datasizeLog2 {
1885 m := offset / datasize
1886 buf.Append4Bytes(
1887 (baseRegBits<<5)|targetRegBits,
1888 (byte(m<<2))|(baseRegBits>>3),
1889 opcode<<6|0b00_111111&byte(m>>6),
1890 size<<6|v<<2|0b00_1_11_0_01,
1891 )
1892 return
1893 }
1894
1895 // Otherwise, we need multiple instructions.
1896 tmpRegBits := registerBits(a.temporaryRegister)
1897 offset32 := int32(offset)
1898
1899 // Go's assembler adds a const into the const pool at this point,
1900 // regardless of its usage; e.g. if we enter the then block of the following if statement,
1901 // the const is not used but it is added into the const pool.
1902 c := asm.NewStaticConst(make([]byte, 4))
1903 binary.LittleEndian.PutUint32(c.Raw, uint32(offset))
1904 a.pool.AddConst(c, uint64(buf.Len()))
1905
1906 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3529-L3532
1907 // If the offset is within 24 bits, we can load it with two ADD instructions.
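// For example, for a 64-bit load with datasize=8 (datasizeLog2=3) and offset=0x41008, the split below
// gives hi=0x40000 and m=0x201: we emit ADD tmp, base, #0x40, LSL #12 followed by LDR Rt, [tmp, #0x1008]
// (0x201 scaled back up by 8).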
1908 hi := offset32 - (offset32 & (0xfff << uint(datasizeLog2)))
1909 if hi&^0xfff000 == 0 {
1910 var sfops byte = 0b100
1911 m := ((offset32 - hi) >> datasizeLog2) & 0xfff
1912 hi >>= 12
1913
1914 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3534-L3535
1915 buf.Append4Bytes(
1916 (baseRegBits<<5)|tmpRegBits,
1917 (byte(hi)<<2)|(baseRegBits>>3),
1918 0b01<<6 /* shift by 12 */ |byte(hi>>6),
1919 sfops<<5|0b10001,
1920 )
1921
1922 buf.Append4Bytes(
1923 (tmpRegBits<<5)|targetRegBits,
1924 (byte(m<<2))|(tmpRegBits>>3),
1925 opcode<<6|0b00_111111&byte(m>>6),
1926 size<<6|v<<2|0b00_1_11_0_01,
1927 )
1928 } else {
1929 // In this case, we load the const via LDR (literal) into the temporary register,
1930 // and the const itself is placed after this instruction sequence below.
1931 loadLiteralOffsetInBinary := uint64(buf.Len())
1932
1933 // First, we emit the LDR (literal) with offset zero, as we don't yet know the const's placement in the binary.
1934 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--literal---Load-Register--literal--
1935 buf.Append4Bytes(tmpRegBits, 0x0, 0x0, 0b00_011_0_00)
1936
1937 // Set a callback on the constant so that the offset is filled in properly once the const's placement is finalized.
1938
1939 c.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
1940 // LDR (literal) encodes the offset divided by 4.
1941 offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4
1942 bin := buf.Bytes()
1943 bin[loadLiteralOffsetInBinary] |= byte(offset << 5)
1944 bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3)
1945 bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11)
1946 })
1947
1948 // Then, perform the actual load/store with register-offset addressing, using the temporary register as the offset.
1949 // https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDR--register---Load-Register--register--
1950 buf.Append4Bytes(
1951 (baseRegBits<<5)|targetRegBits,
1952 0b011_010_00|(baseRegBits>>3),
1953 opcode<<6|0b00_1_00000|tmpRegBits,
1954 size<<6|v<<2|0b00_111_0_00,
1955 )
1956 }
1957 return
1958 }
1959
1960 func (a *AssemblerImpl) encodeRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) {
1961 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
1962 var (
1963 size, v byte
1964 datasize, datasizeLog2 int64
1965 isTargetFloat bool
1966 isRelease bool
1967 )
1968 switch n.instruction {
1969 case STRD:
1970 size, v, datasize, datasizeLog2 = 0b11, 0x0, 8, 3
1971 case STRW:
1972 size, v, datasize, datasizeLog2 = 0b10, 0x0, 4, 2
1973 case STRH:
1974 size, v, datasize, datasizeLog2 = 0b01, 0x0, 2, 1
1975 case STRB:
1976 size, v, datasize, datasizeLog2 = 0b00, 0x0, 1, 0
1977 case FSTRD:
1978 size, v, datasize, datasizeLog2, isTargetFloat = 0b11, 0x1, 8, 3, true
1979 case FSTRS:
1980 size, v, datasize, datasizeLog2, isTargetFloat = 0b10, 0x1, 4, 2, true
1981 case STLRD:
1982 size, isRelease = 0b11, true
1983 case STLRW:
1984 size, isRelease = 0b10, true
1985 case STLRH:
1986 size, isRelease = 0b01, true
1987 case STLRB:
1988 size, isRelease = 0b00, true
1989 default:
1990 return errorEncodingUnsupported(n)
1991 }
1992
1993 var srcRegBits byte
1994 if isTargetFloat {
1995 srcRegBits, err = vectorRegisterBits(n.srcReg)
1996 } else {
1997 srcRegBits, err = intRegisterBits(n.srcReg)
1998 }
1999 if err != nil {
2000 return
2001 }
2002
2003 baseRegBits, err := intRegisterBits(n.dstReg)
2004 if err != nil {
2005 return err
2006 }
2007
2008 if isRelease {
2009 a.encodeLoadOrStoreWithAcquireRelease(buf, baseRegBits, srcRegBits, 0, size)
2010 return nil
2011 }
2012
2013 const opcode = 0x00 // opcode for store instructions.
2014 if n.dstReg2 != asm.NilRegister {
2015 offsetRegBits, err := intRegisterBits(n.dstReg2)
2016 if err != nil {
2017 return err
2018 }
2019 a.encodeLoadOrStoreWithRegisterOffset(buf, baseRegBits, offsetRegBits, srcRegBits, opcode, size, v)
2020 } else {
2021 err = a.encodeLoadOrStoreWithConstOffset(buf, baseRegBits, srcRegBits, n.dstConst, opcode, size, v, datasize, datasizeLog2)
2022 }
2023 return
2024 }
2025
2026 func (a *AssemblerImpl) encodeADR(buf asm.Buffer, n *nodeImpl) (err error) {
2027 dstRegBits, err := intRegisterBits(n.dstReg)
2028 if err != nil {
2029 return err
2030 }
2031
2032 adrInstructionOffsetInBinary := uint64(buf.Len())
2033
2034 // At this point, we don't yet know the target offset to read from,
2035 // so we emit the ADR instruction with offset zero and patch it later in the callback.
2036 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
2037 buf.Append4Bytes(dstRegBits, 0x0, 0x0, 0b10000)
2038
2039 // When staticConst is set, the ADR's target offset is the staticConst's initial address.
2040 if sc := n.staticConst; sc != nil {
2041 a.pool.AddConst(sc, adrInstructionOffsetInBinary)
2042 sc.AddOffsetFinalizedCallback(func(offsetOfConst uint64) {
2043 adrInstructionBytes := buf.Bytes()[adrInstructionOffsetInBinary : adrInstructionOffsetInBinary+4]
2044 offset := int(offsetOfConst) - int(adrInstructionOffsetInBinary)
2045
2046 // See https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
2047 adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
2048 offset >>= 2
2049 adrInstructionBytes[0] |= byte(offset << 5)
2050 offset >>= 3
2051 adrInstructionBytes[1] |= byte(offset)
2052 offset >>= 8
2053 adrInstructionBytes[2] |= byte(offset)
2054 })
2055 return
2056 } else {
2057 a.adrInstructionNodes = append(a.adrInstructionNodes, n)
2058 }
2059 return
2060 }
2061
2062 func (a *AssemblerImpl) finalizeADRInstructionNode(code []byte, n *nodeImpl) (err error) {
2063 // Find the target instruction node.
2064 targetNode := n
2065 for ; targetNode != nil; targetNode = targetNode.next {
2066 if targetNode.instruction == n.readInstructionAddressBeforeTargetInstruction {
2067 targetNode = targetNode.next
2068 break
2069 }
2070 }
2071
2072 if targetNode == nil {
2073 return fmt.Errorf("BUG: target instruction %s not found for ADR", InstructionName(n.readInstructionAddressBeforeTargetInstruction))
2074 }
2075
2076 offset := targetNode.OffsetInBinary() - n.OffsetInBinary()
2077 if i64 := int64(offset); i64 >= 1<<20 || i64 < -1<<20 {
2078 // We could support offsets beyond the 20-bit range by special-casing them here,
2079 // but that range should be enough for our implementation. If the necessity comes up,
2080 // we could add the special casing here to support arbitrarily large offsets.
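// Note that the check above matches ADR's immediate range exactly: its 21-bit signed byte offset covers [-2^20, 2^20-1].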
2081 return fmt.Errorf("BUG: too large offset for ADR: %#x", offset)
2082 }
2083
2084 adrInstructionBytes := code[n.OffsetInBinary() : n.OffsetInBinary()+4]
2085 // According to the binary format of ADR instruction:
2086 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADR--Form-PC-relative-address-?lang=en
2087 adrInstructionBytes[3] |= byte(offset & 0b00000011 << 5)
2088 offset >>= 2
2089 adrInstructionBytes[0] |= byte(offset << 5)
2090 offset >>= 3
2091 adrInstructionBytes[1] |= byte(offset)
2092 offset >>= 8
2093 adrInstructionBytes[2] |= byte(offset)
2094 return nil
2095 }
2096
2097 func (a *AssemblerImpl) encodeMemoryToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2098 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_regoff
2099 var (
2100 size, v, opcode byte
2101 datasize, datasizeLog2 int64
2102 isTargetFloat bool
2103 isAcquire bool
2104 )
2105 switch n.instruction {
2106 case ADR:
2107 return a.encodeADR(buf, n)
2108 case FLDRD:
2109 size, v, datasize, datasizeLog2, opcode, isTargetFloat = 0b11, 0x1, 8, 3, 0b01, true
2110 case FLDRS:
2111 size, v, datasize, datasizeLog2, opcode, isTargetFloat = 0b10, 0x1, 4, 2, 0b01, true
2112 case LDRD:
2113 size, v, datasize, datasizeLog2, opcode = 0b11, 0x0, 8, 3, 0b01
2114 case LDRW:
2115 size, v, datasize, datasizeLog2, opcode = 0b10, 0x0, 4, 2, 0b01
2116 case LDRSHD:
2117 size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b10
2118 case LDRSHW:
2119 size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b11
2120 case LDRH:
2121 size, v, datasize, datasizeLog2, opcode = 0b01, 0x0, 2, 1, 0b01
2122 case LDRSBD:
2123 size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b10
2124 case LDRSBW:
2125 size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b11
2126 case LDRB:
2127 size, v, datasize, datasizeLog2, opcode = 0b00, 0x0, 1, 0, 0b01
2128 case LDRSW:
2129 size, v, datasize, datasizeLog2, opcode = 0b10, 0x0, 4, 2, 0b10
2130 case LDARD:
2131 size, isAcquire = 0b11, true
2132 case LDARW:
2133 size, isAcquire = 0b10, true
2134 case LDARH:
2135 size, isAcquire = 0b01, true
2136 case LDARB:
2137 size, isAcquire = 0b00, true
2138 default:
2139 return errorEncodingUnsupported(n)
2140 }
2141
2142 var dstRegBits byte
2143 if isTargetFloat {
2144 dstRegBits, err = vectorRegisterBits(n.dstReg)
2145 } else {
2146 dstRegBits, err = intRegisterBits(n.dstReg)
2147 }
2148 if err != nil {
2149 return
2150 }
2151 baseRegBits, err := intRegisterBits(n.srcReg)
2152 if err != nil {
2153 return err
2154 }
2155
2156 if isAcquire {
2157 a.encodeLoadOrStoreWithAcquireRelease(buf, baseRegBits, dstRegBits, 1, size)
2158 return nil
2159 }
2160
2161 if n.srcReg2 != asm.NilRegister {
2162 offsetRegBits, err := intRegisterBits(n.srcReg2)
2163 if err != nil {
2164 return err
2165 }
2166 a.encodeLoadOrStoreWithRegisterOffset(buf, baseRegBits, offsetRegBits, dstRegBits, opcode,
2167 size, v)
2168 } else {
2169 err = a.encodeLoadOrStoreWithConstOffset(buf, baseRegBits, dstRegBits, n.srcConst, opcode,
2170 size, v, datasize, datasizeLog2)
2171 }
2172 return
2173 }
2174
2175 // const16bitAligned checks whether the value fits in a single 16-bit-aligned chunk of a 64-bit word.
2176 // If so, it returns the shift amount divided by 16; otherwise it returns -1.
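// For example: const16bitAligned(0xffff) == 0 and const16bitAligned(0xffff_0000) == 1,
// while const16bitAligned(0x1_0001) == -1 because its set bits span two 16-bit chunks.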
2177 func const16bitAligned(v int64) (ret int) {
2178 ret = -1
2179 for s := 0; s < 64; s += 16 {
2180 if (uint64(v) &^ (uint64(0xffff) << uint(s))) == 0 {
2181 ret = s / 16
2182 break
2183 }
2184 }
2185 return
2186 }
2187
2188 // isBitMaskImmediate determines if the value can be encoded as "bitmask immediate".
2189 //
2190 // Such an immediate is a 32-bit or 64-bit pattern viewed as a vector of identical elements of size e = 2, 4, 8, 16, 32, or 64 bits.
2191 // Each element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by 0 to e-1 bits.
2192 //
2193 // See https://developer.arm.com/documentation/dui0802/b/A64-General-Instructions/MOV--bitmask-immediate-
2194 func isBitMaskImmediate(x uint64) bool {
2195 // All zeros and all ones are not "bitmask immediate" by definition.
2196 if x == 0 || x == 0xffff_ffff_ffff_ffff {
2197 return false
2198 }
2199
2200 switch {
2201 case x != x>>32|x<<32:
2202 // e = 64
2203 case x != x>>16|x<<48:
2204 // e = 32 (x == x>>32|x<<32).
2205 // e.g. 0x00ff_ff00_00ff_ff00
2206 x = uint64(int32(x))
2207 case x != x>>8|x<<56:
2208 // e = 16 (x == x>>16|x<<48).
2209 // e.g. 0x00ff_00ff_00ff_00ff
2210 x = uint64(int16(x))
2211 case x != x>>4|x<<60:
2212 // e = 8 (x == x>>8|x<<56).
2213 // e.g. 0x0f0f_0f0f_0f0f_0f0f
2214 x = uint64(int8(x))
2215 default:
2216 // e = 4 or 2.
2217 return true
2218 }
2219 return sequenceOfSetbits(x) || sequenceOfSetbits(^x)
2220 }
2221
2222 // sequenceOfSetbits returns true if the number's binary representation is a single contiguous sequence of set bits (1s).
2223 // For example: 0b1110 -> true, 0b1010 -> false
2224 func sequenceOfSetbits(x uint64) bool {
2225 y := getLowestBit(x)
2226 // If x is a sequence of set bits, this should result in a number
2227 // with only one set bit (i.e. a power of two).
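// For example, for x = 0b1110: y = 0b0010 and x+y = 0b10000, a power of two, so the check below returns true.
// For x = 0b1010: y = 0b0010 and x+y = 0b1100, not a power of two, so it returns false.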
2228 y += x
2229 return (y-1)&y == 0
2230 }
2231
2232 func getLowestBit(x uint64) uint64 {
2233 // See https://stackoverflow.com/questions/12247186/find-the-lowest-set-bit
2234 return x & (^x + 1)
2235 }
2236
2237 func (a *AssemblerImpl) addOrSub64BitRegisters(buf asm.Buffer, sfops byte, sp bool, dstRegBits, src1RegBits, src2RegBits byte) {
2238 // dstReg = src1Reg +/- src2Reg
2239 if sp {
2240 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ADD--extended-register---Add--extended-register--?lang=en
2241 buf.Append4Bytes(
2242 (src1RegBits<<5)|dstRegBits,
2243 0b011<<5|src1RegBits>>3,
2244 1<<5|src2RegBits,
2245 sfops<<5|0b01011,
2246 )
2247 } else {
2248 buf.Append4Bytes(
2249 (src1RegBits<<5)|dstRegBits,
2250 src1RegBits>>3,
2251 src2RegBits,
2252 sfops<<5|0b01011,
2253 )
2254 }
2255 }
2256
2257 func bitmaskImmediate(c uint64, is64bit bool) (immr, imms, N byte) {
2258 var size uint32
2259 switch {
2260 case c != c>>32|c<<32:
2261 size = 64
2262 case c != c>>16|c<<48:
2263 size = 32
2264 c = uint64(int32(c))
2265 case c != c>>8|c<<56:
2266 size = 16
2267 c = uint64(int16(c))
2268 case c != c>>4|c<<60:
2269 size = 8
2270 c = uint64(int8(c))
2271 case c != c>>2|c<<62:
2272 size = 4
2273 c = uint64(int64(c<<60) >> 60)
2274 default:
2275 size = 2
2276 c = uint64(int64(c<<62) >> 62)
2277 }
2278
2279 neg := false
2280 if int64(c) < 0 {
2281 c = ^c
2282 neg = true
2283 }
2284
2285 onesSize, nonZeroPos := getOnesSequenceSize(c)
2286 if neg {
2287 nonZeroPos = onesSize + nonZeroPos
2288 onesSize = size - onesSize
2289 }
2290
2291 var mode byte = 32
2292 if is64bit {
2293 N, mode = 0b1, 64
2294 }
2295
2296 immr = byte((size - nonZeroPos) & (size - 1) & uint32(mode-1))
2297 imms = byte((onesSize - 1) | 63&^(size<<1-1))
2298 return
2299 }
2300
2301 func (a *AssemblerImpl) encodeConstToRegister(buf asm.Buffer, n *nodeImpl) (err error) {
2302 // Alias for readability.
2303 c := n.srcConst
2304
2305 dstRegBits, err := intRegisterBits(n.dstReg)
2306 if err != nil {
2307 return err
2308 }
2309
2310 // See "Logical (immediate)" in
2311 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Immediate
2312 switch n.instruction {
2313 case ANDIMM32, ANDIMM64, ANDSW, ANDS:
2314 if !isBitMaskImmediate(uint64(c)) {
2315 err = fmt.Errorf("const %d must be valid bitmask immediate for %s", c, InstructionName(n.instruction))
2316 return
2317 }
2318 srcRegBits := dstRegBits
2319 var sf, opc, immr, imms, N byte
2320 switch n.instruction {
2321 case ANDIMM32:
2322 sf, opc = 0b0, 0b00
2323 immr, imms, N = bitmaskImmediate(uint64(c), false)
2324 case ANDIMM64:
2325 sf, opc = 0b1, 0b00
2326 immr, imms, N = bitmaskImmediate(uint64(c), true)
2327 case ANDSW:
2328 srcRegBits, err = intRegisterBits(n.srcReg)
2329 if err != nil {
2330 return err
2331 }
2332 sf, opc = 0b0, 0b11
2333 immr, imms, N = bitmaskImmediate(uint64(c), false)
2334 case ANDS:
2335 srcRegBits, err = intRegisterBits(n.srcReg)
2336 if err != nil {
2337 return err
2338 }
2339 sf, opc = 0b1, 0b11
2340 immr, imms, N = bitmaskImmediate(uint64(c), true)
2341 }
2342 buf.Append4Bytes(
2343 (srcRegBits<<5)|dstRegBits,
2344 imms<<2|srcRegBits>>3,
2345 N<<6|immr,
2346 sf<<7|opc<<5|0b10010,
2347 )
2348 return
2349 }
2350
2351 switch inst := n.instruction; inst {
2352 case ADD, ADDS, SUB, SUBS:
2353 srcRegBits := dstRegBits
2354 if n.srcReg != asm.NilRegister {
2355 srcRegBits, err = intRegisterBits(n.srcReg)
2356 if err != nil {
2357 return err
2358 }
2359 }
2360
2361 var sfops byte
2362 if inst == ADD {
2363 sfops = 0b100
2364 } else if inst == ADDS {
2365 sfops = 0b101
2366 } else if inst == SUB {
2367 sfops = 0b110
2368 } else if inst == SUBS {
2369 sfops = 0b111
2370 }
2371
2372 isSP := n.srcReg == RegSP || n.dstReg == RegSP
2373 if c == 0 {
2374 // If the constant equals zero, we encode it as ADD (register) with the zero register.
2375 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, zeroRegisterBits)
2376 return
2377 }
2378
2379 if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
2380 // If the const can be represented as "imm12" or "imm12 << 12": one instruction
2381 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L2992
2382
2383 if c <= 0xfff {
2384 buf.Append4Bytes(
2385 (srcRegBits<<5)|dstRegBits,
2386 (byte(c)<<2)|(srcRegBits>>3),
2387 byte(c>>6),
2388 sfops<<5|0b10001,
2389 )
2390 } else {
2391 c >>= 12
2392 buf.Append4Bytes(
2393 (srcRegBits<<5)|dstRegBits,
2394 (byte(c)<<2)|(srcRegBits>>3),
2395 0b01<<6 /* shift by 12 */ |byte(c>>6),
2396 sfops<<5|0b10001,
2397 )
2398 }
2399 return
2400 }
2401
2402 if t := const16bitAligned(c); t >= 0 {
2403 // If the const fits in a single 16-bit-aligned chunk, for example 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
2404 // we can load it into the temporary register with a single (shifted) MOVZ.
2405 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
2406 tmpRegBits := registerBits(a.temporaryRegister)
2407
2408 // MOVZ $c, tmpReg with shifting.
2409 a.load16bitAlignedConst(buf, c>>(16*t), byte(t), tmpRegBits, false, true)
2410
2411 // ADD/SUB tmpReg, dstReg
2412 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2413 return
2414 } else if t := const16bitAligned(^c); t >= 0 {
2415 // Likewise, if the bitwise inverse of the const fits in a single 16-bit-aligned chunk, do the same with MOVN.
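// For example, c = 0xffff_ffff_0000_ffff is not itself 16-bit aligned, but ^c = 0xffff_0000 is (t = 1),
// so the const can be materialized with a single shifted MOVN.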
2416 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L4029
2417 tmpRegBits := registerBits(a.temporaryRegister)
2418
2419 // MOVN $c, tmpReg with shifting.
2420 a.load16bitAlignedConst(buf, ^c>>(16*t), byte(t), tmpRegBits, true, true)
2421
2422 // ADD/SUB tmpReg, dstReg
2423 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2424 return
2425 }
2426
2427 if uc := uint64(c); isBitMaskImmediate(uc) {
2428 // If the const can be represented as "bitmask immediate", we load it via ORR into the temp register.
2429 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6570-L6583
2430 tmpRegBits := registerBits(a.temporaryRegister)
2431 // ORR $c, tmpReg
2432 a.loadConstViaBitMaskImmediate(buf, uc, tmpRegBits, true)
2433
2434 // ADD/SUB tmpReg, dstReg
2435 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2436 return
2437 }
2438
2439 // If the value fits within 24 bits, we emit two ADD instructions.
2440 if 0 <= c && c <= 0xffffff && inst != SUBS && inst != ADDS {
2441 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3849-L3862
2442 buf.Append4Bytes(
2443 (dstRegBits<<5)|dstRegBits,
2444 (byte(c)<<2)|(dstRegBits>>3),
2445 byte(c&0xfff>>6),
2446 sfops<<5|0b10001,
2447 )
2448 c = c >> 12
2449 buf.Append4Bytes(
2450 (dstRegBits<<5)|dstRegBits,
2451 (byte(c)<<2)|(dstRegBits>>3),
2452 0b01_000000 /* shift by 12 */ |byte(c>>6),
2453 sfops<<5|0b10001,
2454 )
2455 return
2456 }
2457
2458 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L3163-L3203
2459 // Otherwise, we use MOVZ/MOVN plus MOVK sequences to load the const into the tmpRegister.
2460 tmpRegBits := registerBits(a.temporaryRegister)
2461 a.load64bitConst(buf, c, tmpRegBits)
2462 a.addOrSub64BitRegisters(buf, sfops, isSP, dstRegBits, srcRegBits, tmpRegBits)
2463 case MOVW:
2464 if c == 0 {
2465 buf.Append4Bytes(
2466 (zeroRegisterBits<<5)|dstRegBits,
2467 zeroRegisterBits>>3,
2468 0b000_00000|zeroRegisterBits,
2469 0b0_01_01010,
2470 )
2471 return
2472 }
2473
2474 // Following the logic here:
2475 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1637
2476 c32 := uint32(c)
2477 ic := int64(c32)
2478 if ic >= 0 && (ic <= 0xfff || (ic&0xfff) == 0 && (uint64(ic>>12) <= 0xfff)) {
2479 if isBitMaskImmediate(uint64(c)) {
2480 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, false)
2481 return
2482 }
2483 }
2484
2485 if t := const16bitAligned(int64(c32)); t >= 0 {
2486 // If the const fits in a single 16-bit-aligned chunk, for example 0xffff or 0xffff_0000,
2487 // we can load it with a single (shifted) MOVZ.
2488 a.load16bitAlignedConst(buf, int64(c32)>>(16*t), byte(t), dstRegBits, false, false)
2489 } else if t := const16bitAligned(int64(^c32)); t >= 0 {
2490 // Likewise, if the bitwise inverse of the const fits in a single 16-bit-aligned chunk, do the same with MOVN.
2491 a.load16bitAlignedConst(buf, int64(^c32)>>(16*t), byte(t), dstRegBits, true, false)
2492 } else if isBitMaskImmediate(uint64(c)) {
2493 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, false)
2494 } else {
2495 // Otherwise, we use MOVZ and MOVK to load it.
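// For example, c32 = 0x0012_3456 is loaded as MOVZ Wd, #0x3456 followed by MOVK Wd, #0x12, LSL #16.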
2496 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6623-L6630
2497 c16 := uint16(c32)
2498 // MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2499 buf.Append4Bytes(
2500 (byte(c16)<<5)|dstRegBits,
2501 byte(c16>>3),
2502 1<<7|byte(c16>>11),
2503 0b0_10_10010,
2504 )
2505 // MOVK: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
2506 c16 = uint16(c32 >> 16)
2507 if c16 != 0 {
2508 buf.Append4Bytes(
2509 (byte(c16)<<5)|dstRegBits,
2510 byte(c16>>3),
2511 1<<7|0b0_01_00000 /* shift by 16 */ |byte(c16>>11),
2512 0b0_11_10010,
2513 )
2514 }
2515 }
2516 case MOVD:
2517 // Following the logic here:
2518 // https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L1798-L1852
2519 if c >= 0 && (c <= 0xfff || (c&0xfff) == 0 && (uint64(c>>12) <= 0xfff)) {
2520 if isBitMaskImmediate(uint64(c)) {
2521 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, true)
2522 return
2523 }
2524 }
2525
2526 if t := const16bitAligned(c); t >= 0 {
2527 // If the const fits in a single 16-bit-aligned chunk, for example 0xffff, 0xffff_0000 or 0xffff_0000_0000_0000,
2528 // we can load it with a single (shifted) MOVZ.
2529 a.load16bitAlignedConst(buf, c>>(16*t), byte(t), dstRegBits, false, true)
2530 } else if t := const16bitAligned(^c); t >= 0 {
2531 // Likewise, if the bitwise inverse of the const fits in a single 16-bit-aligned chunk, do the same with MOVN.
2532 a.load16bitAlignedConst(buf, (^c)>>(16*t), byte(t), dstRegBits, true, true)
2533 } else if isBitMaskImmediate(uint64(c)) {
2534 a.loadConstViaBitMaskImmediate(buf, uint64(c), dstRegBits, true)
2535 } else {
2536 a.load64bitConst(buf, c, dstRegBits)
2537 }
2538 case LSR:
2539 if c == 0 {
2540 err = errors.New("LSR with zero constant should be optimized out")
2541 return
2542 } else if c < 0 || c > 63 {
2543 err = fmt.Errorf("LSR requires immediate to be within 0 to 63, but got %d", c)
2544 return
2545 }
2546
2547 // LSR (immediate) is an alias of UBFM
2548 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSR--immediate---Logical-Shift-Right--immediate---an-alias-of-UBFM-?lang=en
2549 buf.Append4Bytes(
2550 (dstRegBits<<5)|dstRegBits,
2551 0b111111_00|dstRegBits>>3,
2552 0b01_000000|byte(c),
2553 0b110_10011,
2554 )
2555 case LSL:
2556 if c == 0 {
2557 err = errors.New("LSL with zero constant should be optimized out")
2558 return
2559 } else if c < 0 || c > 63 {
2560 err = fmt.Errorf("LSL requires immediate to be within 0 to 63, but got %d", c)
2561 return
2562 }
2563
2564 // LSL (immediate) is an alias of UBFM
2565 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/LSL--immediate---Logical-Shift-Left--immediate---an-alias-of-UBFM-
2566 cb := byte(c)
2567 buf.Append4Bytes(
2568 (dstRegBits<<5)|dstRegBits,
2569 (0b111111-cb)<<2|dstRegBits>>3,
2570 0b01_000000|(64-cb),
2571 0b110_10011,
2572 )
2573
2574 default:
2575 return errorEncodingUnsupported(n)
2576 }
2577 return
2578 }
2579
2580 func (a *AssemblerImpl) movk(buf asm.Buffer, v uint64, shiftNum int, dstRegBits byte) {
2581 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
2582 buf.Append4Bytes(
2583 (byte(v)<<5)|dstRegBits,
2584 byte(v>>3),
2585 1<<7|byte(shiftNum)<<5|(0b000_11111&byte(v>>11)),
2586 0b1_11_10010,
2587 )
2588 }
2589
2590 func (a *AssemblerImpl) movz(buf asm.Buffer, v uint64, shiftNum int, dstRegBits byte) {
2591 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2592 buf.Append4Bytes(
2593 (byte(v)<<5)|dstRegBits,
2594 byte(v>>3),
2595 1<<7|byte(shiftNum)<<5|(0b000_11111&byte(v>>11)),
2596 0b1_10_10010,
2597 )
2598 }
2599
2600 func (a *AssemblerImpl) movn(buf asm.Buffer, v uint64, shiftNum int, dstRegBits byte) {
2601 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2602 buf.Append4Bytes(
2603 (byte(v)<<5)|dstRegBits,
2604 byte(v>>3),
2605 1<<7|byte(shiftNum)<<5|(0b000_11111&byte(v>>11)),
2606 0b1_00_10010,
2607 )
2608 }
2609
2610 // load64bitConst loads a 64-bit constant into the register, following the same logic as the Go assembler
2611 // to decide how to load large 64-bit consts.
2612 //
2613 // See https://github.com/golang/go/blob/release-branch.go1.15/src/cmd/internal/obj/arm64/asm7.go#L6632-L6759
2614 func (a *AssemblerImpl) load64bitConst(buf asm.Buffer, c int64, dstRegBits byte) {
2615 var bits [4]uint64
2616 var zeros, negs int
2617 for i := 0; i < 4; i++ {
2618 bits[i] = uint64((c >> uint(i*16)) & 0xffff)
2619 if v := bits[i]; v == 0 {
2620 zeros++
2621 } else if v == 0xffff {
2622 negs++
2623 }
2624 }
2625
2626 if zeros == 3 {
2627 // one MOVZ instruction.
2628 for i, v := range bits {
2629 if v != 0 {
2630 a.movz(buf, v, i, dstRegBits)
2631 }
2632 }
2633 } else if negs == 3 {
2634 // one MOVN instruction.
2635 for i, v := range bits {
2636 if v != 0xffff {
2637 v = ^v
2638 a.movn(buf, v, i, dstRegBits)
2639 }
2640 }
2641 } else if zeros == 2 {
2642 // one MOVZ then one MOVK.
2643 var movz bool
2644 for i, v := range bits {
2645 if !movz && v != 0 { // MOVZ.
2646 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2647 a.movz(buf, v, i, dstRegBits)
2648 movz = true
2649 } else if v != 0 {
2650 a.movk(buf, v, i, dstRegBits)
2651 }
2652 }
2653
2654 } else if negs == 2 {
2655 // one MOVN then one MOVK.
2656 var movn bool
2657 for i, v := range bits { // Emit MOVN.
2658 if !movn && v != 0xffff {
2659 v = ^v
2660 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2661 a.movn(buf, v, i, dstRegBits)
2662 movn = true
2663 } else if v != 0xffff {
2664 a.movk(buf, v, i, dstRegBits)
2665 }
2666 }
2667
2668 } else if zeros == 1 {
2669 // one MOVZ then two MOVKs.
2670 var movz bool
2671 for i, v := range bits {
2672 if !movz && v != 0 { // MOVZ.
2673 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2674 a.movz(buf, v, i, dstRegBits)
2675 movz = true
2676 } else if v != 0 {
2677 a.movk(buf, v, i, dstRegBits)
2678 }
2679 }
2680
2681 } else if negs == 1 {
2682 // one MOVN then two MOVKs.
2683 var movn bool
2684 for i, v := range bits { // Emit MOVN.
2685 if !movn && v != 0xffff {
2686 v = ^v
2687 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2688 a.movn(buf, v, i, dstRegBits)
2689 movn = true
2690 } else if v != 0xffff {
2691 a.movk(buf, v, i, dstRegBits)
2692 }
2693 }
2694
2695 } else {
2696 // one MOVZ then three MOVKs.
2697 var movz bool
2698 for i, v := range bits {
2699 if !movz && v != 0 { // MOVZ.
2700 // https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2701 a.movz(buf, v, i, dstRegBits)
2702 movz = true
2703 } else if v != 0 {
2704 a.movk(buf, v, i, dstRegBits)
2705 }
2706 }
2707
2708 }
2709 }
2710
2711 func (a *AssemblerImpl) load16bitAlignedConst(buf asm.Buffer, c int64, shiftNum byte, regBits byte, reverse bool, dst64bit bool) {
2712 var lastByte byte
2713 if reverse {
2714 // MOVN: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVN
2715 lastByte = 0b0_00_10010
2716 } else {
2717 // MOVZ: https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVZ
2718 lastByte = 0b0_10_10010
2719 }
2720 if dst64bit {
2721 lastByte |= 0b1 << 7
2722 }
2723 buf.Append4Bytes(
2724 (byte(c)<<5)|regBits,
2725 byte(c>>3),
2726 1<<7|(shiftNum<<5)|byte(c>>11),
2727 lastByte,
2728 )
2729 }
2730
2731 // loadConstViaBitMaskImmediate loads the constant with ORR (bitmask immediate).
2732 // https://developer.arm.com/documentation/ddi0596/2021-12/Base-Instructions/ORR--immediate---Bitwise-OR--immediate--?lang=en
2733 func (a *AssemblerImpl) loadConstViaBitMaskImmediate(buf asm.Buffer, c uint64, regBits byte, dst64bit bool) {
2734 var size uint32
2735 switch {
2736 case c != c>>32|c<<32:
2737 size = 64
2738 case c != c>>16|c<<48:
2739 size = 32
2740 c = uint64(int32(c))
2741 case c != c>>8|c<<56:
2742 size = 16
2743 c = uint64(int16(c))
2744 case c != c>>4|c<<60:
2745 size = 8
2746 c = uint64(int8(c))
2747 case c != c>>2|c<<62:
2748 size = 4
2749 c = uint64(int64(c<<60) >> 60)
2750 default:
2751 size = 2
2752 c = uint64(int64(c<<62) >> 62)
2753 }
2754
2755 neg := false
2756 if int64(c) < 0 {
2757 c = ^c
2758 neg = true
2759 }
2760
2761 onesSize, nonZeroPos := getOnesSequenceSize(c)
2762 if neg {
2763 nonZeroPos = onesSize + nonZeroPos
2764 onesSize = size - onesSize
2765 }
2766
2767 // See the following article for understanding the encoding.
2768 // https://dinfuehr.github.io/blog/encoding-of-immediate-values-on-aarch64/ 2769 var n byte 2770 mode := 32 2771 if dst64bit && size == 64 { 2772 n = 0b1 2773 mode = 64 2774 } 2775 2776 r := byte((size - nonZeroPos) & (size - 1) & uint32(mode-1)) 2777 s := byte((onesSize - 1) | 63&^(size<<1-1)) 2778 2779 var sf byte 2780 if dst64bit { 2781 sf = 0b1 2782 } 2783 buf.Append4Bytes( 2784 (zeroRegisterBits<<5)|regBits, 2785 s<<2|(zeroRegisterBits>>3), 2786 n<<6|r, 2787 sf<<7|0b0_01_10010, 2788 ) 2789 } 2790 2791 func getOnesSequenceSize(x uint64) (size, nonZeroPos uint32) { 2792 // Take 0b00111000 for example: 2793 y := getLowestBit(x) // = 0b0000100 2794 nonZeroPos = setBitPos(y) // = 2 2795 size = setBitPos(x+y) - nonZeroPos // = setBitPos(0b0100000) - 2 = 5 - 2 = 3 2796 return 2797 } 2798 2799 func setBitPos(x uint64) (ret uint32) { 2800 for ; ; ret++ { 2801 if x == 0b1 { 2802 break 2803 } 2804 x = x >> 1 2805 } 2806 return 2807 } 2808 2809 func checkArrangementIndexPair(arr VectorArrangement, index VectorIndex) (err error) { 2810 if arr == VectorArrangementNone { 2811 return nil 2812 } 2813 var valid bool 2814 switch arr { 2815 case VectorArrangement8B: 2816 valid = index < 8 2817 case VectorArrangement16B: 2818 valid = index < 16 2819 case VectorArrangement4H: 2820 valid = index < 4 2821 case VectorArrangement8H: 2822 valid = index < 8 2823 case VectorArrangement2S: 2824 valid = index < 2 2825 case VectorArrangement4S: 2826 valid = index < 4 2827 case VectorArrangement1D: 2828 valid = index < 1 2829 case VectorArrangement2D: 2830 valid = index < 2 2831 case VectorArrangementB: 2832 valid = index < 16 2833 case VectorArrangementH: 2834 valid = index < 8 2835 case VectorArrangementS: 2836 valid = index < 4 2837 case VectorArrangementD: 2838 valid = index < 2 2839 } 2840 if !valid { 2841 err = fmt.Errorf("invalid arrangement and index pair: %s[%d]", arr, index) 2842 } 2843 return 2844 } 2845 2846 func (a *AssemblerImpl) encodeMemoryToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) { 2847 srcBaseRegBits, err := intRegisterBits(n.srcReg) 2848 if err != nil { 2849 return err 2850 } 2851 2852 dstVectorRegBits, err := vectorRegisterBits(n.dstReg) 2853 if err != nil { 2854 return err 2855 } 2856 2857 switch n.instruction { 2858 case VMOV: // translated as LDR(immediate,SIMD&FP) 2859 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LDR--immediate--SIMD-FP---Load-SIMD-FP-Register--immediate-offset--?lang=en 2860 var size, opcode byte 2861 var dataSize, dataSizeLog2 int64 2862 switch n.vectorArrangement { 2863 case VectorArrangementB: 2864 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b01, 1, 0 2865 case VectorArrangementH: 2866 size, opcode, dataSize, dataSizeLog2 = 0b01, 0b01, 2, 1 2867 case VectorArrangementS: 2868 size, opcode, dataSize, dataSizeLog2 = 0b10, 0b01, 4, 2 2869 case VectorArrangementD: 2870 size, opcode, dataSize, dataSizeLog2 = 0b11, 0b01, 8, 3 2871 case VectorArrangementQ: 2872 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b11, 16, 4 2873 } 2874 const v = 1 // v as in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_pos 2875 if n.srcReg2 != asm.NilRegister { 2876 offsetRegBits, err := intRegisterBits(n.srcReg2) 2877 if err != nil { 2878 return err 2879 } 2880 a.encodeLoadOrStoreWithRegisterOffset(buf, srcBaseRegBits, offsetRegBits, dstVectorRegBits, opcode, size, v) 2881 } else { 2882 err = a.encodeLoadOrStoreWithConstOffset(buf, srcBaseRegBits, dstVectorRegBits, 2883 n.srcConst, 
opcode, size, v, dataSize, dataSizeLog2) 2884 } 2885 case LD1R: 2886 if n.srcReg2 != asm.NilRegister || n.srcConst != 0 { 2887 return fmt.Errorf("offset for %s is not implemented", InstructionName(LD1R)) 2888 } 2889 2890 var size, q byte 2891 switch n.vectorArrangement { 2892 case VectorArrangement8B: 2893 size, q = 0b00, 0b0 2894 case VectorArrangement16B: 2895 size, q = 0b00, 0b1 2896 case VectorArrangement4H: 2897 size, q = 0b01, 0b0 2898 case VectorArrangement8H: 2899 size, q = 0b01, 0b1 2900 case VectorArrangement2S: 2901 size, q = 0b10, 0b0 2902 case VectorArrangement4S: 2903 size, q = 0b10, 0b1 2904 case VectorArrangement1D: 2905 size, q = 0b11, 0b0 2906 case VectorArrangement2D: 2907 size, q = 0b11, 0b1 2908 } 2909 2910 // No offset encoding. 2911 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/LD1R--Load-one-single-element-structure-and-Replicate-to-all-lanes--of-one-register--?lang=en#iclass_as_post_index 2912 buf.Append4Bytes( 2913 (srcBaseRegBits<<5)|dstVectorRegBits, 2914 0b11_000000|size<<2|srcBaseRegBits>>3, 2915 0b01_000000, 2916 q<<6|0b1101, 2917 ) 2918 default: 2919 return errorEncodingUnsupported(n) 2920 } 2921 return 2922 } 2923 2924 func arrangementSizeQ(arr VectorArrangement) (size, q byte) { 2925 switch arr { 2926 case VectorArrangement8B: 2927 size, q = 0b00, 0 2928 case VectorArrangement16B: 2929 size, q = 0b00, 1 2930 case VectorArrangement4H: 2931 size, q = 0b01, 0 2932 case VectorArrangement8H: 2933 size, q = 0b01, 1 2934 case VectorArrangement2S: 2935 size, q = 0b10, 0 2936 case VectorArrangement4S: 2937 size, q = 0b10, 1 2938 case VectorArrangement1D: 2939 size, q = 0b11, 0 2940 case VectorArrangement2D: 2941 size, q = 0b11, 1 2942 } 2943 return 2944 } 2945 2946 func (a *AssemblerImpl) encodeVectorRegisterToMemory(buf asm.Buffer, n *nodeImpl) (err error) { 2947 srcVectorRegBits, err := vectorRegisterBits(n.srcReg) 2948 if err != nil { 2949 return err 2950 } 2951 2952 dstBaseRegBits, err := intRegisterBits(n.dstReg) 2953 if err != nil { 2954 return err 2955 } 2956 2957 switch n.instruction { 2958 case VMOV: // translated as STR(immediate,SIMD&FP) 2959 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/STR--immediate--SIMD-FP---Store-SIMD-FP-register--immediate-offset-- 2960 var size, opcode byte 2961 var dataSize, dataSizeLog2 int64 2962 switch n.vectorArrangement { 2963 case VectorArrangementB: 2964 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b00, 1, 0 2965 case VectorArrangementH: 2966 size, opcode, dataSize, dataSizeLog2 = 0b01, 0b00, 2, 1 2967 case VectorArrangementS: 2968 size, opcode, dataSize, dataSizeLog2 = 0b10, 0b00, 4, 2 2969 case VectorArrangementD: 2970 size, opcode, dataSize, dataSizeLog2 = 0b11, 0b00, 8, 3 2971 case VectorArrangementQ: 2972 size, opcode, dataSize, dataSizeLog2 = 0b00, 0b10, 16, 4 2973 } 2974 const v = 1 // v as in https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Loads-and-Stores?lang=en#ldst_pos 2975 2976 if n.dstReg2 != asm.NilRegister { 2977 offsetRegBits, err := intRegisterBits(n.dstReg2) 2978 if err != nil { 2979 return err 2980 } 2981 a.encodeLoadOrStoreWithRegisterOffset(buf, dstBaseRegBits, offsetRegBits, srcVectorRegBits, opcode, size, v) 2982 } else { 2983 err = a.encodeLoadOrStoreWithConstOffset(buf, dstBaseRegBits, srcVectorRegBits, 2984 n.dstConst, opcode, size, v, dataSize, dataSizeLog2) 2985 } 2986 default: 2987 return errorEncodingUnsupported(n) 2988 } 2989 return 2990 } 2991 2992 func (a *AssemblerImpl) 
encodeStaticConstToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) { 2993 if n.instruction != VMOV { 2994 return errorEncodingUnsupported(n) 2995 } 2996 2997 dstRegBits, err := vectorRegisterBits(n.dstReg) 2998 if err != nil { 2999 return err 3000 } 3001 3002 // LDR (literal, SIMD&FP) 3003 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/LDR--literal--SIMD-FP---Load-SIMD-FP-Register--PC-relative-literal-- 3004 var opc byte 3005 var constLength int 3006 switch n.vectorArrangement { 3007 case VectorArrangementS: 3008 opc, constLength = 0b00, 4 3009 case VectorArrangementD: 3010 opc, constLength = 0b01, 8 3011 case VectorArrangementQ: 3012 opc, constLength = 0b10, 16 3013 } 3014 3015 loadLiteralOffsetInBinary := uint64(buf.Len()) 3016 a.pool.AddConst(n.staticConst, loadLiteralOffsetInBinary) 3017 3018 if len(n.staticConst.Raw) != constLength { 3019 return fmt.Errorf("invalid const length for %s: want %d but was %d", 3020 n.vectorArrangement, constLength, len(n.staticConst.Raw)) 3021 } 3022 3023 buf.Append4Bytes(dstRegBits, 0x0, 0x0, opc<<6|0b11100) 3024 n.staticConst.AddOffsetFinalizedCallback(func(offsetOfConst uint64) { 3025 // LDR (literal, SIMD&FP) encodes offset divided by 4. 3026 offset := (int(offsetOfConst) - int(loadLiteralOffsetInBinary)) / 4 3027 bin := buf.Bytes() 3028 bin[loadLiteralOffsetInBinary] |= byte(offset << 5) 3029 bin[loadLiteralOffsetInBinary+1] |= byte(offset >> 3) 3030 bin[loadLiteralOffsetInBinary+2] |= byte(offset >> 11) 3031 }) 3032 return 3033 } 3034 3035 // advancedSIMDTwoRegisterMisc holds information to encode instructions as "Advanced SIMD two-register miscellaneous" in 3036 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3037 var advancedSIMDTwoRegisterMisc = map[asm.Instruction]struct { 3038 qAndSize map[VectorArrangement]qAndSize 3039 u, opcode byte 3040 }{ 3041 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NOT--Bitwise-NOT--vector--?lang=en 3042 NOT: { 3043 u: 0b1, opcode: 0b00101, 3044 qAndSize: map[VectorArrangement]qAndSize{ 3045 VectorArrangement16B: {size: 0b00, q: 0b1}, 3046 VectorArrangement8B: {size: 0b00, q: 0b0}, 3047 }, 3048 }, 3049 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FNEG--vector---Floating-point-Negate--vector--?lang=en 3050 VFNEG: { 3051 u: 0b1, opcode: 0b01111, 3052 qAndSize: map[VectorArrangement]qAndSize{ 3053 VectorArrangement4S: {size: 0b10, q: 0b1}, 3054 VectorArrangement2S: {size: 0b10, q: 0b0}, 3055 VectorArrangement2D: {size: 0b11, q: 0b1}, 3056 }, 3057 }, 3058 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FABS--vector---Floating-point-Absolute-value--vector--?lang=en 3059 VFABS: {u: 0, opcode: 0b01111, qAndSize: map[VectorArrangement]qAndSize{ 3060 VectorArrangement2D: {size: 0b11, q: 0b1}, 3061 VectorArrangement4S: {size: 0b10, q: 0b1}, 3062 VectorArrangement2S: {size: 0b10, q: 0b0}, 3063 }}, 3064 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSQRT--vector---Floating-point-Square-Root--vector--?lang=en 3065 VFSQRT: {u: 1, opcode: 0b11111, qAndSize: map[VectorArrangement]qAndSize{ 3066 VectorArrangement2D: {size: 0b11, q: 0b1}, 3067 VectorArrangement4S: {size: 0b10, q: 0b1}, 3068 VectorArrangement2S: {size: 0b10, q: 0b0}, 3069 }}, 3070 // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTM--vector---Floating-point-Round-to-Integral--toward-Minus-infinity--vector--?lang=en 3071 VFRINTM: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{ 3072 VectorArrangement2D: {size: 0b01, q: 0b1}, 3073 VectorArrangement4S: {size: 0b00, q: 0b1}, 3074 VectorArrangement2S: {size: 0b00, q: 0b0}, 3075 }}, 3076 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTN--vector---Floating-point-Round-to-Integral--to-nearest-with-ties-to-even--vector--?lang=en 3077 VFRINTN: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{ 3078 VectorArrangement2D: {size: 0b01, q: 0b1}, 3079 VectorArrangement4S: {size: 0b00, q: 0b1}, 3080 VectorArrangement2S: {size: 0b00, q: 0b0}, 3081 }}, 3082 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTP--vector---Floating-point-Round-to-Integral--toward-Plus-infinity--vector--?lang=en 3083 VFRINTP: {u: 0, opcode: 0b11000, qAndSize: map[VectorArrangement]qAndSize{ 3084 VectorArrangement2D: {size: 0b11, q: 0b1}, 3085 VectorArrangement4S: {size: 0b10, q: 0b1}, 3086 VectorArrangement2S: {size: 0b10, q: 0b0}, 3087 }}, 3088 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FRINTZ--vector---Floating-point-Round-to-Integral--toward-Zero--vector--?lang=en 3089 VFRINTZ: {u: 0, opcode: 0b11001, qAndSize: map[VectorArrangement]qAndSize{ 3090 VectorArrangement2D: {size: 0b11, q: 0b1}, 3091 VectorArrangement4S: {size: 0b10, q: 0b1}, 3092 VectorArrangement2S: {size: 0b10, q: 0b0}, 3093 }}, 3094 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CNT--Population-Count-per-byte-?lang=en 3095 VCNT: {u: 0b0, opcode: 0b00101, qAndSize: map[VectorArrangement]qAndSize{ 3096 VectorArrangement8B: {size: 0b00, q: 0b0}, 3097 VectorArrangement16B: {size: 0b00, q: 0b1}, 3098 }}, 3099 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/NEG--vector---Negate--vector--?lang=en 3100 VNEG: {u: 0b1, opcode: 0b01011, qAndSize: defaultQAndSize}, 3101 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ABS--Absolute-value--vector--?lang=en 3102 VABS: {u: 0b0, opcode: 0b01011, qAndSize: defaultQAndSize}, 3103 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/REV64--Reverse-elements-in-64-bit-doublewords--vector--?lang=en 3104 REV64: {u: 0b0, opcode: 0b00000, qAndSize: defaultQAndSize}, 3105 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/XTN--XTN2--Extract-Narrow-?lang=en 3106 XTN: {u: 0b0, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{ 3107 VectorArrangement2D: {q: 0, size: 0b10}, 3108 VectorArrangement4S: {q: 0, size: 0b01}, 3109 VectorArrangement8H: {q: 0, size: 0b00}, 3110 }}, 3111 SHLL: {u: 0b1, opcode: 0b10011, qAndSize: map[VectorArrangement]qAndSize{ 3112 VectorArrangement8B: {q: 0b00, size: 0b00}, 3113 VectorArrangement4H: {q: 0b00, size: 0b01}, 3114 VectorArrangement2S: {q: 0b00, size: 0b10}, 3115 }}, 3116 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--zero---Compare-bitwise-Equal-to-zero--vector--?lang=en 3117 CMEQZERO: {u: 0b0, opcode: 0b01001, qAndSize: defaultQAndSize}, 3118 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SADDLP--Signed-Add-Long-Pairwise-?lang=en 3119 SADDLP: {u: 0b0, opcode: 0b00010, qAndSize: defaultQAndSize}, 3120 // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UADDLP--Unsigned-Add-Long-Pairwise-?lang=en 3121 UADDLP: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize}, 3122 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTZS--vector--integer---Floating-point-Convert-to-Signed-integer--rounding-toward-Zero--vector--?lang=en 3123 VFCVTZS: {u: 0b0, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{ 3124 VectorArrangement4S: {size: 0b10, q: 0b1}, 3125 VectorArrangement2S: {size: 0b10, q: 0b0}, 3126 VectorArrangement2D: {size: 0b11, q: 0b1}, 3127 }}, 3128 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTZU--vector--integer---Floating-point-Convert-to-Unsigned-integer--rounding-toward-Zero--vector--?lang=en 3129 VFCVTZU: {u: 0b1, opcode: 0b11011, qAndSize: map[VectorArrangement]qAndSize{ 3130 VectorArrangement4S: {size: 0b10, q: 0b1}, 3131 VectorArrangement2S: {size: 0b10, q: 0b0}, 3132 VectorArrangement2D: {size: 0b11, q: 0b1}, 3133 }}, 3134 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTN--SQXTN2--Signed-saturating-extract-Narrow-?lang=en 3135 SQXTN: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{ 3136 VectorArrangement8B: {q: 0b0, size: 0b00}, 3137 VectorArrangement4H: {q: 0b0, size: 0b01}, 3138 VectorArrangement2S: {q: 0b0, size: 0b10}, 3139 }}, 3140 3141 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTN--SQXTN2--Signed-saturating-extract-Narrow-?lang=en 3142 SQXTN2: {u: 0b0, opcode: 0b10100, qAndSize: map[VectorArrangement]qAndSize{ 3143 VectorArrangement16B: {q: 0b1, size: 0b00}, 3144 VectorArrangement8H: {q: 0b1, size: 0b01}, 3145 VectorArrangement4S: {q: 0b1, size: 0b10}, 3146 }}, 3147 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQXTN--UQXTN2--Unsigned-saturating-extract-Narrow-?lang=en 3148 UQXTN: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize}, 3149 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTUN--SQXTUN2--Signed-saturating-extract-Unsigned-Narrow-?lang=en 3150 SQXTUN: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{ 3151 VectorArrangement8B: {q: 0b0, size: 0b00}, 3152 VectorArrangement4H: {q: 0b0, size: 0b01}, 3153 VectorArrangement2S: {q: 0b0, size: 0b10}, 3154 }}, 3155 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQXTUN--SQXTUN2--Signed-saturating-extract-Unsigned-Narrow-?lang=en 3156 SQXTUN2: {u: 0b1, opcode: 0b10010, qAndSize: map[VectorArrangement]qAndSize{ 3157 VectorArrangement16B: {q: 0b1, size: 0b00}, 3158 VectorArrangement8H: {q: 0b1, size: 0b01}, 3159 VectorArrangement4S: {q: 0b1, size: 0b10}, 3160 }}, 3161 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SCVTF--vector--integer---Signed-integer-Convert-to-Floating-point--vector--?lang=en 3162 VSCVTF: {u: 0b0, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{ 3163 VectorArrangement2D: {q: 0b1, size: 0b01}, 3164 VectorArrangement4S: {q: 0b1, size: 0b00}, 3165 VectorArrangement2S: {q: 0b0, size: 0b00}, 3166 }}, 3167 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UCVTF--vector--integer---Unsigned-integer-Convert-to-Floating-point--vector--?lang=en 3168 VUCVTF: {u: 0b1, opcode: 0b11101, qAndSize: map[VectorArrangement]qAndSize{ 3169 VectorArrangement2D: {q: 0b1, size: 0b01}, 3170 VectorArrangement4S: {q: 0b1, size: 0b00}, 3171 
VectorArrangement2S: {q: 0b0, size: 0b00}, 3172 }}, 3173 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTL--FCVTL2--Floating-point-Convert-to-higher-precision-Long--vector--?lang=en 3174 FCVTL: {u: 0b0, opcode: 0b10111, qAndSize: map[VectorArrangement]qAndSize{ 3175 VectorArrangement2S: {size: 0b01, q: 0b0}, 3176 VectorArrangement4H: {size: 0b00, q: 0b0}, 3177 }}, 3178 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCVTN--FCVTN2--Floating-point-Convert-to-lower-precision-Narrow--vector--?lang=en 3179 FCVTN: {u: 0b0, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{ 3180 VectorArrangement2S: {size: 0b01, q: 0b0}, 3181 VectorArrangement4H: {size: 0b00, q: 0b0}, 3182 }}, 3183 } 3184 3185 // advancedSIMDThreeDifferent holds information to encode instructions as "Advanced SIMD three different" in 3186 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3187 var advancedSIMDThreeDifferent = map[asm.Instruction]struct { 3188 qAndSize map[VectorArrangement]qAndSize 3189 u, opcode byte 3190 }{ 3191 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMLAL--UMLAL2--vector---Unsigned-Multiply-Add-Long--vector--?lang=en 3192 VUMLAL: {u: 0b1, opcode: 0b1000, qAndSize: map[VectorArrangement]qAndSize{ 3193 VectorArrangement2S: {q: 0b0, size: 0b10}, 3194 VectorArrangement4H: {q: 0b0, size: 0b01}, 3195 VectorArrangement8B: {q: 0b0, size: 0b00}, 3196 }}, 3197 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMULL--SMULL2--vector---Signed-Multiply-Long--vector--?lang=en 3198 SMULL: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 3199 VectorArrangement8B: {q: 0b0, size: 0b00}, 3200 VectorArrangement4H: {q: 0b0, size: 0b01}, 3201 VectorArrangement2S: {q: 0b0, size: 0b10}, 3202 }}, 3203 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMULL--SMULL2--vector---Signed-Multiply-Long--vector--?lang=en 3204 SMULL2: {u: 0b0, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 3205 VectorArrangement16B: {q: 0b1, size: 0b00}, 3206 VectorArrangement8H: {q: 0b1, size: 0b01}, 3207 VectorArrangement4S: {q: 0b1, size: 0b10}, 3208 }}, 3209 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3210 UMULL: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 3211 VectorArrangement8B: {q: 0b0, size: 0b00}, 3212 VectorArrangement4H: {q: 0b0, size: 0b01}, 3213 VectorArrangement2S: {q: 0b0, size: 0b10}, 3214 }}, 3215 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3216 UMULL2: {u: 0b1, opcode: 0b1100, qAndSize: map[VectorArrangement]qAndSize{ 3217 VectorArrangement16B: {q: 0b1, size: 0b00}, 3218 VectorArrangement8H: {q: 0b1, size: 0b01}, 3219 VectorArrangement4S: {q: 0b1, size: 0b10}, 3220 }}, 3221 } 3222 3223 // advancedSIMDThreeSame holds information to encode instructions as "Advanced SIMD three same" in 3224 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3225 var advancedSIMDThreeSame = map[asm.Instruction]struct { 3226 qAndSize map[VectorArrangement]qAndSize 3227 u, opcode byte 3228 }{ 3229 // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/AND--vector---Bitwise-AND--vector--?lang=en 3230 VAND: { 3231 u: 0b0, opcode: 0b00011, 3232 qAndSize: map[VectorArrangement]qAndSize{ 3233 VectorArrangement16B: {size: 0b00, q: 0b1}, 3234 VectorArrangement8B: {size: 0b00, q: 0b0}, 3235 }, 3236 }, 3237 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BSL--Bitwise-Select-?lang=en 3238 BSL: { 3239 u: 0b1, opcode: 0b00011, 3240 qAndSize: map[VectorArrangement]qAndSize{ 3241 VectorArrangement16B: {size: 0b01, q: 0b1}, 3242 VectorArrangement8B: {size: 0b01, q: 0b0}, 3243 }, 3244 }, 3245 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EOR--vector---Bitwise-Exclusive-OR--vector--?lang=en 3246 EOR: { 3247 u: 0b1, opcode: 0b00011, 3248 qAndSize: map[VectorArrangement]qAndSize{ 3249 VectorArrangement16B: {size: 0b00, q: 0b1}, 3250 VectorArrangement8B: {size: 0b00, q: 0b0}, 3251 }, 3252 }, 3253 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ORR--vector--register---Bitwise-inclusive-OR--vector--register--?lang=en 3254 VORR: { 3255 u: 0b0, opcode: 0b00011, 3256 qAndSize: map[VectorArrangement]qAndSize{ 3257 VectorArrangement16B: {size: 0b10, q: 0b1}, 3258 VectorArrangement8B: {size: 0b10, q: 0b0}, 3259 }, 3260 }, 3261 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIC--vector--register---Bitwise-bit-Clear--vector--register--?lang=en 3262 BIC: { 3263 u: 0b0, opcode: 0b00011, 3264 qAndSize: map[VectorArrangement]qAndSize{ 3265 VectorArrangement16B: {size: 0b01, q: 0b1}, 3266 VectorArrangement8B: {size: 0b01, q: 0b0}, 3267 }, 3268 }, 3269 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en 3270 VFADDS: { 3271 u: 0b0, opcode: 0b11010, 3272 qAndSize: map[VectorArrangement]qAndSize{ 3273 VectorArrangement4S: {size: 0b00, q: 0b1}, 3274 VectorArrangement2S: {size: 0b00, q: 0b0}, 3275 }, 3276 }, 3277 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FADD--vector---Floating-point-Add--vector--?lang=en 3278 VFADDD: { 3279 u: 0b0, opcode: 0b11010, 3280 qAndSize: map[VectorArrangement]qAndSize{ 3281 VectorArrangement2D: {size: 0b01, q: 0b1}, 3282 }, 3283 }, 3284 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en 3285 VFSUBS: { 3286 u: 0b0, opcode: 0b11010, 3287 qAndSize: map[VectorArrangement]qAndSize{ 3288 VectorArrangement4S: {size: 0b10, q: 0b1}, 3289 VectorArrangement2S: {size: 0b10, q: 0b0}, 3290 }, 3291 }, 3292 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FSUB--vector---Floating-point-Subtract--vector--?lang=en 3293 VFSUBD: { 3294 u: 0b0, opcode: 0b11010, 3295 qAndSize: map[VectorArrangement]qAndSize{ 3296 VectorArrangement2D: {size: 0b11, q: 0b1}, 3297 }, 3298 }, 3299 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAXP--Unsigned-Maximum-Pairwise-?lang=en 3300 UMAXP: {u: 0b1, opcode: 0b10100, qAndSize: defaultQAndSize}, 3301 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--register---Compare-bitwise-Equal--vector--?lang=en 3302 CMEQ: {u: 0b1, opcode: 0b10001, qAndSize: defaultQAndSize}, 3303 // https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/ADDP--vector- 3304 VADDP: {u: 0b0, opcode: 0b10111, qAndSize: defaultQAndSize}, 3305 // 
https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector---Add--vector--?lang=en 3306 VADD: {u: 0, opcode: 0b10000, qAndSize: defaultQAndSize}, 3307 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SUB--vector---Subtract--vector--?lang=en 3308 VSUB: {u: 1, opcode: 0b10000, qAndSize: defaultQAndSize}, 3309 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en 3310 SSHL: {u: 0, opcode: 0b01000, qAndSize: defaultQAndSize}, 3311 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/USHL--Unsigned-Shift-Left--register--?lang=en 3312 USHL: {u: 0b1, opcode: 0b01000, qAndSize: defaultQAndSize}, 3313 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGT--register---Compare-signed-Greater-than--vector--?lang=en 3314 CMGT: {u: 0b0, opcode: 0b00110, qAndSize: defaultQAndSize}, 3315 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHI--register---Compare-unsigned-Higher--vector--?lang=en 3316 CMHI: {u: 0b1, opcode: 0b00110, qAndSize: defaultQAndSize}, 3317 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGE--register---Compare-signed-Greater-than-or-Equal--vector--?lang=en 3318 CMGE: {u: 0b0, opcode: 0b00111, qAndSize: defaultQAndSize}, 3319 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHS--register---Compare-unsigned-Higher-or-Same--vector--?lang=en 3320 CMHS: {u: 0b1, opcode: 0b00111, qAndSize: defaultQAndSize}, 3321 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMEQ--register---Floating-point-Compare-Equal--vector--?lang=en 3322 FCMEQ: { 3323 u: 0b0, opcode: 0b11100, 3324 qAndSize: map[VectorArrangement]qAndSize{ 3325 VectorArrangement4S: {size: 0b00, q: 0b1}, 3326 VectorArrangement2S: {size: 0b00, q: 0b0}, 3327 VectorArrangement2D: {size: 0b01, q: 0b1}, 3328 }, 3329 }, 3330 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGT--register---Floating-point-Compare-Greater-than--vector--?lang=en 3331 FCMGT: { 3332 u: 0b1, opcode: 0b11100, 3333 qAndSize: map[VectorArrangement]qAndSize{ 3334 VectorArrangement4S: {size: 0b10, q: 0b1}, 3335 VectorArrangement2S: {size: 0b10, q: 0b0}, 3336 VectorArrangement2D: {size: 0b11, q: 0b1}, 3337 }, 3338 }, 3339 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en 3340 FCMGE: { 3341 u: 0b1, opcode: 0b11100, 3342 qAndSize: map[VectorArrangement]qAndSize{ 3343 VectorArrangement4S: {size: 0b00, q: 0b1}, 3344 VectorArrangement2S: {size: 0b00, q: 0b0}, 3345 VectorArrangement2D: {size: 0b01, q: 0b1}, 3346 }, 3347 }, 3348 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMIN--vector---Floating-point-minimum--vector--?lang=en 3349 VFMIN: { 3350 u: 0b0, opcode: 0b11110, 3351 qAndSize: map[VectorArrangement]qAndSize{ 3352 VectorArrangement4S: {size: 0b10, q: 0b1}, 3353 VectorArrangement2S: {size: 0b10, q: 0b0}, 3354 VectorArrangement2D: {size: 0b11, q: 0b1}, 3355 }, 3356 }, 3357 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMAX--vector---Floating-point-Maximum--vector--?lang=en 3358 VFMAX: { 3359 u: 0b0, opcode: 0b11110, 3360 qAndSize: map[VectorArrangement]qAndSize{ 3361 VectorArrangement4S: {size: 0b00, q: 0b1}, 3362 VectorArrangement2S: {size: 0b00, q:
0b0}, 3363 VectorArrangement2D: {size: 0b01, q: 0b1}, 3364 }, 3365 }, 3366 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FMUL--vector---Floating-point-Multiply--vector--?lang=en 3367 VFMUL: { 3368 u: 0b1, opcode: 0b11011, 3369 qAndSize: map[VectorArrangement]qAndSize{ 3370 VectorArrangement4S: {size: 0b00, q: 0b1}, 3371 VectorArrangement2S: {size: 0b00, q: 0b0}, 3372 VectorArrangement2D: {size: 0b01, q: 0b1}, 3373 }, 3374 }, 3375 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FDIV--vector---Floating-point-Divide--vector--?lang=en 3376 VFDIV: { 3377 u: 0b1, opcode: 0b11111, 3378 qAndSize: map[VectorArrangement]qAndSize{ 3379 VectorArrangement4S: {size: 0b00, q: 0b1}, 3380 VectorArrangement2S: {size: 0b00, q: 0b0}, 3381 VectorArrangement2D: {size: 0b01, q: 0b1}, 3382 }, 3383 }, 3384 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MUL--vector---Multiply--vector--?lang=en 3385 VMUL: {u: 0b0, opcode: 0b10011, qAndSize: defaultQAndSize}, 3386 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQADD--Signed-saturating-Add-?lang=en 3387 VSQADD: {u: 0b0, opcode: 0b00001, qAndSize: defaultQAndSize}, 3388 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQADD--Unsigned-saturating-Add-?lang=en 3389 VUQADD: {u: 0b1, opcode: 0b00001, qAndSize: defaultQAndSize}, 3390 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMIN--Signed-Minimum--vector--?lang=en 3391 SMIN: {u: 0b0, opcode: 0b01101, qAndSize: defaultQAndSize}, 3392 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMAX--Signed-Maximum--vector--?lang=en 3393 SMAX: {u: 0b0, opcode: 0b01100, qAndSize: defaultQAndSize}, 3394 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMIN--Unsigned-Minimum--vector--?lang=en 3395 UMIN: {u: 0b1, opcode: 0b01101, qAndSize: defaultQAndSize}, 3396 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAX--Unsigned-Maximum--vector--?lang=en 3397 UMAX: {u: 0b1, opcode: 0b01100, qAndSize: defaultQAndSize}, 3398 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/URHADD--Unsigned-Rounding-Halving-Add-?lang=en 3399 URHADD: {u: 0b1, opcode: 0b00010, qAndSize: defaultQAndSize}, 3400 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SQSUB--Signed-saturating-Subtract-?lang=en 3401 VSQSUB: {u: 0b0, opcode: 0b00101, qAndSize: defaultQAndSize}, 3402 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UQSUB--Unsigned-saturating-Subtract-?lang=en 3403 VUQSUB: {u: 0b1, opcode: 0b00101, qAndSize: defaultQAndSize}, 3404 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/BIT--Bitwise-Insert-if-True-?lang=en 3405 VBIT: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{ 3406 VectorArrangement8B: {q: 0b0, size: 0b10}, 3407 VectorArrangement16B: {q: 0b1, size: 0b10}, 3408 }}, 3409 SQRDMULH: {u: 0b1, opcode: 0b10110, qAndSize: map[VectorArrangement]qAndSize{ 3410 VectorArrangement4H: {q: 0b0, size: 0b01}, 3411 VectorArrangement8H: {q: 0b1, size: 0b01}, 3412 VectorArrangement2S: {q: 0b0, size: 0b10}, 3413 VectorArrangement4S: {q: 0b1, size: 0b10}, 3414 }}, 3415 } 3416 3417 // qAndSize is a pair of "Q" and "size" that appear in
https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3418 type qAndSize struct{ q, size byte } 3419 3420 // defaultQAndSize maps a vector arrangement to the default qAndSize pair shared by many instructions' encodings. 3421 var defaultQAndSize = map[VectorArrangement]qAndSize{ 3422 VectorArrangement8B: {size: 0b00, q: 0b0}, 3423 VectorArrangement16B: {size: 0b00, q: 0b1}, 3424 VectorArrangement4H: {size: 0b01, q: 0b0}, 3425 VectorArrangement8H: {size: 0b01, q: 0b1}, 3426 VectorArrangement2S: {size: 0b10, q: 0b0}, 3427 VectorArrangement4S: {size: 0b10, q: 0b1}, 3428 VectorArrangement1D: {size: 0b11, q: 0b0}, 3429 VectorArrangement2D: {size: 0b11, q: 0b1}, 3430 } 3431 3432 // advancedSIMDAcrossLanes holds information to encode instructions as "Advanced SIMD across lanes" in 3433 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3434 var advancedSIMDAcrossLanes = map[asm.Instruction]struct { 3435 qAndSize map[VectorArrangement]qAndSize 3436 u, opcode byte 3437 }{ 3438 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDV--Add-across-Vector-?lang=en 3439 ADDV: { 3440 u: 0b0, opcode: 0b11011, 3441 qAndSize: map[VectorArrangement]qAndSize{ 3442 VectorArrangement16B: {size: 0b00, q: 0b1}, 3443 VectorArrangement8B: {size: 0b00, q: 0b0}, 3444 VectorArrangement8H: {size: 0b01, q: 0b1}, 3445 VectorArrangement4H: {size: 0b01, q: 0b0}, 3446 VectorArrangement4S: {size: 0b10, q: 0b1}, 3447 }, 3448 }, 3449 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMINV--Unsigned-Minimum-across-Vector-?lang=en 3450 UMINV: { 3451 u: 0b1, opcode: 0b11010, 3452 qAndSize: map[VectorArrangement]qAndSize{ 3453 VectorArrangement16B: {size: 0b00, q: 0b1}, 3454 VectorArrangement8B: {size: 0b00, q: 0b0}, 3455 VectorArrangement8H: {size: 0b01, q: 0b1}, 3456 VectorArrangement4H: {size: 0b01, q: 0b0}, 3457 VectorArrangement4S: {size: 0b10, q: 0b1}, 3458 }, 3459 }, 3460 UADDLV: {u: 0b1, opcode: 0b00011, qAndSize: map[VectorArrangement]qAndSize{ 3461 VectorArrangement16B: {size: 0b00, q: 0b1}, 3462 VectorArrangement8B: {size: 0b00, q: 0b0}, 3463 VectorArrangement8H: {size: 0b01, q: 0b1}, 3464 VectorArrangement4H: {size: 0b01, q: 0b0}, 3465 VectorArrangement4S: {size: 0b10, q: 0b1}, 3466 }}, 3467 } 3468 3469 // advancedSIMDScalarPairwise holds information to encode instructions as "Advanced SIMD scalar pairwise" in 3470 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3471 var advancedSIMDScalarPairwise = map[asm.Instruction]struct { 3472 size map[VectorArrangement]byte 3473 u, opcode byte 3474 }{ 3475 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADDP--scalar---Add-Pair-of-elements--scalar--?lang=en 3476 ADDP: {u: 0b0, opcode: 0b11011, size: map[VectorArrangement]byte{VectorArrangement2D: 0b11}}, 3477 } 3478 3479 // advancedSIMDCopy holds information to encode instructions as "Advanced SIMD copy" in 3480 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3481 var advancedSIMDCopy = map[asm.Instruction]struct { 3482 // TODO: extract common implementation of resolver.
3483 resolver func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) 3484 op byte 3485 }{ 3486 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/DUP--element---Duplicate-vector-element-to-vector-or-scalar-?lang=en 3487 DUPELEM: {op: 0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) { 3488 imm4 = 0b0000 3489 q = 0b1 3490 3491 switch arr { 3492 case VectorArrangementB: 3493 imm5 |= 0b1 3494 imm5 |= byte(srcIndex) << 1 3495 case VectorArrangementH: 3496 imm5 |= 0b10 3497 imm5 |= byte(srcIndex) << 2 3498 case VectorArrangementS: 3499 imm5 |= 0b100 3500 imm5 |= byte(srcIndex) << 3 3501 case VectorArrangementD: 3502 imm5 |= 0b1000 3503 imm5 |= byte(srcIndex) << 4 3504 default: 3505 err = fmt.Errorf("unsupported arrangement for DUPELEM: %s", arr) 3506 } 3507 3508 return 3509 }}, 3510 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/DUP--general---Duplicate-general-purpose-register-to-vector-?lang=en 3511 DUPGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) { 3512 imm4 = 0b0001 3513 switch arr { 3514 case VectorArrangement8B: 3515 imm5 = 0b1 3516 case VectorArrangement16B: 3517 imm5 = 0b1 3518 q = 0b1 3519 case VectorArrangement4H: 3520 imm5 = 0b10 3521 case VectorArrangement8H: 3522 imm5 = 0b10 3523 q = 0b1 3524 case VectorArrangement2S: 3525 imm5 = 0b100 3526 case VectorArrangement4S: 3527 imm5 = 0b100 3528 q = 0b1 3529 case VectorArrangement2D: 3530 imm5 = 0b1000 3531 q = 0b1 3532 default: 3533 err = fmt.Errorf("unsupported arrangement for DUPGEN: %s", arr) 3534 } 3535 return 3536 }}, 3537 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/INS--general---Insert-vector-element-from-general-purpose-register-?lang=en 3538 INSGEN: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) { 3539 imm4, q = 0b0011, 0b1 3540 switch arr { 3541 case VectorArrangementB: 3542 imm5 |= 0b1 3543 imm5 |= byte(dstIndex) << 1 3544 case VectorArrangementH: 3545 imm5 |= 0b10 3546 imm5 |= byte(dstIndex) << 2 3547 case VectorArrangementS: 3548 imm5 |= 0b100 3549 imm5 |= byte(dstIndex) << 3 3550 case VectorArrangementD: 3551 imm5 |= 0b1000 3552 imm5 |= byte(dstIndex) << 4 3553 default: 3554 err = fmt.Errorf("unsupported arrangement for INSGEN: %s", arr) 3555 } 3556 return 3557 }}, 3558 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMOV--Unsigned-Move-vector-element-to-general-purpose-register-?lang=en 3559 UMOV: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) { 3560 imm4 = 0b0111 3561 switch arr { 3562 case VectorArrangementB: 3563 imm5 |= 0b1 3564 imm5 |= byte(srcIndex) << 1 3565 case VectorArrangementH: 3566 imm5 |= 0b10 3567 imm5 |= byte(srcIndex) << 2 3568 case VectorArrangementS: 3569 imm5 |= 0b100 3570 imm5 |= byte(srcIndex) << 3 3571 case VectorArrangementD: 3572 imm5 |= 0b1000 3573 imm5 |= byte(srcIndex) << 4 3574 q = 0b1 3575 default: 3576 err = fmt.Errorf("unsupported arrangement for UMOV: %s", arr) 3577 } 3578 return 3579 }}, 3580 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SMOV--Signed-Move-vector-element-to-general-purpose-register-?lang=en 3581 SMOV32: {op: 0b0, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) { 3582 imm4
= 0b0101 3583 switch arr { 3584 case VectorArrangementB: 3585 imm5 |= 0b1 3586 imm5 |= byte(srcIndex) << 1 3587 case VectorArrangementH: 3588 imm5 |= 0b10 3589 imm5 |= byte(srcIndex) << 2 3590 default: 3591 err = fmt.Errorf("unsupported arrangement for SMOV32: %s", arr) 3592 } 3593 return 3594 }}, 3595 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/INS--element---Insert-vector-element-from-another-vector-element-?lang=en 3596 INSELEM: {op: 0b1, resolver: func(srcIndex, dstIndex VectorIndex, arr VectorArrangement) (imm5, imm4, q byte, err error) { 3597 q = 0b1 3598 switch arr { 3599 case VectorArrangementB: 3600 imm5 |= 0b1 3601 imm5 |= byte(dstIndex) << 1 3602 imm4 = byte(srcIndex) 3603 case VectorArrangementH: 3604 imm5 |= 0b10 3605 imm5 |= byte(dstIndex) << 2 3606 imm4 = byte(srcIndex) << 1 3607 case VectorArrangementS: 3608 imm5 |= 0b100 3609 imm5 |= byte(dstIndex) << 3 3610 imm4 = byte(srcIndex) << 2 3611 case VectorArrangementD: 3612 imm5 |= 0b1000 3613 imm5 |= byte(dstIndex) << 4 3614 imm4 = byte(srcIndex) << 3 3615 default: 3616 err = fmt.Errorf("unsupported arrangement for INSELEM: %s", arr) 3617 } 3618 return 3619 }}, 3620 } 3621 3622 // advancedSIMDTableLookup holds information to encode instructions as "Advanced SIMD table lookup" in 3623 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3624 var advancedSIMDTableLookup = map[asm.Instruction]struct { 3625 q map[VectorArrangement]byte 3626 op, op2, Len byte 3627 }{ 3628 TBL1: {op: 0, op2: 0, Len: 0b00, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}}, 3629 TBL2: {op: 0, op2: 0, Len: 0b01, q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8B: 0b0}}, 3630 } 3631 3632 // advancedSIMDShiftByImmediate holds information to encode instructions as "Advanced SIMD shift by immediate" in 3633 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3634 var advancedSIMDShiftByImmediate = map[asm.Instruction]struct { 3635 q map[VectorArrangement]byte 3636 immResolver func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) 3637 U, opcode byte 3638 }{ 3639 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/SSHLL--SSHLL2--Signed-Shift-Left-Long--immediate-- 3640 SSHLL: { 3641 U: 0b0, opcode: 0b10100, 3642 q: map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0}, 3643 immResolver: immResolverForSIMDShiftLeftByImmediate, 3644 }, 3645 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/SSHLL--SSHLL2--Signed-Shift-Left-Long--immediate-- 3646 SSHLL2: { 3647 U: 0b0, opcode: 0b10100, 3648 q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1}, 3649 immResolver: immResolverForSIMDShiftLeftByImmediate, 3650 }, 3651 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/USHLL--USHLL2--Unsigned-Shift-Left-Long--immediate-- 3652 USHLL: { 3653 U: 0b1, opcode: 0b10100, 3654 q: map[VectorArrangement]byte{VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0}, 3655 immResolver: immResolverForSIMDShiftLeftByImmediate, 3656 }, 3657 // https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/USHLL--USHLL2--Unsigned-Shift-Left-Long--immediate-- 3658
USHLL2: { 3659 U: 0b1, opcode: 0b10100, 3660 q: map[VectorArrangement]byte{VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1}, 3661 immResolver: immResolverForSIMDShiftLeftByImmediate, 3662 }, 3663 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHR--Signed-Shift-Right--immediate--?lang=en 3664 SSHR: { 3665 U: 0b0, opcode: 0b00000, 3666 q: map[VectorArrangement]byte{ 3667 VectorArrangement16B: 0b1, VectorArrangement8H: 0b1, VectorArrangement4S: 0b1, VectorArrangement2D: 0b1, 3668 VectorArrangement8B: 0b0, VectorArrangement4H: 0b0, VectorArrangement2S: 0b0, 3669 }, 3670 immResolver: func(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) { 3671 switch arr { 3672 case VectorArrangement16B, VectorArrangement8B: 3673 immh = 0b0001 3674 immb = 8 - byte(shiftAmount&0b111) 3675 case VectorArrangement8H, VectorArrangement4H: 3676 v := 16 - byte(shiftAmount&0b1111) 3677 immb = v & 0b111 3678 immh = 0b0010 | (v >> 3) 3679 case VectorArrangement4S, VectorArrangement2S: 3680 v := 32 - byte(shiftAmount&0b11111) 3681 immb = v & 0b111 3682 immh = 0b0100 | (v >> 3) 3683 case VectorArrangement2D: 3684 v := 64 - byte(shiftAmount&0b111111) 3685 immb = v & 0b111 3686 immh = 0b1000 | (v >> 3) 3687 default: 3688 err = fmt.Errorf("unsupported arrangement %s", arr) 3689 } 3690 return 3691 }, 3692 }, 3693 } 3694 3695 // advancedSIMDPermute holds information to encode instructions as "Advanced SIMD permute" in 3696 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3697 var advancedSIMDPermute = map[asm.Instruction]struct { 3698 opcode byte 3699 }{ 3700 ZIP1: {opcode: 0b011}, 3701 } 3702 3703 func immResolverForSIMDShiftLeftByImmediate(shiftAmount int64, arr VectorArrangement) (immh, immb byte, err error) { 3704 switch arr { 3705 case VectorArrangement16B, VectorArrangement8B: 3706 immb = byte(shiftAmount) 3707 immh = 0b0001 3708 case VectorArrangement8H, VectorArrangement4H: 3709 immb = byte(shiftAmount) & 0b111 3710 immh = 0b0010 | byte(shiftAmount>>3) 3711 case VectorArrangement4S, VectorArrangement2S: 3712 immb = byte(shiftAmount) & 0b111 3713 immh = 0b0100 | byte(shiftAmount>>3) 3714 default: 3715 err = fmt.Errorf("unsupported arrangement %s", arr) 3716 } 3717 return 3718 } 3719 3720 // encodeAdvancedSIMDCopy encodes instruction as "Advanced SIMD copy" in 3721 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3722 func (a *AssemblerImpl) encodeAdvancedSIMDCopy(buf asm.Buffer, srcRegBits, dstRegBits, op, imm5, imm4, q byte) { 3723 buf.Append4Bytes( 3724 (srcRegBits<<5)|dstRegBits, 3725 imm4<<3|0b1<<2|srcRegBits>>3, 3726 imm5, 3727 q<<6|op<<5|0b1110, 3728 ) 3729 } 3730 3731 // encodeAdvancedSIMDThreeSame encodes instruction as "Advanced SIMD three same" in 3732 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3733 func (a *AssemblerImpl) encodeAdvancedSIMDThreeSame(buf asm.Buffer, src1, src2, dst, opcode, size, q, u byte) { 3734 buf.Append4Bytes( 3735 (src2<<5)|dst, 3736 opcode<<3|1<<2|src2>>3, 3737 size<<6|0b1<<5|src1, 3738 q<<6|u<<5|0b1110, 3739 ) 3740 } 3741 3742 // encodeAdvancedSIMDThreeDifferent encodes instruction as "Advanced SIMD three different" in 3743 //
https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3744 func (a *AssemblerImpl) encodeAdvancedSIMDThreeDifferent(buf asm.Buffer, src1, src2, dst, opcode, size, q, u byte) { 3745 buf.Append4Bytes( 3746 (src2<<5)|dst, 3747 opcode<<4|src2>>3, 3748 size<<6|0b1<<5|src1, 3749 q<<6|u<<5|0b1110, 3750 ) 3751 } 3752 3753 // encodeAdvancedSIMDPermute encodes instruction as "Advanced SIMD permute" in 3754 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3755 func (a *AssemblerImpl) encodeAdvancedSIMDPermute(buf asm.Buffer, src1, src2, dst, opcode, size, q byte) { 3756 buf.Append4Bytes( 3757 (src2<<5)|dst, 3758 opcode<<4|0b1<<3|src2>>3, 3759 size<<6|src1, 3760 q<<6|0b1110, 3761 ) 3762 } 3763 3764 func (a *AssemblerImpl) encodeVectorRegisterToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) { 3765 var srcVectorRegBits byte 3766 if n.srcReg != RegRZR { 3767 srcVectorRegBits, err = vectorRegisterBits(n.srcReg) 3768 } else if n.instruction == CMEQZERO { 3769 // CMEQZERO has RegRZR as the src, and we apply the instruction to the same register as the destination. 3770 srcVectorRegBits, err = vectorRegisterBits(n.dstReg) 3771 } 3772 3773 if err != nil { 3774 return err 3775 } 3776 3777 dstVectorRegBits, err := vectorRegisterBits(n.dstReg) 3778 if err != nil { 3779 return err 3780 } 3781 3782 if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok { 3783 imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement) 3784 if err != nil { 3785 return err 3786 } 3787 a.encodeAdvancedSIMDCopy(buf, srcVectorRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q) 3788 return nil 3789 } 3790 3791 if scalarPairwise, ok := advancedSIMDScalarPairwise[n.instruction]; ok { 3792 // See "Advanced SIMD scalar pairwise" in 3793 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3794 size, ok := scalarPairwise.size[n.vectorArrangement] 3795 if !ok { 3796 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3797 } 3798 buf.Append4Bytes( 3799 (srcVectorRegBits<<5)|dstVectorRegBits, 3800 scalarPairwise.opcode<<4|1<<3|srcVectorRegBits>>3, 3801 size<<6|0b11<<4|scalarPairwise.opcode>>4, 3802 0b1<<6|scalarPairwise.u<<5|0b11110, 3803 ) 3804 return 3805 } 3806 3807 if twoRegMisc, ok := advancedSIMDTwoRegisterMisc[n.instruction]; ok { 3808 // See "Advanced SIMD two-register miscellaneous" in 3809 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3810 qs, ok := twoRegMisc.qAndSize[n.vectorArrangement] 3811 if !ok { 3812 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3813 } 3814 buf.Append4Bytes( 3815 (srcVectorRegBits<<5)|dstVectorRegBits, 3816 twoRegMisc.opcode<<4|0b1<<3|srcVectorRegBits>>3, 3817 qs.size<<6|0b1<<5|twoRegMisc.opcode>>4, 3818 qs.q<<6|twoRegMisc.u<<5|0b01110, 3819 ) 3820 return nil 3821 } 3822 3823 if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok { 3824 qs, ok := threeSame.qAndSize[n.vectorArrangement] 3825 if !ok { 3826 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3827 } 3828 a.encodeAdvancedSIMDThreeSame(buf, 
srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeSame.opcode, qs.size, qs.q, threeSame.u) 3829 return nil 3830 } 3831 3832 if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok { 3833 qs, ok := threeDifferent.qAndSize[n.vectorArrangement] 3834 if !ok { 3835 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3836 } 3837 a.encodeAdvancedSIMDThreeDifferent(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u) 3838 return nil 3839 } 3840 3841 if acrossLanes, ok := advancedSIMDAcrossLanes[n.instruction]; ok { 3842 // See "Advanced SIMD across lanes" in 3843 // https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en 3844 qs, ok := acrossLanes.qAndSize[n.vectorArrangement] 3845 if !ok { 3846 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3847 } 3848 buf.Append4Bytes( 3849 (srcVectorRegBits<<5)|dstVectorRegBits, 3850 acrossLanes.opcode<<4|0b1<<3|srcVectorRegBits>>3, 3851 qs.size<<6|0b11000<<1|acrossLanes.opcode>>4, 3852 qs.q<<6|acrossLanes.u<<5|0b01110, 3853 ) 3854 return nil 3855 } 3856 3857 if lookup, ok := advancedSIMDTableLookup[n.instruction]; ok { 3858 q, ok := lookup.q[n.vectorArrangement] 3859 if !ok { 3860 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3861 } 3862 buf.Append4Bytes( 3863 (srcVectorRegBits<<5)|dstVectorRegBits, 3864 lookup.Len<<5|lookup.op<<4|srcVectorRegBits>>3, 3865 lookup.op2<<6|dstVectorRegBits, 3866 q<<6|0b1110, 3867 ) 3868 return 3869 } 3870 3871 if shiftByImmediate, ok := advancedSIMDShiftByImmediate[n.instruction]; ok { 3872 immh, immb, err := shiftByImmediate.immResolver(n.srcConst, n.vectorArrangement) 3873 if err != nil { 3874 return err 3875 } 3876 3877 q, ok := shiftByImmediate.q[n.vectorArrangement] 3878 if !ok { 3879 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3880 } 3881 3882 buf.Append4Bytes( 3883 (srcVectorRegBits<<5)|dstVectorRegBits, 3884 shiftByImmediate.opcode<<3|0b1<<2|srcVectorRegBits>>3, 3885 immh<<3|immb, 3886 q<<6|shiftByImmediate.U<<5|0b1111, 3887 ) 3888 return nil 3889 } 3890 3891 if permute, ok := advancedSIMDPermute[n.instruction]; ok { 3892 size, q := arrangementSizeQ(n.vectorArrangement) 3893 a.encodeAdvancedSIMDPermute(buf, srcVectorRegBits, dstVectorRegBits, dstVectorRegBits, permute.opcode, size, q) 3894 return 3895 } 3896 return errorEncodingUnsupported(n) 3897 } 3898 3899 func (a *AssemblerImpl) encodeTwoVectorRegistersToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) { 3900 var srcRegBits, srcRegBits2, dstRegBits byte 3901 srcRegBits, err = vectorRegisterBits(n.srcReg) 3902 if err != nil { 3903 return err 3904 } 3905 3906 srcRegBits2, err = vectorRegisterBits(n.srcReg2) 3907 if err != nil { 3908 return err 3909 } 3910 3911 dstRegBits, err = vectorRegisterBits(n.dstReg) 3912 if err != nil { 3913 return err 3914 } 3915 3916 if threeSame, ok := advancedSIMDThreeSame[n.instruction]; ok { 3917 qs, ok := threeSame.qAndSize[n.vectorArrangement] 3918 if !ok { 3919 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3920 } 3921 a.encodeAdvancedSIMDThreeSame(buf, srcRegBits, srcRegBits2, dstRegBits, threeSame.opcode, qs.size, qs.q, 
threeSame.u) 3922 return nil 3923 } 3924 3925 if threeDifferent, ok := advancedSIMDThreeDifferent[n.instruction]; ok { 3926 qs, ok := threeDifferent.qAndSize[n.vectorArrangement] 3927 if !ok { 3928 return fmt.Errorf("unsupported vector arrangement %s for %s", n.vectorArrangement, InstructionName(n.instruction)) 3929 } 3930 a.encodeAdvancedSIMDThreeDifferent(buf, srcRegBits, srcRegBits2, dstRegBits, threeDifferent.opcode, qs.size, qs.q, threeDifferent.u) 3931 return nil 3932 } 3933 3934 if permute, ok := advancedSIMDPermute[n.instruction]; ok { 3935 size, q := arrangementSizeQ(n.vectorArrangement) 3936 a.encodeAdvancedSIMDPermute(buf, srcRegBits, srcRegBits2, dstRegBits, permute.opcode, size, q) 3937 return 3938 } 3939 3940 if n.instruction == EXT { 3941 // EXT is the only instruction in "Advanced SIMD extract", so inline the encoding here. 3942 // https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en 3943 var q, imm4 byte 3944 switch n.vectorArrangement { 3945 case VectorArrangement16B: 3946 imm4 = 0b1111 & byte(n.srcConst) 3947 q = 0b1 3948 case VectorArrangement8B: 3949 imm4 = 0b111 & byte(n.srcConst) 3950 default: 3951 return fmt.Errorf("invalid arrangement %s for EXT", n.vectorArrangement) 3952 } 3953 buf.Append4Bytes( 3954 (srcRegBits2<<5)|dstRegBits, 3955 imm4<<3|srcRegBits2>>3, 3956 srcRegBits, 3957 q<<6|0b101110, 3958 ) 3959 return 3960 } 3961 return errorEncodingUnsupported(n) 3962 } 3963 3964 func (a *AssemblerImpl) encodeVectorRegisterToRegister(buf asm.Buffer, n *nodeImpl) (err error) { 3965 if err = checkArrangementIndexPair(n.vectorArrangement, n.srcVectorIndex); err != nil { 3966 return 3967 } 3968 3969 srcVecRegBits, err := vectorRegisterBits(n.srcReg) 3970 if err != nil { 3971 return err 3972 } 3973 3974 dstRegBits, err := intRegisterBits(n.dstReg) 3975 if err != nil { 3976 return err 3977 } 3978 3979 if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok { 3980 imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement) 3981 if err != nil { 3982 return err 3983 } 3984 a.encodeAdvancedSIMDCopy(buf, srcVecRegBits, dstRegBits, simdCopy.op, imm5, imm4, q) 3985 return nil 3986 } 3987 return errorEncodingUnsupported(n) 3988 } 3989 3990 func (a *AssemblerImpl) encodeRegisterToVectorRegister(buf asm.Buffer, n *nodeImpl) (err error) { 3991 srcRegBits, err := intRegisterBits(n.srcReg) 3992 if err != nil { 3993 return err 3994 } 3995 3996 dstVectorRegBits, err := vectorRegisterBits(n.dstReg) 3997 if err != nil { 3998 return err 3999 } 4000 4001 if simdCopy, ok := advancedSIMDCopy[n.instruction]; ok { 4002 imm5, imm4, q, err := simdCopy.resolver(n.srcVectorIndex, n.dstVectorIndex, n.vectorArrangement) 4003 if err != nil { 4004 return err 4005 } 4006 a.encodeAdvancedSIMDCopy(buf, srcRegBits, dstVectorRegBits, simdCopy.op, imm5, imm4, q) 4007 return nil 4008 } 4009 return errorEncodingUnsupported(n) 4010 } 4011 4012 var zeroRegisterBits byte = 0b11111 4013 4014 func isIntRegister(r asm.Register) bool { 4015 return RegR0 <= r && r <= RegSP 4016 } 4017 4018 func isVectorRegister(r asm.Register) bool { 4019 return RegV0 <= r && r <= RegV31 4020 } 4021 4022 func isConditionalRegister(r asm.Register) bool { 4023 return RegCondEQ <= r && r <= RegCondNV 4024 } 4025 4026 func intRegisterBits(r asm.Register) (ret byte, err error) { 4027 if !isIntRegister(r) { 4028 err = fmt.Errorf("%s is not an integer register", RegisterName(r)) 4029 } else if r == RegSP { 4030 // SP has the same bit representation as RegRZR.
4031 r = RegRZR 4032 } 4033 ret = byte(r - RegR0) 4034 return 4035 } 4036 4037 func vectorRegisterBits(r asm.Register) (ret byte, err error) { 4038 if !isVectorRegister(r) { 4039 err = fmt.Errorf("%s is not a vector register", RegisterName(r)) 4040 } else { 4041 ret = byte(r - RegV0) 4042 } 4043 return 4044 } 4045 4046 func registerBits(r asm.Register) (ret byte) { 4047 if isIntRegister(r) { 4048 if r == RegSP { 4049 // SP has the same bit representation as RegRZR. 4050 r = RegRZR 4051 } 4052 ret = byte(r - RegR0) 4053 } else { 4054 ret = byte(r - RegV0) 4055 } 4056 return 4057 }
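
// ---------------------------------------------------------------------------
// Editorial examples (sketches only; not part of the upstream assembler).
// The helper names below (exampleThreeSameVADD, exampleAdvancedSIMDCopyImm5,
// exampleINSELEMImms, exampleShiftImms) are hypothetical and exist purely to
// illustrate the encoding tables and resolvers documented above.
// ---------------------------------------------------------------------------

// exampleThreeSameVADD mirrors encodeAdvancedSIMDThreeSame by hand for
// ADD V2.16B, V0.16B, V1.16B: u=0b0 and opcode=0b10000 come from the VADD
// entry in advancedSIMDThreeSame, and {q: 0b1, size: 0b00} from
// defaultQAndSize[VectorArrangement16B]. The four little-endian bytes below
// correspond to the 32-bit instruction word 0x4E218402.
func exampleThreeSameVADD() [4]byte {
	const (
		rm, rn, rd = 1, 0, 2 // Rm=V1, Rn=V0, Rd=V2
		opcode     = 0b10000 // ADD (vector)
		size, q, u = 0b00, 0b1, 0b0
	)
	return [4]byte{
		rn<<5 | rd,               // Rn low bits and Rd
		opcode<<3 | 1<<2 | rn>>3, // opcode, fixed bit 10, Rn high bits
		size<<6 | 1<<5 | rm,      // size, fixed bit 21, Rm
		q<<6 | u<<5 | 0b1110,     // Q, U, and the 0b01110 "three same" family bits
	}
}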
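
// exampleAdvancedSIMDCopyImm5 sketches how the "Advanced SIMD copy" resolvers
// above pack an element width and a lane index into imm5: the lowest set bit
// selects the width (B=0b1, H=0b10, S=0b100, D=0b1000), and the lane index
// occupies the bits above it. For DUP Vd.4S, Vn.S[3], the DUPELEM resolver
// produces imm5 = 0b11100 (again, an editorial illustration only).
func exampleAdvancedSIMDCopyImm5() byte {
	const srcIndex = 3
	imm5 := byte(0b100)         // width bit for a 32-bit (S) element
	imm5 |= byte(srcIndex) << 3 // the lane index sits in imm5<4:3> for S elements
	return imm5                 // 0b11100
}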
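
// exampleINSELEMImms sketches the INSELEM packing, where both lane indexes
// matter: the destination index goes into imm5 above the width bit, and the
// source index goes into imm4, shifted according to the element size. For
// INS Vd.S[2], Vn.S[1], the resolver yields imm5 = 0b10100 and imm4 = 0b0100.
// (Editorial illustration only.)
func exampleINSELEMImms() (imm5, imm4 byte) {
	const dstIndex, srcIndex = 2, 1
	imm5 = 0b100 | byte(dstIndex)<<3 // width bit for S plus the destination lane
	imm4 = byte(srcIndex) << 2       // source lane, scaled for S elements
	return                           // imm5 = 0b10100, imm4 = 0b0100
}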
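
// exampleShiftImms contrasts the two immh:immb conventions used by
// advancedSIMDShiftByImmediate above (editorial illustration only): the
// shift-left-long resolver encodes esize + shiftAmount, while the SSHR
// resolver encodes 2*esize - shiftAmount, matching how the Arm reference
// recovers the shift amount for each instruction class.
func exampleShiftImms() (left, right byte) {
	// SSHLL V0.4S, V1.4H, #8: immh:immb = 16 + 8 = 24 (0b0011000).
	leftImmh, leftImmb := byte(0b0010|8>>3), byte(8&0b111)
	// SSHR V0.4S, V1.4S, #5: immh:immb = 64 - 5 = 59 (0b0111011).
	v := byte(32 - 5)
	rightImmh, rightImmb := byte(0b0100|v>>3), v&0b111
	return leftImmh<<3 | leftImmb, rightImmh<<3 | rightImmb // 24, 59
}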