//+build !noasm !appengine

// Origin (as given by the listing header):
//   github.com/apache/arrow/go/v10@v10.0.1/parquet/internal/bmi/bitmap_neon_arm64.s
//
// Go (Plan 9 syntax) assembly for arm64. Arguments and results are passed in
// the FP-relative argument area. Most instructions are emitted as raw WORD
// encodings; the trailing comment on each WORD is its A64 disassembly.
//
// Both functions push an (x29, x30) frame record with a hand-encoded
// pre-indexed stp, which moves SP down by 16 bytes. Every exit path pops it
// with the matching ldp *before* touching an FP-relative result slot, so the
// result store always happens with SP back at its entry value.

// (C2GOASM doesn't work correctly for Arm64)
// func _extract_bits_neon(bitmap, selectBitmap uint64) (res uint64)
//
// Gathers the bits of bitmap that sit at the set-bit positions of
// selectBitmap and packs them, lowest selected position first, into the
// low-order bits of res. Bits of res above the number of set bits in
// selectBitmap are zero; selectBitmap == 0 yields res == 0.
//
// Register roles inside the loop:
//   x8  = bitmap (read-only copy)
//   x1  = select bits not yet consumed (lowest set bit is cleared each pass)
//   x9  = single-bit mask for the next output position (1, 2, 4, ...)
//   x0  = result accumulator
//   x10, x11, x12 = scratch
TEXT ·_extract_bits_neon(SB), $0-24

	MOVD bitmap+0(FP), R0
	MOVD selectBitmap+8(FP), R1

	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0x910003fd // mov x29, sp
	CBZ R1, LBB0_4   // nothing selected -> result is 0
	WORD $0xaa0003e8 // mov x8, x0
	WORD $0xaa1f03e0 // mov x0, xzr
	WORD $0x52800029 // mov w9, #1
LBB0_2:
	// (select & bitmap) & -select is non-zero exactly when bitmap has a 1
	// at the position of select's lowest set bit.
	WORD $0x8a08002a // and x10, x1, x8
	WORD $0xcb0103eb // neg x11, x1
	WORD $0xea0b015f // tst x10, x11
	WORD $0xd100042c // sub x12, x1, #1
	// x10 = current output mask if that bitmap bit was set, else 0.
	WORD $0x9a8903ea // csel x10, xzr, x9, eq
	// select &= select - 1 clears the lowest set bit; the flags set here
	// drive the BNE below (orr and lsl do not modify flags).
	WORD $0xea010181 // ands x1, x12, x1
	WORD $0xaa000140 // orr x0, x10, x0
	WORD $0xd37ff929 // lsl x9, x9, #1
	BNE LBB0_2
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	MOVD R0, res+16(FP)
	RET
LBB0_4:
	WORD $0xaa1f03e0 // mov x0, xzr
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	// The result slot must be written on every return path; without this
	// store the caller would read whatever the slot previously contained.
	MOVD R0, res+16(FP)
	RET

// func _levels_to_bitmap_neon(levels unsafe.Pointer, numLevels int, rhs int16) (res uint64)
//
// Reads numLevels signed 16-bit values starting at levels and returns a
// bitmap in which bit i is set when levels[i] > rhs (signed comparison).
// numLevels < 1 yields 0.
//
// Only the low 32 bits of numLevels are examined (cmp w1 / mov w9, w1), and
// the result is a single 64-bit word.
// NOTE(review): callers are presumably expected to pass at most 64 levels;
// confirm against the Go call site.
//
// Layout of the work:
//   numLevels < 1   -> return 0
//   numLevels <= 3  -> scalar loop only (LBB1_7)
//   numLevels >= 4  -> NEON loop over groups of 4 (LBB1_5), then the scalar
//                      loop for the remaining numLevels % 4 elements
//
// Register roles:
//   x0  = levels base pointer
//   x9  = numLevels (zero-extended from w1)
//   x10 = index of the first element the scalar loop handles
//   x8  = read cursor in the NEON loop, then the result accumulator
//   w2  = rhs
//   v1  = {i, i+1} shift amounts, v17 = v1 + {2, 2}
//   v4  = rhs sign-extended to 32 bits in both lanes
//   v0, v6 = partial result accumulators
TEXT ·_levels_to_bitmap_neon(SB), $0-32

	MOVD levels+0(FP), R0
	MOVD numLevels+8(FP), R1
	// rhs is a 2-byte int16: load it as a sign-extended halfword instead of
	// reading the surrounding padding with an 8-byte MOVD. Every use below
	// only consumes the sign-extended low 16 bits of w2.
	MOVH rhs+16(FP), R2

	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0x7100043f // cmp w1, #1
	WORD $0x910003fd // mov x29, sp
	BLT LBB1_3

	WORD $0x71000c3f // cmp w1, #3
	WORD $0x2a0103e9 // mov w9, w1
	BHI LBB1_4
	// 1..3 elements: scalar loop starting at index 0 with an empty result.
	WORD $0xaa1f03ea // mov x10, xzr
	WORD $0xaa1f03e8 // mov x8, xzr
	JMP LBB1_7
LBB1_3:
	WORD $0xaa1f03e8 // mov x8, xzr
	JMP LBB1_8
LBB1_4:
	// v1 = {0, 1}: bit positions of the first two elements of a group.
	VMOVQ $0x0000000000000000, $0x0000000000000001, V1 // adrp x11, .LCPI1_0; ldr q1, [x11, :lo12:.LCPI1_0]
	WORD $0x5280004b // mov w11, #2
	WORD $0x0e040c43 // dup v3.2s, w2
	WORD $0x4e080d62 // dup v2.2d, x11
	WORD $0x5280002b // mov w11, #1
	// x10 = numLevels rounded down to a multiple of 4.
	WORD $0x927e752a // and x10, x9, #0xfffffffc
	// shl #16 followed by sshr #16 sign-extends rhs from 16 to 32 bits.
	WORD $0x0f305464 // shl v4.2s, v3.2s, #16
	WORD $0x4e080d63 // dup v3.2d, x11
	WORD $0x5280008b // mov w11, #4
	WORD $0x91001008 // add x8, x0, #4
	WORD $0x6f00e400 // movi v0.2d, #0000000000000000
	WORD $0x0f300484 // sshr v4.2s, v4.2s, #16
	WORD $0x4e080d65 // dup v5.2d, x11
	WORD $0xaa0a03eb // mov x11, x10
	WORD $0x6f00e406 // movi v6.2d, #0000000000000000
LBB1_5:
	// Load four consecutive levels; x8 points at the third of the four.
	WORD $0x78dfc10c // ldursh w12, [x8, #-4]
	WORD $0x79c0010d // ldrsh w13, [x8]
	WORD $0x78dfe10e // ldursh w14, [x8, #-2]
	WORD $0x4ee28431 // add v17.2d, v1.2d, v2.2d
	WORD $0x1e270187 // fmov s7, w12
	WORD $0x79c0050c // ldrsh w12, [x8, #2]
	WORD $0x1e2701b0 // fmov s16, w13
	WORD $0x4e0c1dc7 // mov v7.s[1], w14
	WORD $0x0ea434e7 // cmgt v7.2s, v7.2s, v4.2s
	WORD $0x4e0c1d90 // mov v16.s[1], w12
	WORD $0x0ea43610 // cmgt v16.2s, v16.2s, v4.2s
	// Widen each compare mask to 64 bits, reduce it to 0/1 (v3 = {1, 1}),
	// then shift it to its element's bit position.
	WORD $0x2f20a4e7 // ushll v7.2d, v7.2s, #0
	WORD $0x2f20a610 // ushll v16.2d, v16.2s, #0
	WORD $0x4e231ce7 // and v7.16b, v7.16b, v3.16b
	WORD $0x4e231e10 // and v16.16b, v16.16b, v3.16b
	WORD $0x6ee144e7 // ushl v7.2d, v7.2d, v1.2d
	WORD $0x6ef14610 // ushl v16.2d, v16.2d, v17.2d
	// The flags from this subs drive the BNE below; none of the following
	// instructions modify flags.
	WORD $0xf100116b // subs x11, x11, #4
	WORD $0x4ee58421 // add v1.2d, v1.2d, v5.2d
	WORD $0x4ea01ce0 // orr v0.16b, v7.16b, v0.16b
	WORD $0x4ea61e06 // orr v6.16b, v16.16b, v6.16b
	WORD $0x91002108 // add x8, x8, #8
	BNE LBB1_5
	// Fold both accumulators and both 64-bit lanes into x8.
	WORD $0x4ea01cc0 // orr v0.16b, v6.16b, v0.16b
	WORD $0x4e180401 // dup v1.2d, v0.d[1]
	WORD $0x4ea11c00 // orr v0.16b, v0.16b, v1.16b
	WORD $0xeb09015f // cmp x10, x9
	WORD $0x9e660008 // fmov x8, d0
	BEQ LBB1_8       // numLevels was a multiple of 4: no scalar tail
LBB1_7:
	// Scalar tail: x8 |= (levels[x10] > rhs) << x10, for x10 up to x9 - 1.
	WORD $0x78ea780b // ldrsh w11, [x0, x10, lsl #1]
	WORD $0x6b22a17f // cmp w11, w2, sxth
	WORD $0x1a9fd7eb // cset w11, gt
	WORD $0x9aca216b // lsl x11, x11, x10
	WORD $0x9100054a // add x10, x10, #1
	WORD $0xeb0a013f // cmp x9, x10
	WORD $0xaa080168 // orr x8, x11, x8
	BNE LBB1_7
LBB1_8:
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	MOVD R8, res+24(FP)
	RET