// Source: github.com/apache/arrow/go/v10@v10.0.1/parquet/internal/bmi/_lib/bitmap_neon.s
//
// Syntax : GNU/LLVM AArch64 assembly (ELF directives, `//` comments).
// ABI    : register usage matches AAPCS64 (args in x0-x2, result in x0,
//          x29/x30 frame record pushed on entry).
// Origin : compiler output (see the .file and .ident directives); the
//          instruction stream below is kept exactly as emitted, only the
//          layout and the comments were written by hand.

        .text
        .file   "bitmap_bmi2.c"

//-----------------------------------------------------------------------
// extract_bits_neon
//
// Software bit-extract: walks the set bits of the mask from least to most
// significant and packs the corresponding bits of the value into
// consecutive low bits of the result (same effect as x86 BMI2 PEXT).
// No SIMD instructions are used here despite the "_neon" suffix.
//
// In:    x0 = value whose bits are sampled
//        x1 = mask selecting which bit positions of x0 to sample
// Out:   x0 = packed bits (0 when the mask is 0)
// Clobb: x1, x8-x12, NZCV flags
// Stack: 16 bytes (frame record only)
//-----------------------------------------------------------------------
        .globl  extract_bits_neon               // -- Begin function extract_bits_neon
        .p2align        2
        .type   extract_bits_neon,@function
extract_bits_neon:                              // @extract_bits_neon
// %bb.0:
        stp     x29, x30, [sp, #-16]!           // push frame record (FP, LR)
        mov     x29, sp
        cbz     x1, .LBB0_4                     // empty mask -> result is 0
// %bb.1:
        mov     x8, x0                          // x8 = value; x0 becomes the accumulator
        mov     x0, xzr                         // result = 0
        mov     w9, #1                          // x9 = next output bit, starting at bit 0
.LBB0_2:                                        // =>This Inner Loop Header: Depth=1
        // Invariant: x1 != 0 here; x9 = 1 << (number of mask bits consumed so far).
        and     x10, x1, x8                     // value bits still selected by the mask
        neg     x11, x1                         // (mask & -mask) isolates the lowest mask bit
        tst     x10, x11                        // Z=1 when value has a 0 at that position
        sub     x12, x1, #1
        csel    x10, xzr, x9, eq                // contribute x9 only when the value bit is 1
        ands    x1, x12, x1                     // mask &= mask - 1 (drop the bit just handled)
        orr     x0, x10, x0
        lsl     x9, x9, #1                      // next output position; leaves flags intact
        b.ne    .LBB0_2                         // flags come from the ands: loop while mask != 0
// %bb.3:
        ldp     x29, x30, [sp], #16             // pop frame record
        ret
.LBB0_4:
        mov     x0, xzr
        ldp     x29, x30, [sp], #16             // pop frame record
        ret
.Lfunc_end0:
        .size   extract_bits_neon, .Lfunc_end0-extract_bits_neon
                                                // -- End function

// Initial per-lane shift amounts for the vector loop of levels_to_bitmap_neon:
// 64-bit lane 0 handles element index 0, lane 1 handles element index 1.
        .section        .rodata.cst16,"aM",@progbits,16
        .p2align        4                       // -- Begin function levels_to_bitmap_neon
.LCPI1_0:
        .xword  0                               // 0x0
        .xword  1                               // 0x1

//-----------------------------------------------------------------------
// levels_to_bitmap_neon
//
// Builds a 64-bit bitmap in which bit i is set when the i-th signed
// 16-bit element is strictly greater (signed compare) than the threshold.
// Counts of 4 or more run a NEON loop that consumes 4 elements per
// iteration; any remaining 1-3 elements (or a count below 4) go through a
// scalar loop.
//
// In:    x0 = pointer to signed 16-bit elements (read with ldrsh/ldursh)
//        w1 = element count, treated as signed 32-bit; < 1 yields 0
//        w2 = threshold; only the low 16 bits are used, sign-extended
// Out:   x0 = bitmap
// Clobb: x8-x14, v0-v7, v16, v17, NZCV flags
// Stack: 16 bytes (frame record only)
//
// NOTE(review): the result is one 64-bit word, so callers are presumably
// expected to pass a count of at most 64. For larger counts the two paths
// do not agree: the scalar `lsl` uses the shift amount modulo 64, while the
// vector `ushl` interprets the low byte of each lane as a signed shift.
// Confirm the bound against the callers.
//-----------------------------------------------------------------------
        .text
        .globl  levels_to_bitmap_neon
        .p2align        2
        .type   levels_to_bitmap_neon,@function
levels_to_bitmap_neon:                          // @levels_to_bitmap_neon
// %bb.0:
        stp     x29, x30, [sp, #-16]!           // push frame record (FP, LR)
        cmp     w1, #1
        mov     x29, sp
        b.lt    .LBB1_3                         // count <= 0 -> return 0
// %bb.1:
        cmp     w1, #3
        mov     w9, w1                          // x9 = count, zero-extended to 64 bits
        b.hi    .LBB1_4                         // count >= 4 -> vector path
// %bb.2:
        mov     x10, xzr                        // scalar loop starts at index 0
        mov     x8, xzr                         // bitmap = 0
        b       .LBB1_7
.LBB1_3:
        mov     x8, xzr
        b       .LBB1_8
.LBB1_4:
        // Vector setup. Register roles inside the loop:
        //   v0, v6 = partial bitmaps (elements i,i+1 and i+2,i+3 respectively)
        //   v1     = shift amounts {i, i+1}     v2 = {2, 2}
        //   v3     = {1, 1} (keeps one bit of each compare mask)
        //   v4     = threshold, sign-extended from 16 bits, in both 32-bit lanes
        //   v5     = {4, 4} (per-iteration index step)
        //   x8     = &element[i + 2]            x10 = count rounded down to a multiple of 4
        //   x11    = elements left for the vector loop
        adrp    x11, .LCPI1_0
        ldr     q1, [x11, :lo12:.LCPI1_0]
        mov     w11, #2
        dup     v3.2s, w2
        dup     v2.2d, x11
        mov     w11, #1
        and     x10, x9, #0xfffffffc
        shl     v4.2s, v3.2s, #16               // shl + sshr by 16: sign-extend the low half-word
        dup     v3.2d, x11
        mov     w11, #4
        add     x8, x0, #4
        movi    v0.2d, #0000000000000000
        sshr    v4.2s, v4.2s, #16
        dup     v5.2d, x11
        mov     x11, x10
        movi    v6.2d, #0000000000000000
.LBB1_5:                                        // =>This Inner Loop Header: Depth=1
        ldursh  w12, [x8, #-4]                  // element[i]
        ldrsh   w13, [x8]                       // element[i + 2]
        ldursh  w14, [x8, #-2]                  // element[i + 1]
        add     v17.2d, v1.2d, v2.2d            // shift amounts {i + 2, i + 3}
        fmov    s7, w12
        ldrsh   w12, [x8, #2]                   // element[i + 3]
        fmov    s16, w13
        mov     v7.s[1], w14
        cmgt    v7.2s, v7.2s, v4.2s             // all-ones lane where element > threshold
        mov     v16.s[1], w12
        cmgt    v16.2s, v16.2s, v4.2s
        ushll   v7.2d, v7.2s, #0                // widen 32-bit lanes to 64-bit lanes
        ushll   v16.2d, v16.2s, #0
        and     v7.16b, v7.16b, v3.16b          // reduce each compare mask to 0 or 1
        and     v16.16b, v16.16b, v3.16b
        ushl    v7.2d, v7.2d, v1.2d             // move each flag to its bit position
        ushl    v16.2d, v16.2d, v17.2d
        subs    x11, x11, #4                    // sets the flags used by the b.ne below
        add     v1.2d, v1.2d, v5.2d             // advance lane indices by 4
        orr     v0.16b, v7.16b, v0.16b
        orr     v6.16b, v16.16b, v6.16b
        add     x8, x8, #8                      // 4 elements * 2 bytes; leaves flags intact
        b.ne    .LBB1_5
// %bb.6:
        // Fold both accumulators and both 64-bit lanes into one scalar.
        orr     v0.16b, v6.16b, v0.16b
        dup     v1.2d, v0.d[1]
        orr     v0.16b, v0.16b, v1.16b
        cmp     x10, x9                         // was the count a multiple of 4?
        fmov    x8, d0
        b.eq    .LBB1_8                         // yes -> no scalar tail needed
.LBB1_7:                                        // =>This Inner Loop Header: Depth=1
        // Scalar loop: x10 = current index, x9 = count, x8 = bitmap so far.
        ldrsh   w11, [x0, x10, lsl #1]
        cmp     w11, w2, sxth                   // compare against sign-extended threshold
        cset    w11, gt
        lsl     x11, x11, x10
        add     x10, x10, #1
        cmp     x9, x10
        orr     x8, x11, x8
        b.ne    .LBB1_7
.LBB1_8:
        mov     x0, x8
        ldp     x29, x30, [sp], #16             // pop frame record
        ret
.Lfunc_end1:
        .size   levels_to_bitmap_neon, .Lfunc_end1-levels_to_bitmap_neon
                                                // -- End function
        .ident  "clang version 10.0.0-4ubuntu1 "
        .section        ".note.GNU-stack","",@progbits
        .addrsig