github.com/apache/arrow/go/v10@v10.0.1/parquet/internal/bmi/_lib/bitmap_neon.s (about)

     1  	.text
     2  	.file	"bitmap_bmi2.c"
     3  	.globl	extract_bits_neon       // -- Begin function extract_bits_neon
     4  	.p2align	2
     5  	.type	extract_bits_neon,@function
        // extract_bits_neon: for every set bit of the mask in x1, taken from the
        // lowest to the highest, copies the bit of x0 at that position into the
        // next free low-order bit of the result (the operation x86 BMI2 calls PEXT).
        // Presumably uint64_t extract_bits_neon(uint64_t, uint64_t) -- the C
        // prototype is not visible here; TODO confirm.
        //   In:    x0 = source bits, x1 = selection mask
        //   Out:   x0 = selected bits packed towards bit 0 (0 when the mask is 0)
        //   Clobb: x1, x8-x12, NZCV flags
        // Only general-purpose registers are used; the function makes no calls.
     6  extract_bits_neon:                      // @extract_bits_neon
     7  // %bb.0:
     8  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill
     9  	mov	x29, sp
    10  	cbz	x1, .LBB0_4             // empty mask: nothing to extract, return 0
    11  // %bb.1:
    12  	mov	x8, x0                  // x8 = source bits (x0 becomes the accumulator)
    13  	mov	x0, xzr                 // x0 = result = 0
    14  	mov	w9, #1                  // x9 = next output bit, starting at bit 0
    15  .LBB0_2:                                // =>This Inner Loop Header: Depth=1
    16  	and	x10, x1, x8             // x10 = source bits that lie under the mask
    17  	neg	x11, x1                 // x11 = -mask; mask & -mask is the mask's lowest set bit
    18  	tst	x10, x11                // Z set iff the source bit at that lowest position is 0
    19  	sub	x12, x1, #1             // =1; x12 = mask - 1 (flags untouched)
    20  	csel	x10, xzr, x9, eq        // x10 = source bit set ? current output bit : 0
    21  	ands	x1, x12, x1             // mask &= mask - 1 (drops lowest set bit); Z set when exhausted
    22  	orr	x0, x10, x0             // accumulate into the result
    23  	lsl	x9, x9, #1              // advance to the next output bit
    24  	b.ne	.LBB0_2                 // flags still come from the ands above: loop while mask != 0
    25  // %bb.3:
    26  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
    27  	ret
    28  .LBB0_4:
    29  	mov	x0, xzr                 // mask was 0: result is 0
    30  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
    31  	ret
    32  .Lfunc_end0:
    33  	.size	extract_bits_neon, .Lfunc_end0-extract_bits_neon
    34                                          // -- End function
    35  	.section	.rodata.cst16,"aM",@progbits,16
    36  	.p2align	4               // -- Begin function levels_to_bitmap_neon
    37  .LCPI1_0:                               // initial bit positions {0, 1} for the first two vector lanes
    38  	.xword	0                       // 0x0
    39  	.xword	1                       // 0x1
    40  	.text
    41  	.globl	levels_to_bitmap_neon
    42  	.p2align	2
    43  	.type	levels_to_bitmap_neon,@function
        // levels_to_bitmap_neon: builds a 64-bit mask in which bit i is set iff
        // the i-th signed 16-bit element at x0 is greater than the threshold.
        //   In:    x0 = pointer to signed 16-bit elements
        //          w1 = element count (signed 32-bit; values < 1 yield 0)
        //          w2 = threshold; only its low 16 bits are used, sign-extended
        //   Out:   x0 = bitmask
        //   Clobb: x8-x14, v0-v7, v16, v17, NZCV flags
        // Counts of 4 or more run a vector loop over 4 elements per iteration
        // followed by a scalar loop for the remaining 0-3 elements; smaller
        // counts use the scalar loop only.
        // The result has 64 bits, so only elements 0..63 have a bit position of
        // their own; callers presumably pass a count <= 64 -- TODO confirm.
    44  levels_to_bitmap_neon:                  // @levels_to_bitmap_neon
    45  // %bb.0:
    46  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill
    47  	cmp	w1, #1                  // =1
    48  	mov	x29, sp
    49  	b.lt	.LBB1_3                 // count < 1 (signed): return 0
    50  // %bb.1:
    51  	cmp	w1, #3                  // =3
    52  	mov	w9, w1                  // x9 = count, zero-extended to 64 bits
    53  	b.hi	.LBB1_4                 // count >= 4: take the vector path
    54  // %bb.2:
    55  	mov	x10, xzr                // x10 = index where the scalar loop starts = 0
    56  	mov	x8, xzr                 // x8 = result = 0
    57  	b	.LBB1_7
    58  .LBB1_3:
    59  	mov	x8, xzr                 // x8 = result = 0
    60  	b	.LBB1_8
    61  .LBB1_4:
    62  	adrp	x11, .LCPI1_0
    63  	ldr	q1, [x11, :lo12:.LCPI1_0]   // v1 = {0, 1}: bit positions of elements i, i+1
    64  	mov	w11, #2
    65  	dup	v3.2s, w2               // broadcast the threshold to both 32-bit lanes
    66  	dup	v2.2d, x11              // v2 = {2, 2}: offset to the bit positions of i+2, i+3
    67  	mov	w11, #1
    68  	and	x10, x9, #0xfffffffc    // x10 = count rounded down to a multiple of 4
    69  	shl	v4.2s, v3.2s, #16       // shl + sshr by 16 sign-extends the threshold's low 16 bits
    70  	dup	v3.2d, x11              // v3 = {1, 1}: keeps one bit of each compare result
    71  	mov	w11, #4
    72  	add	x8, x0, #4              // =4; x8 = address of element 2, loads below use offsets -4..+2
    73  	movi	v0.2d, #0000000000000000   // v0 = accumulator for elements i, i+1
    74  	sshr	v4.2s, v4.2s, #16       // v4 = sign-extended threshold in each 32-bit lane
    75  	dup	v5.2d, x11              // v5 = {4, 4}: bit position step per iteration
    76  	mov	x11, x10                // x11 = elements left for the vector loop
    77  	movi	v6.2d, #0000000000000000   // v6 = accumulator for elements i+2, i+3
    78  .LBB1_5:                                // =>This Inner Loop Header: Depth=1
    79  	ldursh	w12, [x8, #-4]          // element i, sign-extended
    80  	ldrsh	w13, [x8]               // element i+2
    81  	ldursh	w14, [x8, #-2]          // element i+1
    82  	add	v17.2d, v1.2d, v2.2d    // v17 = bit positions of elements i+2, i+3
    83  	fmov	s7, w12                 // v7 lane 0 = element i
    84  	ldrsh	w12, [x8, #2]           // element i+3
    85  	fmov	s16, w13                // v16 lane 0 = element i+2
    86  	mov	v7.s[1], w14            // v7 lane 1 = element i+1
    87  	cmgt	v7.2s, v7.2s, v4.2s     // lane = all ones where element > threshold (signed)
    88  	mov	v16.s[1], w12           // v16 lane 1 = element i+3
    89  	cmgt	v16.2s, v16.2s, v4.2s   // same comparison for elements i+2, i+3
    90  	ushll	v7.2d, v7.2s, #0        // widen the compare results to 64-bit lanes
    91  	ushll	v16.2d, v16.2s, #0
    92  	and	v7.16b, v7.16b, v3.16b  // reduce each lane to 0 or 1
    93  	and	v16.16b, v16.16b, v3.16b
    94  	ushl	v7.2d, v7.2d, v1.2d     // shift each 0/1 to its element's bit position
    95  	ushl	v16.2d, v16.2d, v17.2d
    96  	subs	x11, x11, #4            // =4; Z set once the vector part is finished
    97  	add	v1.2d, v1.2d, v5.2d     // bit positions += 4 for the next iteration
    98  	orr	v0.16b, v7.16b, v0.16b  // accumulate bits of elements i, i+1
    99  	orr	v6.16b, v16.16b, v6.16b // accumulate bits of elements i+2, i+3
   100  	add	x8, x8, #8              // =8; advance by 4 elements of 2 bytes
   101  	b.ne	.LBB1_5                 // flags still come from the subs above
   102  // %bb.6:
   103  	orr	v0.16b, v6.16b, v0.16b  // merge the two accumulators
   104  	dup	v1.2d, v0.d[1]          // copy the upper 64-bit lane into both lanes
   105  	orr	v0.16b, v0.16b, v1.16b  // lane 0 = OR of both 64-bit lanes
   106  	cmp	x10, x9                 // were all elements handled by the vector loop?
   107  	fmov	x8, d0                  // x8 = mask built by the vector part
   108  	b.eq	.LBB1_8                 // count was a multiple of 4: no scalar tail
   109  .LBB1_7:                                // =>This Inner Loop Header: Depth=1
   110  	ldrsh	w11, [x0, x10, lsl #1]  // w11 = element x10, sign-extended
   111  	cmp	w11, w2, sxth           // compare with the sign-extended low 16 bits of the threshold
   112  	cset	w11, gt                 // w11 = 1 if element > threshold (signed), else 0
   113  	lsl	x11, x11, x10           // move it to bit position x10
   114  	add	x10, x10, #1            // =1
   115  	cmp	x9, x10                 // reached the element count?
   116  	orr	x8, x11, x8             // accumulate into the result (flags untouched)
   117  	b.ne	.LBB1_7                 // loop until index == count
   118  .LBB1_8:
   119  	mov	x0, x8                  // return the mask
   120  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
   121  	ret
   122  .Lfunc_end1:
   123  	.size	levels_to_bitmap_neon, .Lfunc_end1-levels_to_bitmap_neon
   124                                          // -- End function
   125  	.ident	"clang version 10.0.0-4ubuntu1 "
   126  	.section	".note.GNU-stack","",@progbits
   127  	.addrsig