github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/bmi/_lib/bitmap_neon.s (about)

     1  	.text
     2  	.file	"bitmap_bmi2.c"
     3  	.section	.rodata.cst16,"aM",@progbits,16
     4  	.p2align	4               // -- Begin function levels_to_bitmap_neon
        // .LCPI1_0: 16-byte aligned constant made of the two 64-bit words 0 and 1.
        // levels_to_bitmap_neon loads it into q1 with a single `ldr` and uses it
        // as the starting pair of bit positions for its vector loop.
     5  .LCPI1_0:
     6  	.xword	0                       // 0x0 - first 64-bit word: bit position 0
     7  	.xword	1                       // 0x1 - second 64-bit word: bit position 1
     8  	.text
     9  	.globl	levels_to_bitmap_neon
    10  	.p2align	2
    11  	.type	levels_to_bitmap_neon,@function
        // -----------------------------------------------------------------------
        // levels_to_bitmap_neon
        //
        // Builds a 64-bit bitmap from an array of signed 16-bit values: bit i of
        // the result is set when element i is greater (signed compare) than the
        // threshold.
        //
        // Presumed C prototype (TODO confirm against the C source):
        //   uint64_t levels_to_bitmap_neon(const int16_t *levels,
        //                                  int num_levels, int16_t rhs)
        //
        // In:    x0 = base address of the 16-bit elements
        //        w1 = element count (signed 32-bit; a count < 1 returns 0)
        //        w2 = threshold; only the low 16 bits are used, sign-extended
        // Out:   x0 = bitmap
        // Uses:  x8-x14, v0-v7, v16, v17 and the condition flags as scratch.
        //        x29/x30 are pushed on entry and popped before `ret`.
        //
        // Shape: count < 1   -> return 0
        //        count 1..3  -> scalar loop only (.LBB1_7)
        //        count >= 4  -> vector loop over groups of 4 elements (.LBB1_5),
        //                       then the scalar loop for the last count % 4
        //
        // NOTE(review): the result is a single 64-bit word, so callers presumably
        // never pass more than 64 elements - confirm at the call sites.
        // -----------------------------------------------------------------------
    12  levels_to_bitmap_neon:                  // @levels_to_bitmap_neon
    13  // %bb.0:
    14  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill: push x29/x30 (frame record)
    15  	cmp	w1, #1                  // count < 1 ? (flags consumed by b.lt below)
    16  	mov	x29, sp                 // frame pointer = sp
    17  	b.lt	.LBB1_3                 // signed: count <= 0 -> return 0
    18  // %bb.1:
    19  	cmp	w1, #3                  // count > 3 ? (flags consumed by b.hi below)
    20  	mov	w9, w1                  // x9 = count, zero-extended to 64 bits
    21  	b.hi	.LBB1_4                 // at least one full group of 4 -> vector path
    22  // %bb.2:
        	// count is 1..3: the scalar loop handles everything, starting at index 0.
    23  	mov	x10, xzr                // x10 = next element index = 0
    24  	mov	x8, xzr                 // x8 = bitmap accumulator = 0
    25  	b	.LBB1_7
    26  .LBB1_3:
    27  	mov	x8, xzr                 // nothing to scan: result = 0
    28  	b	.LBB1_8
    29  .LBB1_4:
        	// Vector path setup. Register roles inside the loop at .LBB1_5:
        	//   v1 = bit positions {i, i+1} of the first two elements of the group
        	//   v2 = {2, 2}    offset from v1 to the positions of elements i+2, i+3
        	//   v3 = {1, 1}    mask that turns a compare result into 0 or 1
        	//   v4 = threshold, sign-extended from 16 bits, in both 32-bit lanes
        	//   v5 = {4, 4}    per-iteration step for v1
        	//   v0, v6 = bitmap accumulators (elements i,i+1 and i+2,i+3)
        	//   x8  = address of element i+2 (loads use offsets -4, -2, 0, +2)
        	//   x10 = count rounded down to a multiple of 4
        	//   x11 = elements left for the vector loop
    30  	adrp	x11, .LCPI1_0
    31  	ldr	q1, [x11, :lo12:.LCPI1_0]       // v1 = {0, 1}
    32  	mov	w11, #2
    33  	dup	v3.2s, w2               // broadcast the raw threshold (v3 is a temporary here)
    34  	dup	v2.2d, x11              // v2 = {2, 2}
    35  	mov	w11, #1
    36  	and	x10, x9, #0xfffffffc    // x10 = count with the low two bits cleared
    37  	shl	v4.2s, v3.2s, #16       // shl + sshr (line 42) sign-extend the low 16 bits
    38  	dup	v3.2d, x11              // v3 = {1, 1}
    39  	mov	w11, #4
    40  	add	x8, x0, #4              // x8 = &levels[2]
    41  	movi	v0.2d, #0000000000000000
    42  	sshr	v4.2s, v4.2s, #16       // v4 = threshold as a signed 32-bit value per lane
    43  	dup	v5.2d, x11              // v5 = {4, 4}
    44  	mov	x11, x10                // x11 = number of elements the vector loop will process
    45  	movi	v6.2d, #0000000000000000
    46  .LBB1_5:                                // =>This Inner Loop Header: Depth=1
        	// One iteration handles elements i..i+3. The scalar loads sign-extend
        	// each 16-bit element to 32 bits before it is inserted into a lane.
    47  	ldursh	w12, [x8, #-4]          // w12 = element i
    48  	ldrsh	w13, [x8]               // w13 = element i+2
    49  	ldursh	w14, [x8, #-2]          // w14 = element i+1
    50  	add	v17.2d, v1.2d, v2.2d    // v17 = bit positions {i+2, i+3}
    51  	fmov	s7, w12                 // v7.s[0] = element i
    52  	ldrsh	w12, [x8, #2]           // w12 = element i+3
    53  	fmov	s16, w13                // v16.s[0] = element i+2
    54  	mov	v7.s[1], w14            // v7.s[1] = element i+1
    55  	cmgt	v7.2s, v7.2s, v4.2s     // lane = all ones if element > threshold (signed), else 0
    56  	mov	v16.s[1], w12           // v16.s[1] = element i+3
    57  	cmgt	v16.2s, v16.2s, v4.2s   // same compare for elements i+2, i+3
    58  	ushll	v7.2d, v7.2s, #0        // widen the two 32-bit results to 64-bit lanes
    59  	ushll	v16.2d, v16.2s, #0
    60  	and	v7.16b, v7.16b, v3.16b  // reduce each lane to 0 or 1
    61  	and	v16.16b, v16.16b, v3.16b
    62  	ushl	v7.2d, v7.2d, v1.2d     // shift the flags to bits i and i+1
    63  	ushl	v16.2d, v16.2d, v17.2d  // shift the flags to bits i+2 and i+3
    64  	subs	x11, x11, #4            // remaining -= 4; sets the flags tested by b.ne below
    65  	add	v1.2d, v1.2d, v5.2d     // advance the bit positions by 4 for the next group
    66  	orr	v0.16b, v7.16b, v0.16b  // accumulate bits for elements i, i+1
    67  	orr	v6.16b, v16.16b, v6.16b // accumulate bits for elements i+2, i+3
    68  	add	x8, x8, #8              // advance by 4 elements (8 bytes)
    69  	b.ne	.LBB1_5                 // loop until x11 reaches 0
    70  // %bb.6:
        	// Fold both accumulators and both 64-bit lanes into one scalar bitmap.
    71  	orr	v0.16b, v6.16b, v0.16b  // merge the two accumulators
    72  	dup	v1.2d, v0.d[1]          // copy the upper 64-bit lane ...
    73  	orr	v0.16b, v0.16b, v1.16b  // ... and OR it into the lower one
    74  	cmp	x10, x9                 // did the vector loop cover every element?
    75  	fmov	x8, d0                  // x8 = bitmap so far
    76  	b.eq	.LBB1_8                 // count was a multiple of 4: done
        	// Otherwise fall through: x10 is the index of the first unprocessed element.
    77  .LBB1_7:                                // =>This Inner Loop Header: Depth=1
        	// Scalar loop: one element per iteration, for indices x10 .. x9-1.
    78  	ldrsh	w11, [x0, x10, lsl #1]  // w11 = levels[x10], sign-extended
    79  	cmp	w11, w2, sxth           // compare with the sign-extended 16-bit threshold
    80  	cset	w11, gt                 // w11 = 1 if element > threshold (signed), else 0
    81  	lsl	x11, x11, x10           // move the flag to bit position x10
    82  	add	x10, x10, #1            // next index
    83  	cmp	x9, x10                 // reached the element count?
    84  	orr	x8, x11, x8             // bitmap |= flag
    85  	b.ne	.LBB1_7
    86  .LBB1_8:
        	// Common exit: x8 holds the finished bitmap.
    87  	mov	x0, x8                  // return value
    88  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload: pop x29/x30
    89  	ret
    90  .Lfunc_end1:
    91  	.size	levels_to_bitmap_neon, .Lfunc_end1-levels_to_bitmap_neon
    92                                          // -- End function
    93  	.ident	"clang version 10.0.0-4ubuntu1 "
    94  	.section	".note.GNU-stack","",@progbits
    95  	.addrsig