github.com/apache/arrow/go/v16@v16.1.0/arrow/math/_lib/uint64_neon.s (about)

     1  	.text                           // everything below is assembled into the code section
     2  	.file	"uint64.c"              // records the source file name for this translation unit
     3  	.globl	sum_uint64_neon         // -- Begin function sum_uint64_neon
     4  	.p2align	2                       // align the entry point to 2^2 = 4 bytes
     5  	.type	sum_uint64_neon,@function   // mark the symbol as a function
     6  sum_uint64_neon:                        // @sum_uint64_neon: x0 = base of an array of 8-byte elements, x1 = element count, x2 = address that receives the 64-bit sum
     7  // %bb.0: entry - push the frame record, then dispatch on the element count in x1
     8  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill: pre-decrement sp by 16 and save x29/x30
     9  	mov	x29, sp                 // x29 = frame pointer for this call
    10  	cbz	x1, .LBB0_3             // count == 0: store a zero result and return
    11  // %bb.1: count is non-zero - pick the vector path or the scalar-only path
    12  	cmp	x1, #3                  // =3
    13  	b.hi	.LBB0_4                 // unsigned count > 3: at least one full group of 4 exists
    14  // %bb.2: 1..3 elements - skip the vector loop and sum everything in the scalar loop
    15  	mov	x8, xzr                 // x8 = 0 elements consumed so far
    16  	mov	x9, xzr                 // x9 = 0, the running sum
    17  	b	.LBB0_7
    18  .LBB0_3:                                // empty input
    19  	mov	x9, xzr
    20  	str	x9, [x2]                // *x2 = 0
    21  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload: restore x29/x30 and pop the frame record
    22  	ret
    23  .LBB0_4:                                // vector path setup (count >= 4)
    24  	and	x8, x1, #0xfffffffffffffffc     // x8 = count rounded down to a multiple of 4 = elements handled by the vector loop
    25  	add	x9, x0, #16             // =16; x9 = load cursor, biased by 16 to cancel the #-16 offset in the ldp below
    26  	movi	v0.2d, #0000000000000000        // v0 = two 64-bit lanes of zero: accumulator for elements 0,1 of each group
    27  	mov	x10, x8                 // x10 = elements still to be consumed by the vector loop
    28  	movi	v1.2d, #0000000000000000        // v1 = accumulator for elements 2,3 of each group
    29  .LBB0_5:                                // =>This Inner Loop Header: Depth=1
    30  	ldp	q2, q3, [x9, #-16]      // load 32 bytes: q2 = next two elements, q3 = the two after them
    31  	subs	x10, x10, #4            // =4; sets Z once the last full group has been consumed
    32  	add	x9, x9, #32             // =32; step the cursor past the 4 elements (does not touch the flags)
    33  	add	v0.2d, v2.2d, v0.2d     // lane-wise 64-bit add into the first accumulator
    34  	add	v1.2d, v3.2d, v1.2d     // lane-wise 64-bit add into the second accumulator
    35  	b.ne	.LBB0_5                 // repeat while x10 != 0 (flags still those of the subs above)
    36  // %bb.6: fold the two vector accumulators into one scalar partial sum
    37  	add	v0.2d, v1.2d, v0.2d     // v0 = v0 + v1, lane-wise
    38  	addp	d0, v0.2d               // d0 = lane 0 + lane 1 of v0
    39  	cmp	x8, x1                  // were all elements covered by the vector loop?
    40  	fmov	x9, d0                  // x9 = vector partial sum; fmov leaves the cmp flags intact
    41  	b.eq	.LBB0_9                 // count was a multiple of 4: no tail to process
    42  .LBB0_7:                                // scalar tail: x8 = elements already summed, x9 = sum so far
    43  	add	x10, x0, x8, lsl #3     // x10 = x0 + x8*8, address of the first unsummed element
    44  	sub	x8, x1, x8              // x8 = remaining element count (1..3 on every path that reaches here)
    45  .LBB0_8:                                // =>This Inner Loop Header: Depth=1
    46  	ldr	x11, [x10], #8          // x11 = *x10, then x10 += 8 (post-indexed load)
    47  	subs	x8, x8, #1              // =1
    48  	add	x9, x11, x9             // sum += element; plain 64-bit add, so overflow wraps
    49  	b.ne	.LBB0_8                 // repeat while elements remain (flags from the subs above)
    50  .LBB0_9:                                // common exit for every non-empty input
    51  	str	x9, [x2]                // *x2 = sum
    52  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload: restore x29/x30 and pop the frame record
    53  	ret
    54  .Lfunc_end0:
    55  	.size	sum_uint64_neon, .Lfunc_end0-sum_uint64_neon   // symbol size = bytes between the entry label and .Lfunc_end0
    56                                          // -- End function
    57  
    58  	.ident	"clang version 9.0.1-12 "   // producer identification string emitted by the compiler
    59  	.section	".note.GNU-stack","",@progbits   // empty note section with no flags; by GNU toolchain convention this signals that an executable stack is not required
    60  	.addrsig                        // LLVM directive requesting an address-significance table for this object