// Origin: github.com/apache/arrow/go/v16@v16.1.0/arrow/math/_lib/uint64_neon.s
// Syntax: GNU as, AArch64. Compiler output (see .ident below) for "uint64.c".
        .text
        .file   "uint64.c"
        .globl  sum_uint64_neon                 // -- Begin function sum_uint64_neon
        .p2align        2
        .type   sum_uint64_neon,@function

//-----------------------------------------------------------------------
// sum_uint64_neon
//
// Adds up x1 consecutive 64-bit elements starting at address x0 and
// stores the 64-bit total at address x2. Additions wrap modulo 2^64.
//
// Presumed C prototype (not visible here; confirm against uint64.c):
//     void sum_uint64_neon(const uint64_t *buf, size_t len, uint64_t *res);
//
// In:    x0 = address of the first element
//        x1 = number of elements (0 is accepted and yields a sum of 0)
//        x2 = address that receives the sum
// Out:   [x2] = sum; nothing is returned in a register
// Clobb: x8, x9, x10, x11, v0, v1, v2, v3, condition flags
// Stack: 16 bytes (frame record x29/x30), released before return
//-----------------------------------------------------------------------
sum_uint64_neon:                                // @sum_uint64_neon
        stp     x29, x30, [sp, #-16]!           // push frame record
        mov     x29, sp
        cbz     x1, .Lsum_u64_empty             // len == 0: store 0 and leave

        cmp     x1, #3
        b.hi    .Lsum_u64_vec_setup             // len >= 4: use the 128-bit path

        // 1..3 elements: everything is handled by the scalar tail loop.
        mov     x9, xzr                         // running sum = 0
        mov     x8, xzr                         // elements already consumed = 0
        b       .Lsum_u64_tail_setup

.Lsum_u64_vec_setup:
        and     x8, x1, #0xfffffffffffffffc     // x8 = len rounded down to a multiple of 4
        mov     x10, x8                         // x10 = elements left for the vector loop
        add     x9, x0, #16                     // cursor biased by +16; loads use [x9, #-16]
        movi    v1.2d, #0000000000000000        // accumulator for elements 2,3 of each group
        movi    v0.2d, #0000000000000000        // accumulator for elements 0,1 of each group

.Lsum_u64_vec_loop:                             // 4 elements (32 bytes) per iteration
        ldp     q2, q3, [x9, #-16]
        add     x9, x9, #32
        subs    x10, x10, #4                    // sets the flags tested by b.ne below
        add     v1.2d, v3.2d, v1.2d
        add     v0.2d, v2.2d, v0.2d
        b.ne    .Lsum_u64_vec_loop

        // Collapse the two 2-lane accumulators into one 64-bit scalar.
        add     v0.2d, v1.2d, v0.2d
        addp    d0, v0.2d                       // d0 = lane0 + lane1
        fmov    x9, d0                          // x9 = sum of the first x8 elements
        cmp     x8, x1
        b.eq    .Lsum_u64_store                 // len was a multiple of 4: no tail

.Lsum_u64_tail_setup:
        add     x10, x0, x8, lsl #3             // x10 = &buf[x8]
        sub     x8, x1, x8                      // x8 = elements remaining (> 0 here)

.Lsum_u64_tail_loop:                            // one element per iteration
        ldr     x11, [x10], #8                  // load, then advance the cursor
        subs    x8, x8, #1
        add     x9, x11, x9
        b.ne    .Lsum_u64_tail_loop

.Lsum_u64_store:
        str     x9, [x2]                        // *res = sum
        ldp     x29, x30, [sp], #16             // pop frame record
        ret

.Lsum_u64_empty:                                // reached only via cbz when len == 0
        mov     x9, xzr
        str     x9, [x2]                        // *res = 0
        ldp     x29, x30, [sp], #16             // pop frame record
        ret
.Lfunc_end0:
        .size   sum_uint64_neon, .Lfunc_end0-sum_uint64_neon
                                                // -- End function

        .ident  "clang version 9.0.1-12 "
        .section        ".note.GNU-stack","",@progbits
        .addrsig