github.com/apache/arrow/go/v14@v14.0.2/internal/utils/_lib/min_max_neon.s (about)

     1  	.text
     2  	.file	"min_max.c"
     3  	.globl	int32_max_min_neon      // -- Begin function int32_max_min_neon
     4  	.p2align	2               // align the entry point to 2^2 = 4 bytes
     5  	.type	int32_max_min_neon,@function
     6  int32_max_min_neon:                     // @int32_max_min_neon -- scans w1 signed 32-bit elements starting at x0; stores the minimum to [x2] and the maximum to [x3]
     7  // %bb.0: entry -- push a frame record and handle counts below 1
     8  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill (save x29/x30, pre-decrementing sp by 16)
     9  	cmp	w1, #1                  // =1; compare the element count against 1
    10  	mov	x29, sp                 // x29 = frame pointer for this call
    11  	b.lt	.LBB0_3                 // count < 1 (signed): store the seed values and return
    12  // %bb.1: count >= 1 -- pick the vector path or the scalar-only path
    13  	cmp	w1, #3                  // =3
    14  	mov	w8, w1                  // x8 = count, zero-extended to 64 bits
    15  	b.hi	.LBB0_4                 // count > 3: run the NEON loop first
    16  // %bb.2: count is 1..3 -- scalar loop only
    17  	mov	x9, xzr                 // x9 = 0 elements processed so far
    18  	mov	w11, #-2147483648       // w11 = running max, seeded with INT32_MIN
    19  	mov	w10, #2147483647         // w10 = running min, seeded with INT32_MAX
    20  	b	.LBB0_7                 // go to the scalar loop setup
    21  .LBB0_3:                                // empty input: the results are the seed values
    22  	mov	w10, #2147483647         // min result = INT32_MAX
    23  	mov	w11, #-2147483648       // max result = INT32_MIN
    24  	str	w11, [x3]               // *x3 = max
    25  	str	w10, [x2]               // *x2 = min
    26  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
    27  	ret
    28  .LBB0_4:                                // vector path setup (count >= 4)
    29  	and	x9, x8, #0xfffffffc      // x9 = count rounded down to a multiple of 4 (elements the vector loop handles)
    30  	add	x10, x0, #8             // =8; x10 = load cursor, biased by 8 to cancel the #-8 offset in the ldp below
    31  	movi	v2.2s, #128, lsl #24   // v2 = max accumulator A, each 32-bit lane = 0x80000000
    32  	mvni	v0.2s, #128, lsl #24   // v0 = min accumulator A, each 32-bit lane = 0x7fffffff
    33  	mvni	v1.2s, #128, lsl #24   // v1 = min accumulator B
    34  	mov	x11, x9                 // x11 = elements still to be consumed by the vector loop
    35  	movi	v3.2s, #128, lsl #24   // v3 = max accumulator B
    36  .LBB0_5:                                // =>This Inner Loop Header: Depth=1 -- consumes 4 elements per iteration
    37  	ldp	d4, d5, [x10, #-8]      // d4 = first two elements of the group, d5 = next two
    38  	subs	x11, x11, #4            // =4; the flags set here drive the b.ne at the bottom of the loop
    39  	add	x10, x10, #16           // =16; advance the cursor past the 16 bytes just loaded
    40  	smin	v0.2s, v0.2s, v4.2s     // min A = lane-wise signed min with d4
    41  	smin	v1.2s, v1.2s, v5.2s     // min B = lane-wise signed min with d5
    42  	smax	v2.2s, v2.2s, v4.2s     // max A = lane-wise signed max with d4
    43  	smax	v3.2s, v3.2s, v5.2s     // max B = lane-wise signed max with d5
    44  	b.ne	.LBB0_5                 // repeat until x11 reaches 0
    45  // %bb.6: reduce the four accumulators to one scalar min and one scalar max
    46  	smax	v2.2s, v2.2s, v3.2s     // v2 = lane-wise max of max A and max B
    47  	smin	v0.2s, v0.2s, v1.2s     // v0 = lane-wise min of min A and min B
    48  	dup	v1.2s, v2.s[1]          // broadcast lane 1 of the max vector
    49  	dup	v3.2s, v0.s[1]          // broadcast lane 1 of the min vector
    50  	smax	v1.2s, v2.2s, v1.2s     // lane 0 of v1 = max over both lanes
    51  	smin	v0.2s, v0.2s, v3.2s     // lane 0 of v0 = min over both lanes
    52  	cmp	x9, x8                  // did the vector loop cover every element?
    53  	fmov	w11, s1                 // w11 = running max from the vector part
    54  	fmov	w10, s0                 // w10 = running min from the vector part
    55  	b.eq	.LBB0_9                 // x9 == x8 (cmp above): no scalar tail needed
    56  .LBB0_7:                                // scalar loop setup
    57  	add	x12, x0, x9, lsl #2     // x12 = address of the first unprocessed element (x0 + 4*x9)
    58  	sub	x8, x8, x9              // x8 = number of elements remaining
    59  .LBB0_8:                                // =>This Inner Loop Header: Depth=1 -- one element per iteration
    60  	ldr	w9, [x12], #4           // w9 = next element; post-increment x12 by 4
    61  	cmp	w10, w9
    62  	csel	w10, w10, w9, lt        // keep w10 if it is less than the element, else take the element (signed min)
    63  	cmp	w11, w9
    64  	csel	w11, w11, w9, gt        // keep w11 if it is greater than the element, else take the element (signed max)
    65  	subs	x8, x8, #1              // =1
    66  	b.ne	.LBB0_8                 // loop until no elements remain
    67  .LBB0_9:                                // publish the results and return
    68  	str	w11, [x3]               // *x3 = max
    69  	str	w10, [x2]               // *x2 = min
    70  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
    71  	ret
    72  .Lfunc_end0:
    73  	.size	int32_max_min_neon, .Lfunc_end0-int32_max_min_neon
    74                                          // -- End function
    75  	.globl	uint32_max_min_neon     // -- Begin function uint32_max_min_neon
    76  	.p2align	2               // align the entry point to 2^2 = 4 bytes
    77  	.type	uint32_max_min_neon,@function
    78  uint32_max_min_neon:                    // @uint32_max_min_neon -- scans w1 unsigned 32-bit elements starting at x0; stores the minimum to [x2] and the maximum to [x3]
    79  // %bb.0: entry -- push a frame record and handle counts below 1
    80  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill (save x29/x30, pre-decrementing sp by 16)
    81  	cmp	w1, #1                  // =1; compare the element count against 1
    82  	mov	x29, sp                 // x29 = frame pointer for this call
    83  	b.lt	.LBB1_3                 // count < 1 (signed): store the seed values and return
    84  // %bb.1: count >= 1 -- pick the vector path or the scalar-only path
    85  	cmp	w1, #3                  // =3
    86  	mov	w8, w1                  // x8 = count, zero-extended to 64 bits
    87  	b.hi	.LBB1_4                 // count > 3: run the NEON loop first
    88  // %bb.2: count is 1..3 -- scalar loop only
    89  	mov	x9, xzr                 // x9 = 0 elements processed so far
    90  	mov	w10, wzr                // w10 = running max, seeded with 0
    91  	mov	w11, #-1                // w11 = running min, seeded with 0xffffffff
    92  	b	.LBB1_7                 // go to the scalar loop setup
    93  .LBB1_3:                                // empty input: the results are the seed values
    94  	mov	w10, wzr                // max result = 0
    95  	mov	w11, #-1                // min result = 0xffffffff
    96  	str	w10, [x3]               // *x3 = max
    97  	str	w11, [x2]               // *x2 = min
    98  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
    99  	ret
   100  .LBB1_4:                                // vector path setup (count >= 4)
   101  	and	x9, x8, #0xfffffffc      // x9 = count rounded down to a multiple of 4 (elements the vector loop handles)
   102  	movi	v1.2d, #0000000000000000  // v1 = max accumulator A, all bits clear
   103  	movi	v0.2d, #0xffffffffffffffff  // v0 = min accumulator A, all bits set
   104  	add	x10, x0, #8             // =8; x10 = load cursor, biased by 8 to cancel the #-8 offset in the ldp below
   105  	movi	v2.2d, #0xffffffffffffffff  // v2 = min accumulator B, all bits set
   106  	mov	x11, x9                 // x11 = elements still to be consumed by the vector loop
   107  	movi	v3.2d, #0000000000000000  // v3 = max accumulator B, all bits clear
   108  .LBB1_5:                                // =>This Inner Loop Header: Depth=1 -- consumes 4 elements per iteration
   109  	ldp	d4, d5, [x10, #-8]      // d4 = first two elements of the group, d5 = next two
   110  	subs	x11, x11, #4            // =4; the flags set here drive the b.ne at the bottom of the loop
   111  	add	x10, x10, #16           // =16; advance the cursor past the 16 bytes just loaded
   112  	umin	v0.2s, v0.2s, v4.2s     // min A = lane-wise unsigned min with d4
   113  	umin	v2.2s, v2.2s, v5.2s     // min B = lane-wise unsigned min with d5
   114  	umax	v1.2s, v1.2s, v4.2s     // max A = lane-wise unsigned max with d4
   115  	umax	v3.2s, v3.2s, v5.2s     // max B = lane-wise unsigned max with d5
   116  	b.ne	.LBB1_5                 // repeat until x11 reaches 0
   117  // %bb.6: reduce the four accumulators to one scalar min and one scalar max
   118  	umax	v1.2s, v1.2s, v3.2s     // v1 = lane-wise max of max A and max B
   119  	umin	v0.2s, v0.2s, v2.2s     // v0 = lane-wise min of min A and min B
   120  	dup	v2.2s, v1.s[1]          // broadcast lane 1 of the max vector
   121  	dup	v3.2s, v0.s[1]          // broadcast lane 1 of the min vector
   122  	umax	v1.2s, v1.2s, v2.2s     // lane 0 of v1 = max over both lanes
   123  	umin	v0.2s, v0.2s, v3.2s     // lane 0 of v0 = min over both lanes
   124  	cmp	x9, x8                  // did the vector loop cover every element?
   125  	fmov	w10, s1                 // w10 = running max from the vector part
   126  	fmov	w11, s0                 // w11 = running min from the vector part
   127  	b.eq	.LBB1_9                 // x9 == x8 (cmp above): no scalar tail needed
   128  .LBB1_7:                                // scalar loop setup
   129  	add	x12, x0, x9, lsl #2     // x12 = address of the first unprocessed element (x0 + 4*x9)
   130  	sub	x8, x8, x9              // x8 = number of elements remaining
   131  .LBB1_8:                                // =>This Inner Loop Header: Depth=1 -- one element per iteration
   132  	ldr	w9, [x12], #4           // w9 = next element; post-increment x12 by 4
   133  	cmp	w11, w9
   134  	csel	w11, w11, w9, lo        // keep w11 if it is lower than the element, else take the element (unsigned min)
   135  	cmp	w10, w9
   136  	csel	w10, w10, w9, hi        // keep w10 if it is higher than the element, else take the element (unsigned max)
   137  	subs	x8, x8, #1              // =1
   138  	b.ne	.LBB1_8                 // loop until no elements remain
   139  .LBB1_9:                                // publish the results and return
   140  	str	w10, [x3]               // *x3 = max
   141  	str	w11, [x2]               // *x2 = min
   142  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
   143  	ret
   144  .Lfunc_end1:
   145  	.size	uint32_max_min_neon, .Lfunc_end1-uint32_max_min_neon
   146                                          // -- End function
   147  	.globl	int64_max_min_neon      // -- Begin function int64_max_min_neon
   148  	.p2align	2               // align the entry point to 2^2 = 4 bytes
   149  	.type	int64_max_min_neon,@function
   150  int64_max_min_neon:                     // @int64_max_min_neon -- scans w1 signed 64-bit elements starting at x0; stores the minimum to [x2] and the maximum to [x3]
   151  // %bb.0: entry -- push a frame record and handle counts below 1
   152  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill (save x29/x30, pre-decrementing sp by 16)
   153  	cmp	w1, #1                  // =1; compare the element count against 1
   154  	mov	x29, sp                 // x29 = frame pointer for this call
   155  	b.lt	.LBB2_3                 // count < 1 (signed): store the seed values and return
   156  // %bb.1: count >= 1 -- seed the scalar accumulators, then pick the vector or scalar-only path
   157  	mov	w8, w1                  // x8 = count, zero-extended to 64 bits
   158  	mov	x11, #-9223372036854775808  // x11 = running max, seeded with INT64_MIN
   159  	cmp	w1, #3                  // =3
   160  	mov	x10, #9223372036854775807  // x10 = running min, seeded with INT64_MAX
   161  	b.hi	.LBB2_4                 // count > 3 (cmp above): run the NEON loop first
   162  // %bb.2: count is 1..3 -- scalar loop only
   163  	mov	x9, xzr                 // x9 = 0 elements processed so far
   164  	b	.LBB2_7                 // go to the scalar loop setup
   165  .LBB2_3:                                // empty input: the results are the seed values
   166  	mov	x10, #9223372036854775807  // min result = INT64_MAX
   167  	mov	x11, #-9223372036854775808  // max result = INT64_MIN
   168  	str	x11, [x3]               // *x3 = max
   169  	str	x10, [x2]               // *x2 = min
   170  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
   171  	ret
   172  .LBB2_4:                                // vector path setup (count >= 4)
   173  	and	x9, x8, #0xfffffffc      // x9 = count rounded down to a multiple of 4 (elements the vector loop handles)
   174  	dup	v1.2d, x11              // v1 = max accumulator A, both lanes = INT64_MIN
   175  	dup	v0.2d, x10              // v0 = min accumulator A, both lanes = INT64_MAX
   176  	add	x10, x0, #16            // =16; x10 is reused as the load cursor, biased by 16 to cancel the #-16 offset in the ldp below
   177  	mov	x11, x9                 // x11 is reused as the count of elements left for the vector loop
   178  	mov	v2.16b, v0.16b          // v2 = min accumulator B
   179  	mov	v3.16b, v1.16b          // v3 = max accumulator B
   180  .LBB2_5:                                // =>This Inner Loop Header: Depth=1 -- consumes 4 elements per iteration
   181  	ldp	q4, q5, [x10, #-16]     // q4 = first two elements of the group, q5 = next two
   182  	mov	v6.16b, v3.16b          // v6 = previous max B (v3 is about to be reused)
   183  	mov	v7.16b, v1.16b          // v7 = previous max A (v1 is about to be reused)
   184  	mov	v3.16b, v2.16b          // v3 = previous min B
   185  	mov	v1.16b, v0.16b          // v1 = previous min A
   186  	cmgt	v0.2d, v4.2d, v0.2d     // v0 = mask of lanes where q4 > min A (signed)
   187  	cmgt	v2.2d, v5.2d, v2.2d     // v2 = mask of lanes where q5 > min B (signed)
   188  	bsl	v0.16b, v1.16b, v4.16b  // new min A: keep the old min where the mask is set, else take q4
   189  	cmgt	v1.2d, v7.2d, v4.2d     // v1 = mask of lanes where max A > q4 (signed)
   190  	bsl	v2.16b, v3.16b, v5.16b  // new min B: keep the old min where the mask is set, else take q5
   191  	cmgt	v3.2d, v6.2d, v5.2d     // v3 = mask of lanes where max B > q5 (signed)
   192  	subs	x11, x11, #4            // =4; the flags set here drive the b.ne at the bottom of the loop
   193  	bsl	v1.16b, v7.16b, v4.16b  // new max A: keep the old max where the mask is set, else take q4
   194  	bsl	v3.16b, v6.16b, v5.16b  // new max B: keep the old max where the mask is set, else take q5
   195  	add	x10, x10, #32           // =32; advance the cursor past the 32 bytes just loaded
   196  	b.ne	.LBB2_5                 // repeat until x11 reaches 0
   197  // %bb.6: reduce the four accumulators to one scalar min and one scalar max
   198  	cmgt	v4.2d, v1.2d, v3.2d     // v4 = mask of lanes where max A > max B
   199  	cmgt	v5.2d, v2.2d, v0.2d     // v5 = mask of lanes where min B > min A
   200  	bsl	v4.16b, v1.16b, v3.16b  // v4 = lane-wise max of max A and max B
   201  	bsl	v5.16b, v0.16b, v2.16b  // v5 = lane-wise min of min A and min B
   202  	dup	v0.2d, v4.d[1]          // broadcast lane 1 of the max vector
   203  	dup	v1.2d, v5.d[1]          // broadcast lane 1 of the min vector
   204  	cmgt	v2.2d, v4.2d, v0.2d     // v2 = mask where a max lane > lane 1 of the max vector
   205  	cmgt	v3.2d, v1.2d, v5.2d     // v3 = mask where lane 1 of the min vector > a min lane
   206  	bsl	v2.16b, v4.16b, v0.16b  // lane 0 of v2 = max over both lanes
   207  	bsl	v3.16b, v5.16b, v1.16b  // lane 0 of v3 = min over both lanes
   208  	cmp	x9, x8                  // did the vector loop cover every element?
   209  	fmov	x11, d2                 // x11 = running max from the vector part
   210  	fmov	x10, d3                 // x10 = running min from the vector part
   211  	b.eq	.LBB2_9                 // x9 == x8 (cmp above): no scalar tail needed
   212  .LBB2_7:                                // scalar loop setup
   213  	add	x12, x0, x9, lsl #3     // x12 = address of the first unprocessed element (x0 + 8*x9)
   214  	sub	x8, x8, x9              // x8 = number of elements remaining
   215  .LBB2_8:                                // =>This Inner Loop Header: Depth=1 -- one element per iteration
   216  	ldr	x9, [x12], #8           // x9 = next element; post-increment x12 by 8
   217  	cmp	x10, x9
   218  	csel	x10, x10, x9, lt        // keep x10 if it is less than the element, else take the element (signed min)
   219  	cmp	x11, x9
   220  	csel	x11, x11, x9, gt        // keep x11 if it is greater than the element, else take the element (signed max)
   221  	subs	x8, x8, #1              // =1
   222  	b.ne	.LBB2_8                 // loop until no elements remain
   223  .LBB2_9:                                // publish the results and return
   224  	str	x11, [x3]               // *x3 = max
   225  	str	x10, [x2]               // *x2 = min
   226  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
   227  	ret
   228  .Lfunc_end2:
   229  	.size	int64_max_min_neon, .Lfunc_end2-int64_max_min_neon
   230                                          // -- End function
   231  	.globl	uint64_max_min_neon     // -- Begin function uint64_max_min_neon
   232  	.p2align	2               // align the entry point to 2^2 = 4 bytes
   233  	.type	uint64_max_min_neon,@function
   234  uint64_max_min_neon:                    // @uint64_max_min_neon -- scans w1 unsigned 64-bit elements starting at x0; stores the minimum to [x2] and the maximum to [x3]
   235  // %bb.0: entry -- push a frame record and handle counts below 1
   236  	stp	x29, x30, [sp, #-16]!   // 16-byte Folded Spill (save x29/x30, pre-decrementing sp by 16)
   237  	cmp	w1, #1                  // =1; compare the element count against 1
   238  	mov	x29, sp                 // x29 = frame pointer for this call
   239  	b.lt	.LBB3_3                 // count < 1 (signed): store the seed values and return
   240  // %bb.1: count >= 1 -- pick the vector path or the scalar-only path
   241  	cmp	w1, #3                  // =3
   242  	mov	w8, w1                  // x8 = count, zero-extended to 64 bits
   243  	b.hi	.LBB3_4                 // count > 3: run the NEON loop first
   244  // %bb.2: count is 1..3 -- scalar loop only
   245  	mov	x9, xzr                 // x9 = 0 elements processed so far
   246  	mov	x10, xzr                // x10 = running max, seeded with 0
   247  	mov	x11, #-1                // x11 = running min, seeded with all bits set
   248  	b	.LBB3_7                 // go to the scalar loop setup
   249  .LBB3_3:                                // empty input: the results are the seed values
   250  	mov	x10, xzr                // max result = 0
   251  	mov	x11, #-1                // min result = all bits set
   252  	str	x10, [x3]               // *x3 = max
   253  	str	x11, [x2]               // *x2 = min
   254  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
   255  	ret
   256  .LBB3_4:                                // vector path setup (count >= 4)
   257  	and	x9, x8, #0xfffffffc      // x9 = count rounded down to a multiple of 4 (elements the vector loop handles)
   258  	add	x10, x0, #16            // =16; x10 = load cursor, biased by 16 to cancel the #-16 offset in the ldp below
   259  	movi	v1.2d, #0000000000000000  // v1 = max accumulator A, all bits clear
   260  	movi	v0.2d, #0xffffffffffffffff  // v0 = min accumulator A, all bits set
   261  	movi	v2.2d, #0xffffffffffffffff  // v2 = min accumulator B, all bits set
   262  	mov	x11, x9                 // x11 = elements still to be consumed by the vector loop
   263  	movi	v3.2d, #0000000000000000  // v3 = max accumulator B, all bits clear
   264  .LBB3_5:                                // =>This Inner Loop Header: Depth=1 -- consumes 4 elements per iteration
   265  	ldp	q4, q5, [x10, #-16]     // q4 = first two elements of the group, q5 = next two
   266  	mov	v6.16b, v3.16b          // v6 = previous max B (v3 is about to be reused)
   267  	mov	v7.16b, v1.16b          // v7 = previous max A (v1 is about to be reused)
   268  	mov	v3.16b, v2.16b          // v3 = previous min B
   269  	mov	v1.16b, v0.16b          // v1 = previous min A
   270  	cmhi	v0.2d, v4.2d, v0.2d     // v0 = mask of lanes where q4 > min A (unsigned)
   271  	cmhi	v2.2d, v5.2d, v2.2d     // v2 = mask of lanes where q5 > min B (unsigned)
   272  	bsl	v0.16b, v1.16b, v4.16b  // new min A: keep the old min where the mask is set, else take q4
   273  	cmhi	v1.2d, v7.2d, v4.2d     // v1 = mask of lanes where max A > q4 (unsigned)
   274  	bsl	v2.16b, v3.16b, v5.16b  // new min B: keep the old min where the mask is set, else take q5
   275  	cmhi	v3.2d, v6.2d, v5.2d     // v3 = mask of lanes where max B > q5 (unsigned)
   276  	subs	x11, x11, #4            // =4; the flags set here drive the b.ne at the bottom of the loop
   277  	bsl	v1.16b, v7.16b, v4.16b  // new max A: keep the old max where the mask is set, else take q4
   278  	bsl	v3.16b, v6.16b, v5.16b  // new max B: keep the old max where the mask is set, else take q5
   279  	add	x10, x10, #32           // =32; advance the cursor past the 32 bytes just loaded
   280  	b.ne	.LBB3_5                 // repeat until x11 reaches 0
   281  // %bb.6: reduce the four accumulators to one scalar min and one scalar max
   282  	cmhi	v4.2d, v1.2d, v3.2d     // v4 = mask of lanes where max A > max B
   283  	cmhi	v5.2d, v2.2d, v0.2d     // v5 = mask of lanes where min B > min A
   284  	bsl	v4.16b, v1.16b, v3.16b  // v4 = lane-wise max of max A and max B
   285  	bsl	v5.16b, v0.16b, v2.16b  // v5 = lane-wise min of min A and min B
   286  	dup	v0.2d, v4.d[1]          // broadcast lane 1 of the max vector
   287  	dup	v1.2d, v5.d[1]          // broadcast lane 1 of the min vector
   288  	cmhi	v2.2d, v4.2d, v0.2d     // v2 = mask where a max lane > lane 1 of the max vector
   289  	cmhi	v3.2d, v1.2d, v5.2d     // v3 = mask where lane 1 of the min vector > a min lane
   290  	bsl	v2.16b, v4.16b, v0.16b  // lane 0 of v2 = max over both lanes
   291  	bsl	v3.16b, v5.16b, v1.16b  // lane 0 of v3 = min over both lanes
   292  	cmp	x9, x8                  // did the vector loop cover every element?
   293  	fmov	x10, d2                 // x10 = running max from the vector part
   294  	fmov	x11, d3                 // x11 = running min from the vector part
   295  	b.eq	.LBB3_9                 // x9 == x8 (cmp above): no scalar tail needed
   296  .LBB3_7:                                // scalar loop setup
   297  	add	x12, x0, x9, lsl #3     // x12 = address of the first unprocessed element (x0 + 8*x9)
   298  	sub	x8, x8, x9              // x8 = number of elements remaining
   299  .LBB3_8:                                // =>This Inner Loop Header: Depth=1 -- one element per iteration
   300  	ldr	x9, [x12], #8           // x9 = next element; post-increment x12 by 8
   301  	cmp	x11, x9
   302  	csel	x11, x11, x9, lo        // keep x11 if it is lower than the element, else take the element (unsigned min)
   303  	cmp	x10, x9
   304  	csel	x10, x10, x9, hi        // keep x10 if it is higher than the element, else take the element (unsigned max)
   305  	subs	x8, x8, #1              // =1
   306  	b.ne	.LBB3_8                 // loop until no elements remain
   307  .LBB3_9:                                // publish the results and return
   308  	str	x10, [x3]               // *x3 = max
   309  	str	x11, [x2]               // *x2 = min
   310  	ldp	x29, x30, [sp], #16     // 16-byte Folded Reload
   311  	ret
   312  .Lfunc_end3:
   313  	.size	uint64_max_min_neon, .Lfunc_end3-uint64_max_min_neon
   314                                          // -- End function
   315  
   316  	.ident	"clang version 9.0.1-12 "
   317  	.section	".note.GNU-stack","",@progbits
   318  	.addrsig