github.com/apache/arrow/go/v14@v14.0.2/internal/utils/min_max_neon_arm64.s (about)

     1  //+build !noasm !appengine
     2  
     3  // ARROW-15336
     4  // (C2GOASM doesn't work correctly for Arm64)
     5  // Partly GENERATED BY asm2plan9s.
     6  
     7  
     8  // func _int32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
        //
        // Scans `length` int32 elements starting at `values`, then stores the
        // smallest one to *minout and the largest one to *maxout.
        // When length < 1 no element is read and the identities are stored:
        // *minout = 2147483647, *maxout = -2147483648.
        //
        // `length` is a 64-bit Go int. The length test, the copy into R8 and the
        // "round down to a multiple of 4" mask are encoded as 64-bit (X register)
        // instructions so that lengths of 2^31 or more are neither truncated nor
        // mistaken for negative values.
        //
        // The frame push/pop is emitted through raw WORDs, so the 16 bytes it takes
        // from the stack are not covered by the $0 frame size on the TEXT line.
        //
        // Register roles:
        //   R0 = values   R1 = length   R2 = minout   R3 = maxout
        //   R8 = length, later the number of tail elements still to scan
        //   R9 = number of elements consumed by the vector loop (0 if it is skipped)
        //   R10 = running minimum   R11 = running maximum (scalar part)
     9  TEXT ·_int32_max_min_neon(SB), $0-32
    10  
    11  	MOVD    values+0(FP), R0
    12  	MOVD    length+8(FP), R1
    13  	MOVD    minout+16(FP), R2
    14  	MOVD    maxout+24(FP), R3
    15  
    16  	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
    17  	WORD $0xf100043f // cmp    x1, #1
    18  	WORD $0x910003fd // mov    x29, sp
    19  	BLT LBB0_3       // length < 1: store the identities and return
    20  
    21  	WORD $0xf1000c3f // cmp    x1, #3
    22  	WORD $0xaa0103e8 // mov    x8, x1
    23  	BHI LBB0_4       // length > 3: run the vector loop first
    24  
        	// 1..3 elements: scalar loop only, starting at index 0.
    25  	WORD $0xaa1f03e9 // mov    x9, xzr
    26  	WORD $0x52b0000b // mov    w11, #-2147483648
    27  	WORD $0x12b0000a // mov    w10, #2147483647
    28  	JMP LBB0_7
    29  LBB0_3:
        	// Empty input: *maxout = INT32_MIN, *minout = INT32_MAX.
    30  	WORD $0x12b0000a // mov    w10, #2147483647
    31  	WORD $0x52b0000b // mov    w11, #-2147483648
    32  	WORD $0xb900006b // str    w11, [x3]
    33  	WORD $0xb900004a // str    w10, [x2]
    34  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
    35  	RET
    36  LBB0_4:
        	// Vector setup: x9 = length rounded down to a multiple of 4 (also the
        	// loop counter in x11); v0/v1 accumulate minima, v2/v3 accumulate maxima.
    37  	WORD $0x927ef509 // and    x9, x8, #0xfffffffffffffffc
    38  	WORD $0x9100200a // add    x10, x0, #8
    39  	WORD $0x0f046402 // movi    v2.2s, #128, lsl #24
    40  	WORD $0x2f046400 // mvni    v0.2s, #128, lsl #24
    41  	WORD $0x2f046401 // mvni    v1.2s, #128, lsl #24
    42  	WORD $0xaa0903eb // mov    x11, x9
    43  	WORD $0x0f046403 // movi    v3.2s, #128, lsl #24
    44  LBB0_5:
        	// Each iteration loads four int32 values (two per D register).
    45  	WORD $0x6d7f9544 // ldp    d4, d5, [x10, #-8]
    46  	WORD $0xf100116b // subs    x11, x11, #4
    47  	WORD $0x9100414a // add    x10, x10, #16
    48  	WORD $0x0ea46c00 // smin    v0.2s, v0.2s, v4.2s
    49  	WORD $0x0ea56c21 // smin    v1.2s, v1.2s, v5.2s
    50  	WORD $0x0ea46442 // smax    v2.2s, v2.2s, v4.2s
    51  	WORD $0x0ea56463 // smax    v3.2s, v3.2s, v5.2s
    52  	BNE LBB0_5       // flags still come from the subs above
    53  
        	// Fold the two accumulator pairs, then the two lanes, into w11 (max)
        	// and w10 (min).
    54  	WORD $0x0ea36442 // smax    v2.2s, v2.2s, v3.2s
    55  	WORD $0x0ea16c00 // smin    v0.2s, v0.2s, v1.2s
    56  	WORD $0x0e0c0441 // dup    v1.2s, v2.s[1]
    57  	WORD $0x0e0c0403 // dup    v3.2s, v0.s[1]
    58  	WORD $0x0ea16441 // smax    v1.2s, v2.2s, v1.2s
    59  	WORD $0x0ea36c00 // smin    v0.2s, v0.2s, v3.2s
    60  	WORD $0xeb08013f // cmp    x9, x8
    61  	WORD $0x1e26002b // fmov    w11, s1
    62  	WORD $0x1e26000a // fmov    w10, s0
    63  	BEQ LBB0_9       // length was a multiple of 4: no tail
    64  LBB0_7:
        	// Scalar tail: x12 = &values[x9], x8 = elements left.
    65  	WORD $0x8b09080c // add    x12, x0, x9, lsl #2
    66  	WORD $0xcb090108 // sub    x8, x8, x9
    67  LBB0_8:
    68  	WORD $0xb8404589 // ldr    w9, [x12], #4
    69  	WORD $0x6b09015f // cmp    w10, w9
    70  	WORD $0x1a89b14a // csel    w10, w10, w9, lt
    71  	WORD $0x6b09017f // cmp    w11, w9
    72  	WORD $0x1a89c16b // csel    w11, w11, w9, gt
    73  	WORD $0xf1000508 // subs    x8, x8, #1
    74  	BNE LBB0_8
    75  LBB0_9:
        	// Publish results: *maxout = w11, *minout = w10.
    76  	WORD $0xb900006b // str    w11, [x3]
    77  	WORD $0xb900004a // str    w10, [x2]
    78  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
    79  	RET
    80  
    81  // func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
        //
        // Scans `length` uint32 elements starting at `values`, then stores the
        // smallest one to *minout and the largest one to *maxout.
        // When length < 1 no element is read and the identities are stored:
        // *minout = 0xffffffff, *maxout = 0.
        //
        // `length` is a 64-bit Go int. The length test, the copy into R8 and the
        // "round down to a multiple of 4" mask are encoded as 64-bit (X register)
        // instructions so that lengths of 2^31 or more are neither truncated nor
        // mistaken for negative values.
        //
        // The frame push/pop is emitted through raw WORDs, so the 16 bytes it takes
        // from the stack are not covered by the $0 frame size on the TEXT line.
        //
        // Register roles (note that R10/R11 are swapped relative to the signed
        // int32 routine):
        //   R0 = values   R1 = length   R2 = minout   R3 = maxout
        //   R8 = length, later the number of tail elements still to scan
        //   R9 = number of elements consumed by the vector loop (0 if it is skipped)
        //   R10 = running maximum   R11 = running minimum (scalar part)
    82  TEXT ·_uint32_max_min_neon(SB), $0-32
    83  
    84  	MOVD    values+0(FP), R0
    85  	MOVD    length+8(FP), R1
    86  	MOVD    minout+16(FP), R2
    87  	MOVD    maxout+24(FP), R3
    88  
    89  	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
    90  	WORD $0xf100043f // cmp    x1, #1
    91  	WORD $0x910003fd // mov    x29, sp
    92  	BLT LBB1_3       // length < 1: store the identities and return
    93  
    94  	WORD $0xf1000c3f // cmp    x1, #3
    95  	WORD $0xaa0103e8 // mov    x8, x1
    96  	BHI LBB1_4       // length > 3: run the vector loop first
    97  
        	// 1..3 elements: scalar loop only, starting at index 0.
    98  	WORD $0xaa1f03e9 // mov    x9, xzr
    99  	WORD $0x2a1f03ea // mov    w10, wzr
   100  	WORD $0x1280000b // mov    w11, #-1
   101  	JMP LBB1_7
   102  LBB1_3:
        	// Empty input: *maxout = 0, *minout = 0xffffffff.
   103  	WORD $0x2a1f03ea // mov    w10, wzr
   104  	WORD $0x1280000b // mov    w11, #-1
   105  	WORD $0xb900006a // str    w10, [x3]
   106  	WORD $0xb900004b // str    w11, [x2]
   107  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   108  	RET
   109  LBB1_4:
        	// Vector setup: x9 = length rounded down to a multiple of 4 (also the
        	// loop counter in x11); v0/v2 accumulate minima, v1/v3 accumulate maxima.
   110  	WORD $0x927ef509 // and    x9, x8, #0xfffffffffffffffc
   111  	WORD $0x6f00e401 // movi    v1.2d, #0000000000000000
   112  	WORD $0x6f07e7e0 // movi    v0.2d, #0xffffffffffffffff
   113  	WORD $0x9100200a // add    x10, x0, #8
   114  	WORD $0x6f07e7e2 // movi    v2.2d, #0xffffffffffffffff
   115  	WORD $0xaa0903eb // mov    x11, x9
   116  	WORD $0x6f00e403 // movi    v3.2d, #0000000000000000
   117  LBB1_5:
        	// Each iteration loads four uint32 values (two per D register).
   118  	WORD $0x6d7f9544 // ldp    d4, d5, [x10, #-8]
   119  	WORD $0xf100116b // subs    x11, x11, #4
   120  	WORD $0x9100414a // add    x10, x10, #16
   121  	WORD $0x2ea46c00 // umin    v0.2s, v0.2s, v4.2s
   122  	WORD $0x2ea56c42 // umin    v2.2s, v2.2s, v5.2s
   123  	WORD $0x2ea46421 // umax    v1.2s, v1.2s, v4.2s
   124  	WORD $0x2ea56463 // umax    v3.2s, v3.2s, v5.2s
   125  	BNE LBB1_5       // flags still come from the subs above
   126  
        	// Fold the two accumulator pairs, then the two lanes, into w10 (max)
        	// and w11 (min).
   127  	WORD $0x2ea36421 // umax    v1.2s, v1.2s, v3.2s
   128  	WORD $0x2ea26c00 // umin    v0.2s, v0.2s, v2.2s
   129  	WORD $0x0e0c0422 // dup    v2.2s, v1.s[1]
   130  	WORD $0x0e0c0403 // dup    v3.2s, v0.s[1]
   131  	WORD $0x2ea26421 // umax    v1.2s, v1.2s, v2.2s
   132  	WORD $0x2ea36c00 // umin    v0.2s, v0.2s, v3.2s
   133  	WORD $0xeb08013f // cmp    x9, x8
   134  	WORD $0x1e26002a // fmov    w10, s1
   135  	WORD $0x1e26000b // fmov    w11, s0
   136  	BEQ LBB1_9       // length was a multiple of 4: no tail
   137  LBB1_7:
        	// Scalar tail: x12 = &values[x9], x8 = elements left.
   138  	WORD $0x8b09080c // add    x12, x0, x9, lsl #2
   139  	WORD $0xcb090108 // sub    x8, x8, x9
   140  LBB1_8:
   141  	WORD $0xb8404589 // ldr    w9, [x12], #4
   142  	WORD $0x6b09017f // cmp    w11, w9
   143  	WORD $0x1a89316b // csel    w11, w11, w9, lo
   144  	WORD $0x6b09015f // cmp    w10, w9
   145  	WORD $0x1a89814a // csel    w10, w10, w9, hi
   146  	WORD $0xf1000508 // subs    x8, x8, #1
   147  	BNE LBB1_8
   148  LBB1_9:
        	// Publish results: *maxout = w10, *minout = w11.
   149  	WORD $0xb900006a // str    w10, [x3]
   150  	WORD $0xb900004b // str    w11, [x2]
   151  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   152  	RET
   153  
   154  // func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
        //
        // Scans `length` int64 elements starting at `values`, then stores the
        // smallest one to *minout and the largest one to *maxout.
        // When length < 1 no element is read and the identities are stored:
        // *minout = 9223372036854775807, *maxout = -9223372036854775808.
        //
        // `length` is a 64-bit Go int. The length test, the copy into R8 and the
        // "round down to a multiple of 4" mask are encoded as 64-bit (X register)
        // instructions so that lengths of 2^31 or more are neither truncated nor
        // mistaken for negative values.
        //
        // The frame push/pop is emitted through raw WORDs, so the 16 bytes it takes
        // from the stack are not covered by the $0 frame size on the TEXT line.
        //
        // Register roles:
        //   R0 = values   R1 = length   R2 = minout   R3 = maxout
        //   R8 = length, later the number of tail elements still to scan
        //   R9 = number of elements consumed by the vector loop (0 if it is skipped)
        //   R10 = running minimum   R11 = running maximum (scalar part)
   155  TEXT ·_int64_max_min_neon(SB), $0-32
   156  
   157  	MOVD    values+0(FP), R0
   158  	MOVD    length+8(FP), R1
   159  	MOVD    minout+16(FP), R2
   160  	MOVD    maxout+24(FP), R3
   161  
   162  	WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
   163  	WORD $0xf100043f // cmp    x1, #1
   164  	WORD $0x910003fd // mov    x29, sp
   165  	BLT LBB2_3       // length < 1: store the identities and return
   166  
        	// x11/x10 are seeded with INT64_MIN/INT64_MAX for both the scalar and
        	// the vector path; the two mov-immediates leave the cmp flags intact.
   167  	WORD $0xaa0103e8 // mov    x8, x1
   168  	WORD $0xd2f0000b // mov    x11, #-9223372036854775808
   169  	WORD $0xf1000c3f // cmp    x1, #3
   170  	WORD $0x92f0000a // mov    x10, #9223372036854775807
   171  	BHI LBB2_4       // length > 3: run the vector loop first
   172  
        	// 1..3 elements: scalar loop only, starting at index 0.
   173  	WORD $0xaa1f03e9 // mov    x9, xzr
   174  	JMP LBB2_7
   175  LBB2_3:
        	// Empty input: *maxout = INT64_MIN, *minout = INT64_MAX.
   176  	WORD $0x92f0000a // mov    x10, #9223372036854775807
   177  	WORD $0xd2f0000b // mov    x11, #-9223372036854775808
   178  	WORD $0xf900006b // str    x11, [x3]
   179  	WORD $0xf900004a // str    x10, [x2]
   180  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   181  	RET
   182  LBB2_4:
        	// Vector setup: x9 = length rounded down to a multiple of 4 (also the
        	// loop counter in x11); v0/v2 accumulate minima, v1/v3 accumulate maxima.
   183  	WORD $0x927ef509 // and    x9, x8, #0xfffffffffffffffc
   184  	WORD $0x4e080d61 // dup    v1.2d, x11
   185  	WORD $0x4e080d40 // dup    v0.2d, x10
   186  	WORD $0x9100400a // add    x10, x0, #16
   187  	WORD $0xaa0903eb // mov    x11, x9
   188  	WORD $0x4ea01c02 // mov    v2.16b, v0.16b
   189  	WORD $0x4ea11c23 // mov    v3.16b, v1.16b
   190  LBB2_5:
        	// Each iteration loads four int64 values (two per Q register). Min and
        	// max are built from a signed compare (cmgt) producing a lane mask,
        	// followed by a bitwise select (bsl) between the old accumulator and
        	// the freshly loaded lanes.
   191  	WORD $0xad7f9544 // ldp    q4, q5, [x10, #-16]
   192  	WORD $0x4ea31c66 // mov    v6.16b, v3.16b
   193  	WORD $0x4ea11c27 // mov    v7.16b, v1.16b
   194  	WORD $0x4ea21c43 // mov    v3.16b, v2.16b
   195  	WORD $0x4ea01c01 // mov    v1.16b, v0.16b
   196  	WORD $0x4ee03480 // cmgt    v0.2d, v4.2d, v0.2d
   197  	WORD $0x4ee234a2 // cmgt    v2.2d, v5.2d, v2.2d
   198  	WORD $0x6e641c20 // bsl    v0.16b, v1.16b, v4.16b
   199  	WORD $0x4ee434e1 // cmgt    v1.2d, v7.2d, v4.2d
   200  	WORD $0x6e651c62 // bsl    v2.16b, v3.16b, v5.16b
   201  	WORD $0x4ee534c3 // cmgt    v3.2d, v6.2d, v5.2d
   202  	WORD $0xf100116b // subs    x11, x11, #4
   203  	WORD $0x6e641ce1 // bsl    v1.16b, v7.16b, v4.16b
   204  	WORD $0x6e651cc3 // bsl    v3.16b, v6.16b, v5.16b
   205  	WORD $0x9100814a // add    x10, x10, #32
   206  	BNE LBB2_5       // flags still come from the subs above
   207  
        	// Fold the two accumulator pairs, then the two lanes, into x11 (max)
        	// and x10 (min).
   208  	WORD $0x4ee33424 // cmgt    v4.2d, v1.2d, v3.2d
   209  	WORD $0x4ee03445 // cmgt    v5.2d, v2.2d, v0.2d
   210  	WORD $0x6e631c24 // bsl    v4.16b, v1.16b, v3.16b
   211  	WORD $0x6e621c05 // bsl    v5.16b, v0.16b, v2.16b
   212  	WORD $0x4e180480 // dup    v0.2d, v4.d[1]
   213  	WORD $0x4e1804a1 // dup    v1.2d, v5.d[1]
   214  	WORD $0x4ee03482 // cmgt    v2.2d, v4.2d, v0.2d
   215  	WORD $0x4ee53423 // cmgt    v3.2d, v1.2d, v5.2d
   216  	WORD $0x6e601c82 // bsl    v2.16b, v4.16b, v0.16b
   217  	WORD $0x6e611ca3 // bsl    v3.16b, v5.16b, v1.16b
   218  	WORD $0xeb08013f // cmp    x9, x8
   219  	WORD $0x9e66004b // fmov    x11, d2
   220  	WORD $0x9e66006a // fmov    x10, d3
   221  	BEQ LBB2_9       // length was a multiple of 4: no tail
   222  LBB2_7:
        	// Scalar tail: x12 = &values[x9], x8 = elements left.
   223  	WORD $0x8b090c0c // add    x12, x0, x9, lsl #3
   224  	WORD $0xcb090108 // sub    x8, x8, x9
   225  LBB2_8:
   226  	WORD $0xf8408589 // ldr    x9, [x12], #8
   227  	WORD $0xeb09015f // cmp    x10, x9
   228  	WORD $0x9a89b14a // csel    x10, x10, x9, lt
   229  	WORD $0xeb09017f // cmp    x11, x9
   230  	WORD $0x9a89c16b // csel    x11, x11, x9, gt
   231  	WORD $0xf1000508 // subs    x8, x8, #1
   232  	BNE LBB2_8
   233  LBB2_9:
        	// Publish results: *maxout = x11, *minout = x10.
   234  	WORD $0xf900006b // str    x11, [x3]
   235  	WORD $0xf900004a // str    x10, [x2]
   236  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   237  	RET
   238  
   239  
   240  // func _uint64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
        //
        // Scans `length` uint64 elements starting at `values`, then stores the
        // smallest one to *minout and the largest one to *maxout.
        // When length < 1 no element is read and the identities are stored:
        // *minout = 0xffffffffffffffff, *maxout = 0.
        //
        // `length` is a 64-bit Go int. The length test, the copy into R8 and the
        // "round down to a multiple of 4" mask are encoded as 64-bit (X register)
        // instructions so that lengths of 2^31 or more are neither truncated nor
        // mistaken for negative values.
        //
        // The frame push/pop is emitted through raw WORDs, so the 16 bytes it takes
        // from the stack are not covered by the $0 frame size on the TEXT line.
        //
        // Register roles (note that R10/R11 are swapped relative to the signed
        // int64 routine):
        //   R0 = values   R1 = length   R2 = minout   R3 = maxout
        //   R8 = length, later the number of tail elements still to scan
        //   R9 = number of elements consumed by the vector loop (0 if it is skipped)
        //   R10 = running maximum   R11 = running minimum (scalar part)
   241  TEXT ·_uint64_max_min_neon(SB), $0-32
   242  
   243  	MOVD    values+0(FP), R0
   244  	MOVD    length+8(FP), R1
   245  	MOVD    minout+16(FP), R2
   246  	MOVD    maxout+24(FP), R3
   247  
   248  	WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
   249  	WORD $0xf100043f // cmp    x1, #1
   250  	WORD $0x910003fd // mov    x29, sp
   251  	BLT LBB3_3       // length < 1: store the identities and return
   252  
   253  	WORD $0xf1000c3f // cmp    x1, #3
   254  	WORD $0xaa0103e8 // mov    x8, x1
   255  	BHI LBB3_4       // length > 3: run the vector loop first
   256  
        	// 1..3 elements: scalar loop only, starting at index 0.
   257  	WORD $0xaa1f03e9 // mov    x9, xzr
   258  	WORD $0xaa1f03ea // mov    x10, xzr
   259  	WORD $0x9280000b // mov    x11, #-1
   260  	JMP LBB3_7
   261  LBB3_3:
        	// Empty input: *maxout = 0, *minout = 0xffffffffffffffff.
   262  	WORD $0xaa1f03ea // mov    x10, xzr
   263  	WORD $0x9280000b // mov    x11, #-1
   264  	WORD $0xf900006a // str    x10, [x3]
   265  	WORD $0xf900004b // str    x11, [x2]
   266  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   267  	RET
   268  LBB3_4:
        	// Vector setup: x9 = length rounded down to a multiple of 4 (also the
        	// loop counter in x11); v0/v2 accumulate minima, v1/v3 accumulate maxima.
   269  	WORD $0x927ef509 // and    x9, x8, #0xfffffffffffffffc
   270  	WORD $0x9100400a // add    x10, x0, #16
   271  	WORD $0x6f00e401 // movi    v1.2d, #0000000000000000
   272  	WORD $0x6f07e7e0 // movi    v0.2d, #0xffffffffffffffff
   273  	WORD $0x6f07e7e2 // movi    v2.2d, #0xffffffffffffffff
   274  	WORD $0xaa0903eb // mov    x11, x9
   275  	WORD $0x6f00e403 // movi    v3.2d, #0000000000000000
   276  LBB3_5:
        	// Each iteration loads four uint64 values (two per Q register). Min and
        	// max are built from an unsigned compare (cmhi) producing a lane mask,
        	// followed by a bitwise select (bsl) between the old accumulator and
        	// the freshly loaded lanes.
   277  	WORD $0xad7f9544 // ldp    q4, q5, [x10, #-16]
   278  	WORD $0x4ea31c66 // mov    v6.16b, v3.16b
   279  	WORD $0x4ea11c27 // mov    v7.16b, v1.16b
   280  	WORD $0x4ea21c43 // mov    v3.16b, v2.16b
   281  	WORD $0x4ea01c01 // mov    v1.16b, v0.16b
   282  	WORD $0x6ee03480 // cmhi    v0.2d, v4.2d, v0.2d
   283  	WORD $0x6ee234a2 // cmhi    v2.2d, v5.2d, v2.2d
   284  	WORD $0x6e641c20 // bsl    v0.16b, v1.16b, v4.16b
   285  	WORD $0x6ee434e1 // cmhi    v1.2d, v7.2d, v4.2d
   286  	WORD $0x6e651c62 // bsl    v2.16b, v3.16b, v5.16b
   287  	WORD $0x6ee534c3 // cmhi    v3.2d, v6.2d, v5.2d
   288  	WORD $0xf100116b // subs    x11, x11, #4
   289  	WORD $0x6e641ce1 // bsl    v1.16b, v7.16b, v4.16b
   290  	WORD $0x6e651cc3 // bsl    v3.16b, v6.16b, v5.16b
   291  	WORD $0x9100814a // add    x10, x10, #32
   292  	BNE LBB3_5       // flags still come from the subs above
   293  
        	// Fold the two accumulator pairs, then the two lanes, into x10 (max)
        	// and x11 (min).
   294  	WORD $0x6ee33424 // cmhi    v4.2d, v1.2d, v3.2d
   295  	WORD $0x6ee03445 // cmhi    v5.2d, v2.2d, v0.2d
   296  	WORD $0x6e631c24 // bsl    v4.16b, v1.16b, v3.16b
   297  	WORD $0x6e621c05 // bsl    v5.16b, v0.16b, v2.16b
   298  	WORD $0x4e180480 // dup    v0.2d, v4.d[1]
   299  	WORD $0x4e1804a1 // dup    v1.2d, v5.d[1]
   300  	WORD $0x6ee03482 // cmhi    v2.2d, v4.2d, v0.2d
   301  	WORD $0x6ee53423 // cmhi    v3.2d, v1.2d, v5.2d
   302  	WORD $0x6e601c82 // bsl    v2.16b, v4.16b, v0.16b
   303  	WORD $0x6e611ca3 // bsl    v3.16b, v5.16b, v1.16b
   304  	WORD $0xeb08013f // cmp    x9, x8
   305  	WORD $0x9e66004a // fmov    x10, d2
   306  	WORD $0x9e66006b // fmov    x11, d3
   307  	BEQ LBB3_9       // length was a multiple of 4: no tail
   308  LBB3_7:
        	// Scalar tail: x12 = &values[x9], x8 = elements left.
   309  	WORD $0x8b090c0c // add    x12, x0, x9, lsl #3
   310  	WORD $0xcb090108 // sub    x8, x8, x9
   311  LBB3_8:
   312  	WORD $0xf8408589 // ldr    x9, [x12], #8
   313  	WORD $0xeb09017f // cmp    x11, x9
   314  	WORD $0x9a89316b // csel    x11, x11, x9, lo
   315  	WORD $0xeb09015f // cmp    x10, x9
   316  	WORD $0x9a89814a // csel    x10, x10, x9, hi
   317  	WORD $0xf1000508 // subs    x8, x8, #1
   318  	BNE LBB3_8
   319  LBB3_9:
        	// Publish results: *maxout = x10, *minout = x11.
   320  	WORD $0xf900006a // str    x10, [x3]
   321  	WORD $0xf900004b // str    x11, [x2]
   322  	WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   323  	RET
   324