//+build !noasm !appengine

// Origin: github.com/apache/arrow/go/v14@v14.0.2/internal/utils/min_max_neon_arm64.s
//
// ARROW-15336
// (C2GOASM doesn't work correctly for Arm64)
// Partly GENERATED BY asm2plan9s.
//
// NOTE(review): space-separated terms on a +build line are OR-ed, so the
// constraint above excludes this file only when BOTH noasm and appengine are
// set. Confirm against the Go file that declares these functions before
// tightening it.
//
// Syntax: Go (Plan 9) assembler for arm64. Instructions the hand-maintained
// part cannot express as Go mnemonics are emitted as raw WORDs; the trailing
// comment on each WORD is its GNU A64 disassembly.
//
// Common contract for all four routines:
//   values  (R0) - pointer to the first element
//   length  (R1) - element count, a 64-bit Go int
//   minout  (R2) - receives the minimum
//   maxout  (R3) - receives the maximum
// When length < 1 the outputs receive the identity values (largest
// representable value for min, smallest for max) and values is never read.
//
// The length is compared, copied and rounded with 64-bit instructions so that
// counts of 2^31 and above are not truncated to their low 32 bits.
//
// Each routine pushes x29/x30 (16 bytes) on entry and pops them before RET.
// Scratch registers used: x8-x12, v0-v7 and the condition flags.


// func _int32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
//
// Signed 32-bit min/max. Empty input stores 2147483647 to minout and
// -2147483648 to maxout.
//   x8  = length                 x9  = length rounded down to a multiple of 4
//   x10 = vector-loop pointer, afterwards the running minimum (w10)
//   x11 = vector-loop counter, afterwards the running maximum (w11)
//   x12 = tail-loop pointer
TEXT ·_int32_max_min_neon(SB), $0-32

	MOVD values+0(FP), R0
	MOVD length+8(FP), R1
	MOVD minout+16(FP), R2
	MOVD maxout+24(FP), R3

	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0xf100043f // cmp x1, #1                  (full 64-bit length)
	WORD $0x910003fd // mov x29, sp
	BLT LBB0_3       // length < 1: store identities and return

	WORD $0xf1000c3f // cmp x1, #3
	WORD $0xaa0103e8 // mov x8, x1
	BHI LBB0_4       // length > 3: run the vector loop first

	// 1..3 elements: scalar tail only, starting from the identities.
	WORD $0xaa1f03e9 // mov x9, xzr
	WORD $0x52b0000b // mov w11, #-2147483648
	WORD $0x12b0000a // mov w10, #2147483647
	JMP LBB0_7
LBB0_3:
	// Empty input.
	WORD $0x12b0000a // mov w10, #2147483647
	WORD $0x52b0000b // mov w11, #-2147483648
	WORD $0xb900006b // str w11, [x3]
	WORD $0xb900004a // str w10, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET
LBB0_4:
	// v0/v1 = min accumulators (INT32_MAX), v2/v3 = max accumulators (INT32_MIN).
	WORD $0x927ef509 // and x9, x8, #0xfffffffffffffffc
	WORD $0x9100200a // add x10, x0, #8
	WORD $0x0f046402 // movi v2.2s, #128, lsl #24
	WORD $0x2f046400 // mvni v0.2s, #128, lsl #24
	WORD $0x2f046401 // mvni v1.2s, #128, lsl #24
	WORD $0xaa0903eb // mov x11, x9
	WORD $0x0f046403 // movi v3.2s, #128, lsl #24
LBB0_5:
	// Four elements per iteration, as two 2-lane vectors.
	WORD $0x6d7f9544 // ldp d4, d5, [x10, #-8]
	WORD $0xf100116b // subs x11, x11, #4
	WORD $0x9100414a // add x10, x10, #16
	WORD $0x0ea46c00 // smin v0.2s, v0.2s, v4.2s
	WORD $0x0ea56c21 // smin v1.2s, v1.2s, v5.2s
	WORD $0x0ea46442 // smax v2.2s, v2.2s, v4.2s
	WORD $0x0ea56463 // smax v3.2s, v3.2s, v5.2s
	BNE LBB0_5

	// Fold the two accumulators, then the two lanes, into scalars.
	WORD $0x0ea36442 // smax v2.2s, v2.2s, v3.2s
	WORD $0x0ea16c00 // smin v0.2s, v0.2s, v1.2s
	WORD $0x0e0c0441 // dup v1.2s, v2.s[1]
	WORD $0x0e0c0403 // dup v3.2s, v0.s[1]
	WORD $0x0ea16441 // smax v1.2s, v2.2s, v1.2s
	WORD $0x0ea36c00 // smin v0.2s, v0.2s, v3.2s
	WORD $0xeb08013f // cmp x9, x8
	WORD $0x1e26002b // fmov w11, s1
	WORD $0x1e26000a // fmov w10, s0
	BEQ LBB0_9       // length was a multiple of 4: no tail
LBB0_7:
	// Scalar tail over the remaining (length - x9) elements.
	WORD $0x8b09080c // add x12, x0, x9, lsl #2
	WORD $0xcb090108 // sub x8, x8, x9
LBB0_8:
	WORD $0xb8404589 // ldr w9, [x12], #4
	WORD $0x6b09015f // cmp w10, w9
	WORD $0x1a89b14a // csel w10, w10, w9, lt
	WORD $0x6b09017f // cmp w11, w9
	WORD $0x1a89c16b // csel w11, w11, w9, gt
	WORD $0xf1000508 // subs x8, x8, #1
	BNE LBB0_8
LBB0_9:
	WORD $0xb900006b // str w11, [x3]
	WORD $0xb900004a // str w10, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET

// func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
//
// Unsigned 32-bit min/max. Empty input stores 0xffffffff to minout and 0 to
// maxout.
//   x8  = length                 x9  = length rounded down to a multiple of 4
//   x10 = vector-loop pointer, afterwards the running maximum (w10)
//   x11 = vector-loop counter, afterwards the running minimum (w11)
//   x12 = tail-loop pointer
TEXT ·_uint32_max_min_neon(SB), $0-32

	MOVD values+0(FP), R0
	MOVD length+8(FP), R1
	MOVD minout+16(FP), R2
	MOVD maxout+24(FP), R3

	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0xf100043f // cmp x1, #1                  (full 64-bit length)
	WORD $0x910003fd // mov x29, sp
	BLT LBB1_3       // length < 1: store identities and return

	WORD $0xf1000c3f // cmp x1, #3
	WORD $0xaa0103e8 // mov x8, x1
	BHI LBB1_4       // length > 3: run the vector loop first

	// 1..3 elements: scalar tail only, starting from the identities.
	WORD $0xaa1f03e9 // mov x9, xzr
	WORD $0x2a1f03ea // mov w10, wzr
	WORD $0x1280000b // mov w11, #-1
	JMP LBB1_7
LBB1_3:
	// Empty input.
	WORD $0x2a1f03ea // mov w10, wzr
	WORD $0x1280000b // mov w11, #-1
	WORD $0xb900006a // str w10, [x3]
	WORD $0xb900004b // str w11, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET
LBB1_4:
	// v0/v2 = min accumulators (all ones), v1/v3 = max accumulators (zero).
	WORD $0x927ef509 // and x9, x8, #0xfffffffffffffffc
	WORD $0x6f00e401 // movi v1.2d, #0000000000000000
	WORD $0x6f07e7e0 // movi v0.2d, #0xffffffffffffffff
	WORD $0x9100200a // add x10, x0, #8
	WORD $0x6f07e7e2 // movi v2.2d, #0xffffffffffffffff
	WORD $0xaa0903eb // mov x11, x9
	WORD $0x6f00e403 // movi v3.2d, #0000000000000000
LBB1_5:
	// Four elements per iteration, as two 2-lane vectors.
	WORD $0x6d7f9544 // ldp d4, d5, [x10, #-8]
	WORD $0xf100116b // subs x11, x11, #4
	WORD $0x9100414a // add x10, x10, #16
	WORD $0x2ea46c00 // umin v0.2s, v0.2s, v4.2s
	WORD $0x2ea56c42 // umin v2.2s, v2.2s, v5.2s
	WORD $0x2ea46421 // umax v1.2s, v1.2s, v4.2s
	WORD $0x2ea56463 // umax v3.2s, v3.2s, v5.2s
	BNE LBB1_5

	// Fold the two accumulators, then the two lanes, into scalars.
	WORD $0x2ea36421 // umax v1.2s, v1.2s, v3.2s
	WORD $0x2ea26c00 // umin v0.2s, v0.2s, v2.2s
	WORD $0x0e0c0422 // dup v2.2s, v1.s[1]
	WORD $0x0e0c0403 // dup v3.2s, v0.s[1]
	WORD $0x2ea26421 // umax v1.2s, v1.2s, v2.2s
	WORD $0x2ea36c00 // umin v0.2s, v0.2s, v3.2s
	WORD $0xeb08013f // cmp x9, x8
	WORD $0x1e26002a // fmov w10, s1
	WORD $0x1e26000b // fmov w11, s0
	BEQ LBB1_9       // length was a multiple of 4: no tail
LBB1_7:
	// Scalar tail over the remaining (length - x9) elements.
	WORD $0x8b09080c // add x12, x0, x9, lsl #2
	WORD $0xcb090108 // sub x8, x8, x9
LBB1_8:
	WORD $0xb8404589 // ldr w9, [x12], #4
	WORD $0x6b09017f // cmp w11, w9
	WORD $0x1a89316b // csel w11, w11, w9, lo
	WORD $0x6b09015f // cmp w10, w9
	WORD $0x1a89814a // csel w10, w10, w9, hi
	WORD $0xf1000508 // subs x8, x8, #1
	BNE LBB1_8
LBB1_9:
	WORD $0xb900006a // str w10, [x3]
	WORD $0xb900004b // str w11, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET

// func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
//
// Signed 64-bit min/max. Empty input stores 9223372036854775807 to minout and
// -9223372036854775808 to maxout.
//   x8  = length                 x9  = length rounded down to a multiple of 4
//   x10 = running minimum (vector-loop pointer while the loop runs)
//   x11 = running maximum (vector-loop counter while the loop runs)
//   x12 = tail-loop pointer
TEXT ·_int64_max_min_neon(SB), $0-32

	MOVD values+0(FP), R0
	MOVD length+8(FP), R1
	MOVD minout+16(FP), R2
	MOVD maxout+24(FP), R3

	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0xf100043f // cmp x1, #1                  (full 64-bit length)
	WORD $0x910003fd // mov x29, sp
	BLT LBB2_3       // length < 1: store identities and return

	WORD $0xaa0103e8 // mov x8, x1
	WORD $0xd2f0000b // mov x11, #-9223372036854775808
	WORD $0xf1000c3f // cmp x1, #3
	WORD $0x92f0000a // mov x10, #9223372036854775807
	BHI LBB2_4       // length > 3: run the vector loop first

	// 1..3 elements: scalar tail only; x10/x11 already hold the identities.
	WORD $0xaa1f03e9 // mov x9, xzr
	JMP LBB2_7
LBB2_3:
	// Empty input.
	WORD $0x92f0000a // mov x10, #9223372036854775807
	WORD $0xd2f0000b // mov x11, #-9223372036854775808
	WORD $0xf900006b // str x11, [x3]
	WORD $0xf900004a // str x10, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET
LBB2_4:
	// v0/v2 = min accumulators (INT64_MAX), v1/v3 = max accumulators (INT64_MIN).
	WORD $0x927ef509 // and x9, x8, #0xfffffffffffffffc
	WORD $0x4e080d61 // dup v1.2d, x11
	WORD $0x4e080d40 // dup v0.2d, x10
	WORD $0x9100400a // add x10, x0, #16
	WORD $0xaa0903eb // mov x11, x9
	WORD $0x4ea01c02 // mov v2.16b, v0.16b
	WORD $0x4ea11c23 // mov v3.16b, v1.16b
LBB2_5:
	// Four elements per iteration, as two 2-lane vectors. Each min/max is
	// built from a compare (cmgt) that produces a lane mask and a bitwise
	// select (bsl) that picks between the old accumulator and the new data.
	WORD $0xad7f9544 // ldp q4, q5, [x10, #-16]
	WORD $0x4ea31c66 // mov v6.16b, v3.16b
	WORD $0x4ea11c27 // mov v7.16b, v1.16b
	WORD $0x4ea21c43 // mov v3.16b, v2.16b
	WORD $0x4ea01c01 // mov v1.16b, v0.16b
	WORD $0x4ee03480 // cmgt v0.2d, v4.2d, v0.2d
	WORD $0x4ee234a2 // cmgt v2.2d, v5.2d, v2.2d
	WORD $0x6e641c20 // bsl v0.16b, v1.16b, v4.16b
	WORD $0x4ee434e1 // cmgt v1.2d, v7.2d, v4.2d
	WORD $0x6e651c62 // bsl v2.16b, v3.16b, v5.16b
	WORD $0x4ee534c3 // cmgt v3.2d, v6.2d, v5.2d
	WORD $0xf100116b // subs x11, x11, #4
	WORD $0x6e641ce1 // bsl v1.16b, v7.16b, v4.16b
	WORD $0x6e651cc3 // bsl v3.16b, v6.16b, v5.16b
	WORD $0x9100814a // add x10, x10, #32
	BNE LBB2_5

	// Fold the two accumulators, then the two lanes, into scalars.
	WORD $0x4ee33424 // cmgt v4.2d, v1.2d, v3.2d
	WORD $0x4ee03445 // cmgt v5.2d, v2.2d, v0.2d
	WORD $0x6e631c24 // bsl v4.16b, v1.16b, v3.16b
	WORD $0x6e621c05 // bsl v5.16b, v0.16b, v2.16b
	WORD $0x4e180480 // dup v0.2d, v4.d[1]
	WORD $0x4e1804a1 // dup v1.2d, v5.d[1]
	WORD $0x4ee03482 // cmgt v2.2d, v4.2d, v0.2d
	WORD $0x4ee53423 // cmgt v3.2d, v1.2d, v5.2d
	WORD $0x6e601c82 // bsl v2.16b, v4.16b, v0.16b
	WORD $0x6e611ca3 // bsl v3.16b, v5.16b, v1.16b
	WORD $0xeb08013f // cmp x9, x8
	WORD $0x9e66004b // fmov x11, d2
	WORD $0x9e66006a // fmov x10, d3
	BEQ LBB2_9       // length was a multiple of 4: no tail
LBB2_7:
	// Scalar tail over the remaining (length - x9) elements.
	WORD $0x8b090c0c // add x12, x0, x9, lsl #3
	WORD $0xcb090108 // sub x8, x8, x9
LBB2_8:
	WORD $0xf8408589 // ldr x9, [x12], #8
	WORD $0xeb09015f // cmp x10, x9
	WORD $0x9a89b14a // csel x10, x10, x9, lt
	WORD $0xeb09017f // cmp x11, x9
	WORD $0x9a89c16b // csel x11, x11, x9, gt
	WORD $0xf1000508 // subs x8, x8, #1
	BNE LBB2_8
LBB2_9:
	WORD $0xf900006b // str x11, [x3]
	WORD $0xf900004a // str x10, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET


// func _uint64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
//
// Unsigned 64-bit min/max. Empty input stores 0xffffffffffffffff to minout
// and 0 to maxout.
//   x8  = length                 x9  = length rounded down to a multiple of 4
//   x10 = vector-loop pointer, afterwards the running maximum
//   x11 = vector-loop counter, afterwards the running minimum
//   x12 = tail-loop pointer
TEXT ·_uint64_max_min_neon(SB), $0-32

	MOVD values+0(FP), R0
	MOVD length+8(FP), R1
	MOVD minout+16(FP), R2
	MOVD maxout+24(FP), R3

	WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
	WORD $0xf100043f // cmp x1, #1                  (full 64-bit length)
	WORD $0x910003fd // mov x29, sp
	BLT LBB3_3       // length < 1: store identities and return

	WORD $0xf1000c3f // cmp x1, #3
	WORD $0xaa0103e8 // mov x8, x1
	BHI LBB3_4       // length > 3: run the vector loop first

	// 1..3 elements: scalar tail only, starting from the identities.
	WORD $0xaa1f03e9 // mov x9, xzr
	WORD $0xaa1f03ea // mov x10, xzr
	WORD $0x9280000b // mov x11, #-1
	JMP LBB3_7
LBB3_3:
	// Empty input.
	WORD $0xaa1f03ea // mov x10, xzr
	WORD $0x9280000b // mov x11, #-1
	WORD $0xf900006a // str x10, [x3]
	WORD $0xf900004b // str x11, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET
LBB3_4:
	// v0/v2 = min accumulators (all ones), v1/v3 = max accumulators (zero).
	WORD $0x927ef509 // and x9, x8, #0xfffffffffffffffc
	WORD $0x9100400a // add x10, x0, #16
	WORD $0x6f00e401 // movi v1.2d, #0000000000000000
	WORD $0x6f07e7e0 // movi v0.2d, #0xffffffffffffffff
	WORD $0x6f07e7e2 // movi v2.2d, #0xffffffffffffffff
	WORD $0xaa0903eb // mov x11, x9
	WORD $0x6f00e403 // movi v3.2d, #0000000000000000
LBB3_5:
	// Four elements per iteration, as two 2-lane vectors. Each min/max is
	// built from an unsigned compare (cmhi) that produces a lane mask and a
	// bitwise select (bsl) that picks between the old accumulator and the
	// new data.
	WORD $0xad7f9544 // ldp q4, q5, [x10, #-16]
	WORD $0x4ea31c66 // mov v6.16b, v3.16b
	WORD $0x4ea11c27 // mov v7.16b, v1.16b
	WORD $0x4ea21c43 // mov v3.16b, v2.16b
	WORD $0x4ea01c01 // mov v1.16b, v0.16b
	WORD $0x6ee03480 // cmhi v0.2d, v4.2d, v0.2d
	WORD $0x6ee234a2 // cmhi v2.2d, v5.2d, v2.2d
	WORD $0x6e641c20 // bsl v0.16b, v1.16b, v4.16b
	WORD $0x6ee434e1 // cmhi v1.2d, v7.2d, v4.2d
	WORD $0x6e651c62 // bsl v2.16b, v3.16b, v5.16b
	WORD $0x6ee534c3 // cmhi v3.2d, v6.2d, v5.2d
	WORD $0xf100116b // subs x11, x11, #4
	WORD $0x6e641ce1 // bsl v1.16b, v7.16b, v4.16b
	WORD $0x6e651cc3 // bsl v3.16b, v6.16b, v5.16b
	WORD $0x9100814a // add x10, x10, #32
	BNE LBB3_5

	// Fold the two accumulators, then the two lanes, into scalars.
	WORD $0x6ee33424 // cmhi v4.2d, v1.2d, v3.2d
	WORD $0x6ee03445 // cmhi v5.2d, v2.2d, v0.2d
	WORD $0x6e631c24 // bsl v4.16b, v1.16b, v3.16b
	WORD $0x6e621c05 // bsl v5.16b, v0.16b, v2.16b
	WORD $0x4e180480 // dup v0.2d, v4.d[1]
	WORD $0x4e1804a1 // dup v1.2d, v5.d[1]
	WORD $0x6ee03482 // cmhi v2.2d, v4.2d, v0.2d
	WORD $0x6ee53423 // cmhi v3.2d, v1.2d, v5.2d
	WORD $0x6e601c82 // bsl v2.16b, v4.16b, v0.16b
	WORD $0x6e611ca3 // bsl v3.16b, v5.16b, v1.16b
	WORD $0xeb08013f // cmp x9, x8
	WORD $0x9e66004a // fmov x10, d2
	WORD $0x9e66006b // fmov x11, d3
	BEQ LBB3_9       // length was a multiple of 4: no tail
LBB3_7:
	// Scalar tail over the remaining (length - x9) elements.
	WORD $0x8b090c0c // add x12, x0, x9, lsl #3
	WORD $0xcb090108 // sub x8, x8, x9
LBB3_8:
	WORD $0xf8408589 // ldr x9, [x12], #8
	WORD $0xeb09017f // cmp x11, x9
	WORD $0x9a89316b // csel x11, x11, x9, lo
	WORD $0xeb09015f // cmp x10, x9
	WORD $0x9a89814a // csel x10, x10, x9, hi
	WORD $0xf1000508 // subs x8, x8, #1
	BNE LBB3_8
LBB3_9:
	WORD $0xf900006a // str x10, [x3]
	WORD $0xf900004b // str x11, [x2]
	WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
	RET