// Source: github.com/apache/arrow/go/v10@v10.0.1/parquet/internal/utils/bit_packing_neon_arm64.s
//+build !noasm !appengine

// ARROW-15336
// (C2GOASM doesn't work correctly for Arm64)
// Partly GENERATED BY asm2plan9s.

// LJTI is the dispatch table for _unpack32_neon: 33 two-byte entries
// (GLOBL size $66), one per bit width 0..32. The dispatch sequence that
// follows the function prologue reads entry [nbits] with a halfword load
// (ldrh w12, [x10, x9, lsl #1]), shifts it left by 2 and adds it to a code
// address before branching (add x11, x11, x12, lsl #2 ; br x11), so every
// value here is an offset counted in 4-byte instructions.
//
// NOTE(review): the entries at +0x01c ($4601, nbits = 14) and +0x034
// ($3140, nbits = 26) break the otherwise strictly increasing sequence, and
// $4601 is larger than every other entry in the table. They are kept exactly
// as found because the labels they must point at are not derivable from the
// table alone -- verify both against the instruction counts of the
// corresponding case labels before relying on this kernel.
DATA LJTI<>+0x000(SB)/2, $0
DATA LJTI<>+0x002(SB)/2, $9
DATA LJTI<>+0x004(SB)/2, $72
DATA LJTI<>+0x006(SB)/2, $133
DATA LJTI<>+0x008(SB)/2, $210
DATA LJTI<>+0x00a(SB)/2, $291
DATA LJTI<>+0x00c(SB)/2, $390
DATA LJTI<>+0x00e(SB)/2, $474
DATA LJTI<>+0x010(SB)/2, $567
DATA LJTI<>+0x012(SB)/2, $657
DATA LJTI<>+0x014(SB)/2, $758
DATA LJTI<>+0x016(SB)/2, $854
DATA LJTI<>+0x018(SB)/2, $957
DATA LJTI<>+0x01a(SB)/2, $1048
DATA LJTI<>+0x01c(SB)/2, $4601
DATA LJTI<>+0x01e(SB)/2, $1250
DATA LJTI<>+0x020(SB)/2, $1354
DATA LJTI<>+0x022(SB)/2, $1436
DATA LJTI<>+0x024(SB)/2, $1541
DATA LJTI<>+0x026(SB)/2, $1645
DATA LJTI<>+0x028(SB)/2, $1773
DATA LJTI<>+0x02a(SB)/2, $1875
DATA LJTI<>+0x02c(SB)/2, $2015
DATA LJTI<>+0x02e(SB)/2, $2145
DATA LJTI<>+0x030(SB)/2, $2292
DATA LJTI<>+0x032(SB)/2, $2390
DATA LJTI<>+0x034(SB)/2, $3140
DATA LJTI<>+0x036(SB)/2, $2533
DATA LJTI<>+0x038(SB)/2, $2667
DATA LJTI<>+0x03a(SB)/2, $2781
DATA LJTI<>+0x03c(SB)/2, $2902
DATA LJTI<>+0x03e(SB)/2, $3008
DATA LJTI<>+0x040(SB)/2, $3117
GLOBL LJTI<>+0(SB), 8, $66


// func _unpack32_neon(in, out unsafe.Pointer, batchSize, nbits int) (num int)
//
// Frame: $24-40 (24 bytes of locals, 40 bytes of arguments and result).
// Register roles established by the argument loads below:
//   R0 (x0) = in         pointer to the packed input
//   R1 (x1) = out        pointer to the unpacked output
//   R2 (x2) = batchSize
//   R3 (x3) = nbits      selects the LJTI entry
// The symbol uses the Go middle dot (U+00B7) as package separator; it must
// match the Go-side declaration of _unpack32_neon.
TEXT ·_unpack32_neon(SB), $24-40

	MOVD in+0(FP), R0
	MOVD out+8(FP), R1
	MOVD batchSize+16(FP), R2
	MOVD nbits+24(FP), R3

	WORD $0xa9be7bfd // stp x29, x30, [sp, #-32]!
52 WORD $0x11007c48 // add w8, w2, #31 53 WORD $0x7100005f // cmp w2, #0 54 WORD $0x1a82b108 // csel w8, w8, w2, lt 55 WORD $0xf9000bf3 // str x19, [sp, #16] 56 WORD $0x7100807f // cmp w3, #32 57 WORD $0x121b6913 // and w19, w8, #0xffffffe0 58 WORD $0x910003fd // mov x29, sp 59 BHI LBB0_99 60 WORD $0x2a0303e9 // mov w9, w3 61 MOVD LJTI<>+0x00(SB), R10 62 WORD $0x1000000b // adr x11, LBB0_2 63 WORD $0x7869794c // ldrh w12, [x10, x9, lsl #1] 64 WORD $0x8b0c096b // add x11, x11, x12, lsl #2 65 WORD $0x13057d08 // asr w8, w8, #5 66 WORD $0xd61f0160 // br x11 67 LBB0_2: 68 WORD $0x7100805f // cmp w2, #32 69 BLT LBB0_99 70 WORD $0x51000508 // sub w8, w8, #1 71 WORD $0xd379e108 // lsl x8, x8, #7 72 WORD $0x91020102 // add x2, x8, #128 73 WORD $0xaa0103e0 // mov x0, x1 74 WORD $0x2a1f03e1 // mov w1, wzr 75 WORD $0x94000000 // bl memset 76 JMP LBB0_99 77 LBB0_4: 78 WORD $0x7100805f // cmp w2, #32 79 BLT LBB0_99 80 VMOVQ $0x0000000500000004, $0x0000000700000006, V1 // LCPI0_1 81 VMOVQ $0x0000000900000008, $0x0000000b0000000a, V2 // LCPI0_3 82 VMOVQ $0x0000000d0000000c, $0x0000000f0000000e, V3 // LCPI0_5 83 VMOVQ $0x0000001100000010, $0x0000001300000012, V4 // LCPI0_7 84 VMOVQ $0x0000001500000014, $0x0000001700000016, V5 // LCPI0_9 85 VMOVQ $0x0000001900000018, $0x0000001b0000001a, V6 // LCPI0_11 86 VMOVQ $0x0000001d0000001c, $0x0000001f0000001e, V7 // LCPI0_121 87 WORD $0x91010029 // add x9, x1, #64 88 WORD $0x4f000420 // movi v0.4s, #1 89 WORD $0x6ea0b821 // neg v1.4s, v1.4s 90 WORD $0x6ea0b842 // neg v2.4s, v2.4s 91 WORD $0x6ea0b863 // neg v3.4s, v3.4s 92 WORD $0x6ea0b884 // neg v4.4s, v4.4s 93 WORD $0x6ea0b8a5 // neg v5.4s, v5.4s 94 WORD $0x6ea0b8c6 // neg v6.4s, v6.4s 95 WORD $0x6ea0b8e7 // neg v7.4s, v7.4s 96 WORD $0xaa0003ea // mov x10, x0 97 LBB0_6: 98 WORD $0xb940000b // ldr w11, [x0] 99 WORD $0xf1000508 // subs x8, x8, #1 100 WORD $0x53017d6c // lsr w12, w11, #1 101 WORD $0x1e270170 // fmov s16, w11 102 WORD $0x53027d6d // lsr w13, w11, #2 103 WORD $0x4e0c1d90 // 
mov v16.s[1], w12 104 WORD $0x53037d6e // lsr w14, w11, #3 105 WORD $0x4e141db0 // mov v16.s[2], w13 106 WORD $0x4e1c1dd0 // mov v16.s[3], w14 107 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 108 WORD $0x3c9c0130 // stur q16, [x9, #-64] 109 WORD $0x4ddfc950 // ld1r { v16.4s }, [x10], #4 110 WORD $0x6ea14610 // ushl v16.4s, v16.4s, v1.4s 111 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 112 WORD $0x3c9d0130 // stur q16, [x9, #-48] 113 WORD $0x4d40c810 // ld1r { v16.4s }, [x0] 114 WORD $0x6ea24610 // ushl v16.4s, v16.4s, v2.4s 115 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 116 WORD $0x3c9e0130 // stur q16, [x9, #-32] 117 WORD $0x4d40c810 // ld1r { v16.4s }, [x0] 118 WORD $0x6ea34610 // ushl v16.4s, v16.4s, v3.4s 119 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 120 WORD $0x3c9f0130 // stur q16, [x9, #-16] 121 WORD $0x4d40c810 // ld1r { v16.4s }, [x0] 122 WORD $0x6ea44610 // ushl v16.4s, v16.4s, v4.4s 123 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 124 WORD $0x3d800130 // str q16, [x9] 125 WORD $0x4d40c810 // ld1r { v16.4s }, [x0] 126 WORD $0x6ea54610 // ushl v16.4s, v16.4s, v5.4s 127 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 128 WORD $0x3d800530 // str q16, [x9, #16] 129 WORD $0x4d40c810 // ld1r { v16.4s }, [x0] 130 WORD $0x6ea64610 // ushl v16.4s, v16.4s, v6.4s 131 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 132 WORD $0x3d800930 // str q16, [x9, #32] 133 WORD $0x4d40c810 // ld1r { v16.4s }, [x0] 134 WORD $0xaa0a03e0 // mov x0, x10 135 WORD $0x6ea74610 // ushl v16.4s, v16.4s, v7.4s 136 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 137 WORD $0x3d800d30 // str q16, [x9, #48] 138 WORD $0x91020129 // add x9, x9, #128 139 BNE LBB0_6 140 JMP LBB0_99 141 LBB0_7: 142 WORD $0x7100805f // cmp w2, #32 143 BLT LBB0_99 144 VMOVQ $0x0000000a00000008, $0x0000000e0000000c, V1 // LCPI0_15 145 VMOVQ $0x0000001200000010, $0x0000001600000014, V2 // LCPI0_17 146 VMOVQ $0x0000001a00000018, $0x0000001e0000001c, V3 // LCPI0_120 147 WORD $0x91010029 // add 
x9, x1, #64 148 WORD $0x4f000460 // movi v0.4s, #3 149 WORD $0x6ea0b821 // neg v1.4s, v1.4s 150 WORD $0x6ea0b842 // neg v2.4s, v2.4s 151 WORD $0x6ea0b863 // neg v3.4s, v3.4s 152 LBB0_9: 153 WORD $0xb940000a // ldr w10, [x0] 154 WORD $0xaa0003eb // mov x11, x0 155 WORD $0xf1000508 // subs x8, x8, #1 156 WORD $0x53027d4c // lsr w12, w10, #2 157 WORD $0x1e270144 // fmov s4, w10 158 WORD $0x53047d4d // lsr w13, w10, #4 159 WORD $0x4e0c1d84 // mov v4.s[1], w12 160 WORD $0x53067d4e // lsr w14, w10, #6 161 WORD $0x4e141da4 // mov v4.s[2], w13 162 WORD $0x4e1c1dc4 // mov v4.s[3], w14 163 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 164 WORD $0x3c9c0124 // stur q4, [x9, #-64] 165 WORD $0x4ddfc964 // ld1r { v4.4s }, [x11], #4 166 WORD $0x6ea14484 // ushl v4.4s, v4.4s, v1.4s 167 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 168 WORD $0x3c9d0124 // stur q4, [x9, #-48] 169 WORD $0x4d40c804 // ld1r { v4.4s }, [x0] 170 WORD $0x6ea24484 // ushl v4.4s, v4.4s, v2.4s 171 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 172 WORD $0x3c9e0124 // stur q4, [x9, #-32] 173 WORD $0xb840840a // ldr w10, [x0], #8 174 WORD $0x4e040d44 // dup v4.4s, w10 175 WORD $0x6ea34484 // ushl v4.4s, v4.4s, v3.4s 176 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 177 WORD $0x3c9f0124 // stur q4, [x9, #-16] 178 WORD $0xb940016a // ldr w10, [x11] 179 WORD $0x53027d4c // lsr w12, w10, #2 180 WORD $0x1e270144 // fmov s4, w10 181 WORD $0x53047d4d // lsr w13, w10, #4 182 WORD $0x4e0c1d84 // mov v4.s[1], w12 183 WORD $0x53067d4e // lsr w14, w10, #6 184 WORD $0x4e141da4 // mov v4.s[2], w13 185 WORD $0x4e1c1dc4 // mov v4.s[3], w14 186 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 187 WORD $0x3d800124 // str q4, [x9] 188 WORD $0x4d40c964 // ld1r { v4.4s }, [x11] 189 WORD $0x6ea14484 // ushl v4.4s, v4.4s, v1.4s 190 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 191 WORD $0x3d800524 // str q4, [x9, #16] 192 WORD $0x4d40c964 // ld1r { v4.4s }, [x11] 193 WORD $0x6ea24484 // ushl v4.4s, v4.4s, v2.4s 194 WORD $0x4e201c84 
// and v4.16b, v4.16b, v0.16b 195 WORD $0x3d800924 // str q4, [x9, #32] 196 WORD $0x4d40c964 // ld1r { v4.4s }, [x11] 197 WORD $0x6ea34484 // ushl v4.4s, v4.4s, v3.4s 198 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 199 WORD $0x3d800d24 // str q4, [x9, #48] 200 WORD $0x91020129 // add x9, x9, #128 201 BNE LBB0_9 202 JMP LBB0_99 203 LBB0_10: 204 WORD $0x7100805f // cmp w2, #32 205 BLT LBB0_99 206 VMOVQ $0x0000000f0000000c, $0x0000001500000012, V1 // LCPI0_21 207 MOVD $0x0000001b00000018, R2 // LCPI0_23 208 VMOVQ $0x0000000700000004, $0x0000000d0000000a, V3 // LCPI0_25 209 VMOVQ $0x0000001300000010, $0x0000001900000016, V4 // LCPI0_27 210 MOVD $0x0000000500000002, R5 // LCPI0_29 211 VMOVQ $0x0000000b00000008, $0x000000110000000e, V6 // LCPI0_31 212 VMOVQ $0x0000001700000014, $0x0000001d0000001a, V7 // LCPI0_119 213 WORD $0x91010029 // add x9, x1, #64 214 WORD $0x4f0004e0 // movi v0.4s, #7 215 WORD $0x6ea0b821 // neg v1.4s, v1.4s 216 WORD $0x2ea0b842 // neg v2.2s, v2.2s 217 WORD $0x6ea0b863 // neg v3.4s, v3.4s 218 WORD $0x6ea0b884 // neg v4.4s, v4.4s 219 WORD $0x2ea0b8a5 // neg v5.2s, v5.2s 220 WORD $0x6ea0b8c6 // neg v6.4s, v6.4s 221 WORD $0x6ea0b8e7 // neg v7.4s, v7.4s 222 LBB0_12: 223 WORD $0xb940000a // ldr w10, [x0] 224 WORD $0xf1000508 // subs x8, x8, #1 225 WORD $0x53037d4b // lsr w11, w10, #3 226 WORD $0x1e270150 // fmov s16, w10 227 WORD $0x53067d4c // lsr w12, w10, #6 228 WORD $0x4e0c1d70 // mov v16.s[1], w11 229 WORD $0x53097d4d // lsr w13, w10, #9 230 WORD $0x4e141d90 // mov v16.s[2], w12 231 WORD $0x4e1c1db0 // mov v16.s[3], w13 232 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 233 WORD $0xaa0003ea // mov x10, x0 234 WORD $0x3c9c0130 // stur q16, [x9, #-64] 235 WORD $0x4ddfc950 // ld1r { v16.4s }, [x10], #4 236 WORD $0x6ea14610 // ushl v16.4s, v16.4s, v1.4s 237 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 238 WORD $0x3c9d0130 // stur q16, [x9, #-48] 239 WORD $0xb940000b // ldr w11, [x0] 240 WORD $0xb940014c // ldr w12, [x10] 241 WORD $0x0e040d70 
// dup v16.2s, w11 242 WORD $0x138b798b // extr w11, w12, w11, #30 243 WORD $0x2ea24610 // ushl v16.2s, v16.2s, v2.2s 244 WORD $0x53017d8c // lsr w12, w12, #1 245 WORD $0x4e141d70 // mov v16.s[2], w11 246 WORD $0x4e1c1d90 // mov v16.s[3], w12 247 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 248 WORD $0x3c9e0130 // stur q16, [x9, #-32] 249 WORD $0x4d40c950 // ld1r { v16.4s }, [x10] 250 WORD $0x9100200c // add x12, x0, #8 251 WORD $0x6ea34610 // ushl v16.4s, v16.4s, v3.4s 252 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 253 WORD $0x3c9f0130 // stur q16, [x9, #-16] 254 WORD $0x4d40c950 // ld1r { v16.4s }, [x10] 255 WORD $0x6ea44610 // ushl v16.4s, v16.4s, v4.4s 256 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 257 WORD $0x3d800130 // str q16, [x9] 258 WORD $0xb940014a // ldr w10, [x10] 259 WORD $0xb940080b // ldr w11, [x0, #8] 260 WORD $0x91003000 // add x0, x0, #12 261 WORD $0x531c7d4d // lsr w13, w10, #28 262 WORD $0x138a7d6a // extr w10, w11, w10, #31 263 WORD $0x0e040d70 // dup v16.2s, w11 264 WORD $0x1e2701b1 // fmov s17, w13 265 WORD $0x2ea54610 // ushl v16.2s, v16.2s, v5.2s 266 WORD $0x4e0c1d51 // mov v17.s[1], w10 267 WORD $0x6e180611 // mov v17.d[1], v16.d[0] 268 WORD $0x4e201e30 // and v16.16b, v17.16b, v0.16b 269 WORD $0x3d800530 // str q16, [x9, #16] 270 WORD $0x4d40c990 // ld1r { v16.4s }, [x12] 271 WORD $0x6ea64610 // ushl v16.4s, v16.4s, v6.4s 272 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 273 WORD $0x3d800930 // str q16, [x9, #32] 274 WORD $0x4d40c990 // ld1r { v16.4s }, [x12] 275 WORD $0x6ea74610 // ushl v16.4s, v16.4s, v7.4s 276 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 277 WORD $0x3d800d30 // str q16, [x9, #48] 278 WORD $0x91020129 // add x9, x9, #128 279 BNE LBB0_12 280 JMP LBB0_99 281 LBB0_13: 282 WORD $0x7100805f // cmp w2, #32 283 BLT LBB0_99 284 VMOVQ $0x0000001400000010, $0x0000001c00000018, V1 // LCPI0_118 285 WORD $0x91010029 // add x9, x1, #64 286 WORD $0x4f0005e0 // movi v0.4s, #15 287 WORD $0x6ea0b821 // neg v1.4s, 
v1.4s 288 LBB0_15: 289 WORD $0xb940000a // ldr w10, [x0] 290 WORD $0xaa0003eb // mov x11, x0 291 WORD $0xf1000508 // subs x8, x8, #1 292 WORD $0x53047d4c // lsr w12, w10, #4 293 WORD $0x1e270142 // fmov s2, w10 294 WORD $0x53087d4d // lsr w13, w10, #8 295 WORD $0x4e0c1d82 // mov v2.s[1], w12 296 WORD $0x530c7d4e // lsr w14, w10, #12 297 WORD $0x4e141da2 // mov v2.s[2], w13 298 WORD $0x4e1c1dc2 // mov v2.s[3], w14 299 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 300 WORD $0x3c9c0122 // stur q2, [x9, #-64] 301 WORD $0x4ddfc962 // ld1r { v2.4s }, [x11], #4 302 WORD $0x6ea14442 // ushl v2.4s, v2.4s, v1.4s 303 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 304 WORD $0x3c9d0122 // stur q2, [x9, #-48] 305 WORD $0xb940016a // ldr w10, [x11] 306 WORD $0x53047d4c // lsr w12, w10, #4 307 WORD $0x1e270142 // fmov s2, w10 308 WORD $0x53087d4d // lsr w13, w10, #8 309 WORD $0x4e0c1d82 // mov v2.s[1], w12 310 WORD $0x530c7d4e // lsr w14, w10, #12 311 WORD $0x4e141da2 // mov v2.s[2], w13 312 WORD $0x4e1c1dc2 // mov v2.s[3], w14 313 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 314 WORD $0x3c9e0122 // stur q2, [x9, #-32] 315 WORD $0x4d40c962 // ld1r { v2.4s }, [x11] 316 WORD $0x9100200b // add x11, x0, #8 317 WORD $0x6ea14442 // ushl v2.4s, v2.4s, v1.4s 318 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 319 WORD $0x3c9f0122 // stur q2, [x9, #-16] 320 WORD $0xb940080a // ldr w10, [x0, #8] 321 WORD $0x53047d4c // lsr w12, w10, #4 322 WORD $0x1e270142 // fmov s2, w10 323 WORD $0x53087d4d // lsr w13, w10, #8 324 WORD $0x4e0c1d82 // mov v2.s[1], w12 325 WORD $0x530c7d4e // lsr w14, w10, #12 326 WORD $0x4e141da2 // mov v2.s[2], w13 327 WORD $0x4e1c1dc2 // mov v2.s[3], w14 328 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 329 WORD $0x3c9f0122 // stur q2, [x9, #-16] 330 WORD $0xb940080a // ldr w10, [x0, #8] 331 WORD $0x53047d4c // lsr w12, w10, #4 332 WORD $0x1e270142 // fmov s2, w10 333 WORD $0x53087d4d // lsr w13, w10, #8 334 WORD $0x4e0c1d82 // mov v2.s[1], w12 335 WORD $0x530c7d4e 
// lsr w14, w10, #12 336 WORD $0x4e141da2 // mov v2.s[2], w13 337 WORD $0x4e1c1dc2 // mov v2.s[3], w14 338 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 339 WORD $0x3d800122 // str q2, [x9] 340 WORD $0x4d40c962 // ld1r { v2.4s }, [x11] 341 WORD $0x9100300b // add x11, x0, #12 342 WORD $0x6ea14442 // ushl v2.4s, v2.4s, v1.4s 343 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 344 WORD $0x3d800522 // str q2, [x9, #16] 345 WORD $0xb9400c0a // ldr w10, [x0, #12] 346 WORD $0x91004000 // add x0, x0, #16 347 WORD $0x53047d4c // lsr w12, w10, #4 348 WORD $0x1e270142 // fmov s2, w10 349 WORD $0x53087d4d // lsr w13, w10, #8 350 WORD $0x4e0c1d82 // mov v2.s[1], w12 351 WORD $0x530c7d4e // lsr w14, w10, #12 352 WORD $0x4e141da2 // mov v2.s[2], w13 353 WORD $0x4e1c1dc2 // mov v2.s[3], w14 354 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 355 WORD $0x3d800922 // str q2, [x9, #32] 356 WORD $0x4d40c962 // ld1r { v2.4s }, [x11] 357 WORD $0x6ea14442 // ushl v2.4s, v2.4s, v1.4s 358 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 359 WORD $0x3d800d22 // str q2, [x9, #48] 360 WORD $0x91020129 //add x9, x9, #128 361 BNE LBB0_15 362 JMP LBB0_99 363 LBB0_16: 364 WORD $0x7100805f // cmp w2, #32 365 BLT LBB0_99 366 MOVD $0x0000001900000014, R1 // LCPI0_37 367 VMOVQ $0x0000000d00000008, $0x0000001700000012, V2 // LCPI0_39 368 MOVD $0x0000000600000001, R3 // LCPI0_41 369 MOVD $0x0000001500000010, R4 // LCPI0_43 370 VMOVQ $0x0000000900000004, $0x000000130000000e, V5 // LCPI0_45 371 MOVD $0x0000000700000002, R6 // LCPI0_47 372 VMOVQ $0x000000110000000c, $0x0000001b00000016, V7 // LCPI0_117 373 WORD $0x91010029 // add x9, x1, #64 374 WORD $0x4f0007e0 // movi v0.4s, #31 375 WORD $0x2ea0b821 //neg v1.2s, v1.2s 376 WORD $0x6ea0b842 //neg v2.4s, v2.4s 377 WORD $0x2ea0b863 //neg v3.2s, v3.2s 378 WORD $0x2ea0b884 //neg v4.2s, v4.2s 379 WORD $0x6ea0b8a5 //neg v5.4s, v5.4s 380 WORD $0x2ea0b8c6 //neg v6.2s, v6.2s 381 WORD $0x6ea0b8e7 //neg v7.4s, v7.4s 382 LBB0_18: 383 WORD $0xb940000a //ldr w10, [x0] 
384 WORD $0xf1000508 //subs x8, x8, #1 385 WORD $0x53057d4b //lsr w11, w10, #5 386 WORD $0x1e270150 //fmov s16, w10 387 WORD $0x530a7d4c //lsr w12, w10, #10 388 WORD $0x4e0c1d70 //mov v16.s[1], w11 389 WORD $0x530f7d4d //lsr w13, w10, #15 390 WORD $0x4e141d90 //mov v16.s[2], w12 391 WORD $0x4e1c1db0 //mov v16.s[3], w13 392 WORD $0x4e201e10 //and v16.16b, v16.16b, v0.16b 393 WORD $0x3c9c0130 // stur q16, [x9, #-64] 394 WORD $0x29402c0a //ldp w10, w11, [x0] 395 WORD $0x9100100c //add x12, x0, #4 396 WORD $0x0e040d50 // dup v16.2s, w10 397 WORD $0x138a796a // extr w10, w11, w10, #30 398 WORD $0x2ea14610 // ushl v16.2s, v16.2s, v1.2s 399 WORD $0x53037d6b // lsr w11, w11, #3 400 WORD $0x4e141d50 // mov v16.s[2], w10 401 WORD $0x4e1c1d70 // mov v16.s[3], w11 402 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 403 WORD $0x3c9d0130 // stur q16, [x9, #-48] 404 WORD $0x4d40c990 // ld1r { v16.4s }, [x12] 405 WORD $0x9100300c // add x12, x0, #12 406 WORD $0x6ea24610 // ushl v16.4s, v16.4s, v2.4s 407 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 408 WORD $0x3c9e0130 // stur q16, [x9, #-32] 409 WORD $0x2940ac0a // ldp w10, w11, [x0, #4] 410 WORD $0x138a716a // extr w10, w11, w10, #28 411 WORD $0x0e040d70 // dup v16.2s, w11 412 WORD $0x2ea34610 // ushl v16.2s, v16.2s, v3.2s 413 WORD $0x1e270151 // fmov s17, w10 414 WORD $0x6e0c0611 // mov v17.s[1], v16.s[0] 415 WORD $0x530b7d6b // lsr w11, w11, #11 416 WORD $0x6e142611 // mov v17.s[2], v16.s[1] 417 WORD $0x4e1c1d71 // mov v17.s[3], w11 418 WORD $0x4e201e30 // and v16.16b, v17.16b, v0.16b 419 WORD $0x3c9f0130 // stur q16, [x9, #-16] 420 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 421 WORD $0x0e040d50 // dup v16.2s, w10 422 WORD $0x531a7d4d // lsr w13, w10, #26 423 WORD $0x2ea44610 // ushl v16.2s, v16.2s, v4.2s 424 WORD $0x138a7d6a // extr w10, w11, w10, #31 425 WORD $0x4e141db0 // mov v16.s[2], w13 426 WORD $0x4e1c1d50 // mov v16.s[3], w10 427 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 428 WORD $0x3d800130 // str q16, 
[x9] 429 WORD $0x4d40c990 // ld1r { v16.4s }, [x12] 430 WORD $0x9100400c // add x12, x0, #16 431 WORD $0x6ea54610 // ushl v16.4s, v16.4s, v5.4s 432 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 433 WORD $0x3d800530 // str q16, [x9, #16] 434 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 435 WORD $0x91005000 // add x0, x0, #20 436 WORD $0x53187d4d // lsr w13, w10, #24 437 WORD $0x138a756a // extr w10, w11, w10, #29 438 WORD $0x0e040d70 // dup v16.2s, w11 439 WORD $0x1e2701b1 // fmov s17, w13 440 WORD $0x2ea64610 // ushl v16.2s, v16.2s, v6.2s 441 WORD $0x4e0c1d51 // mov v17.s[1], w10 442 WORD $0x6e180611 // mov v17.d[1], v16.d[0] 443 WORD $0x4e201e30 // and v16.16b, v17.16b, v0.16b 444 WORD $0x3d800930 // str q16, [x9, #32] 445 WORD $0x4d40c990 // ld1r { v16.4s }, [x12] 446 WORD $0x6ea74610 // ushl v16.4s, v16.4s, v7.4s 447 WORD $0x4e201e10 // and v16.16b, v16.16b, v0.16b 448 WORD $0x3d800d30 // str q16, [x9, #48] 449 WORD $0x91020129 // add x9, x9, #128 450 BNE LBB0_18 451 JMP LBB0_99 452 LBB0_19: 453 WORD $0x7100805f // cmp w2, #32 454 BLT LBB0_99 455 MOVD $0x0000000a00000004, R1 // LCPI0_51 456 MOVD $0x0000001600000010, R2 // LCPI0_53 457 VMOVQ $0x0000000e00000008, $0x0000001a00000014, V3 // LCPI0_116 458 WORD $0x91010029 // add x9, x1, #64 459 WORD $0x4f0107e0 // movi v0.4s, #63 460 WORD $0x2ea0b821 // neg v1.2s, v1.2s 461 WORD $0x2ea0b842 // neg v2.2s, v2.2s 462 WORD $0x6ea0b863 // neg v3.4s, v3.4s 463 LBB0_21: 464 WORD $0xb940000a // ldr w10, [x0] 465 WORD $0xf1000508 // subs x8, x8, #1 466 WORD $0x53067d4b // lsr w11, w10, #6 467 WORD $0x1e270144 // fmov s4, w10 468 WORD $0x530c7d4c // lsr w12, w10, #12 469 WORD $0x4e0c1d64 // mov v4.s[1], w11 470 WORD $0x53127d4d // lsr w13, w10, #18 471 WORD $0x4e141d84 // mov v4.s[2], w12 472 WORD $0x4e1c1da4 // mov v4.s[3], w13 473 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 474 WORD $0x3c9c0124 // stur q4, [x9, #-64] 475 WORD $0x29402c0a // ldp w10, w11, [x0] 476 WORD $0x53187d4c // lsr w12, w10, #24 477 WORD 
$0x138a796a // extr w10, w11, w10, #30 478 WORD $0x0e040d64 // dup v4.2s, w11 479 WORD $0x1e270185 // fmov s5, w12 480 WORD $0x2ea14484 // ushl v4.2s, v4.2s, v1.2s 481 WORD $0x4e0c1d45 // mov v5.s[1], w10 482 WORD $0x6e180485 // mov v5.d[1], v4.d[0] 483 WORD $0x4e201ca4 // and v4.16b, v5.16b, v0.16b 484 WORD $0x3c9d0124 // stur q4, [x9, #-48] 485 WORD $0x2940ac0a // ldp w10, w11, [x0, #4] 486 WORD $0x9100200c // add x12, x0, #8 487 WORD $0x0e040d44 // dup v4.2s, w10 488 WORD $0x138a716a // extr w10, w11, w10, #28 489 WORD $0x2ea24484 // ushl v4.2s, v4.2s, v2.2s 490 WORD $0x53027d6b // lsr w11, w11, #2 491 WORD $0x4e141d44 // mov v4.s[2], w10 492 WORD $0x4e1c1d64 // mov v4.s[3], w11 493 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 494 WORD $0x3c9e0124 // stur q4, [x9, #-32] 495 WORD $0x4d40c984 // ld1r { v4.4s }, [x12] 496 WORD $0x6ea34484 // ushl v4.4s, v4.4s, v3.4s 497 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 498 WORD $0x3c9f0124 // stur q4, [x9, #-16] 499 WORD $0xb9400c0a // ldr w10, [x0, #12] 500 WORD $0x53067d4b // lsr w11, w10, #6 501 WORD $0x1e270144 // fmov s4, w10 502 WORD $0x530c7d4c // lsr w12, w10, #12 503 WORD $0x4e0c1d64 // mov v4.s[1], w11 504 WORD $0x53127d4d // lsr w13, w10, #18 505 WORD $0x4e141d84 // mov v4.s[2], w12 506 WORD $0x4e1c1da4 // mov v4.s[3], w13 507 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 508 WORD $0x3d800124 // str q4, [x9] 509 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 510 WORD $0x53187d4c // lsr w12, w10, #24 511 WORD $0x138a796a // extr w10, w11, w10, #30 512 WORD $0x0e040d64 // dup v4.2s, w11 513 WORD $0x1e270185 // fmov s5, w12 514 WORD $0x2ea14484 // ushl v4.2s, v4.2s, v1.2s 515 WORD $0x4e0c1d45 // mov v5.s[1], w10 516 WORD $0x6e180485 // mov v5.d[1], v4.d[0] 517 WORD $0x4e201ca4 // and v4.16b, v5.16b, v0.16b 518 WORD $0x3d800524 // str q4, [x9, #16] 519 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 520 WORD $0x9100500c // add x12, x0, #20 521 WORD $0x91006000 // add x0, x0, #24 522 WORD $0x0e040d44 // dup v4.2s, 
w10 523 WORD $0x138a716a // extr w10, w11, w10, #28 524 WORD $0x2ea24484 // ushl v4.2s, v4.2s, v2.2s 525 WORD $0x53027d6b // lsr w11, w11, #2 526 WORD $0x4e141d44 // mov v4.s[2], w10 527 WORD $0x4e1c1d64 // mov v4.s[3], w11 528 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 529 WORD $0x3d800924 // str q4, [x9, #32] 530 WORD $0x4d40c984 // ld1r { v4.4s }, [x12] 531 WORD $0x6ea34484 // ushl v4.4s, v4.4s, v3.4s 532 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 533 WORD $0x3d800d24 // str q4, [x9, #48] 534 WORD $0x91020129 // add x9, x9, #128 535 BNE LBB0_21 536 JMP LBB0_99 537 LBB0_22: 538 WORD $0x7100805f // cmp w2, #32 539 BLT LBB0_99 540 MOVD $0x0000000d00000006, R1 // LCPI0_59 541 MOVD $0x0000000900000002, R2 // LCPI0_61 542 MOVD $0x0000001700000010, R3 // LCPI0_63 543 MOVD $0x000000130000000c, R4 // LCPI0_65 544 MOVD $0x0000000f00000008, R5 // LCPI0_67 545 VMOVQ $0x0000000b00000004, $0x0000001900000012, V6 // LCPI0_115 546 WORD $0x91010029 // add x9, x1, #64 547 WORD $0x4f0307e0 // movi v0.4s, #127 548 WORD $0x2ea0b821 // neg v1.2s, v1.2s 549 WORD $0x2ea0b842 // neg v2.2s, v2.2s 550 WORD $0x2ea0b863 // neg v3.2s, v3.2s 551 WORD $0x2ea0b884 // neg v4.2s, v4.2s 552 WORD $0x2ea0b8a5 // neg v5.2s, v5.2s 553 WORD $0x6ea0b8c6 // neg v6.4s, v6.4s 554 LBB0_24: 555 WORD $0xb940000a // ldr w10, [x0] 556 WORD $0xf1000508 // subs x8, x8, #1 557 WORD $0x53077d4b // lsr w11, w10, #7 558 WORD $0x1e270147 // fmov s7, w10 559 WORD $0x530e7d4c // lsr w12, w10, #14 560 WORD $0x4e0c1d67 // mov v7.s[1], w11 561 WORD $0x53157d4d // lsr w13, w10, #21 562 WORD $0x4e141d87 // mov v7.s[2], w12 563 WORD $0x4e1c1da7 // mov v7.s[3], w13 564 WORD $0x4e201cf0 // and v16.16b, v7.16b, v0.16b 565 WORD $0x3c9c0130 // stur q16, [x9, #-64] 566 WORD $0xb940040a // ldr w10, [x0, #4] 567 WORD $0x53037d4b // lsr w11, w10, #3 568 WORD $0x530a7d4c // lsr w12, w10, #10 569 WORD $0x4e0c1d67 // mov v7.s[1], w11 570 WORD $0x53117d4a // lsr w10, w10, #17 571 WORD $0x4e141d87 // mov v7.s[2], w12 572 WORD 
$0x4e1c1d47 // mov v7.s[3], w10 573 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 574 WORD $0x3c9d0127 // stur q7, [x9, #-48] 575 WORD $0x2940ac0a // ldp w10, w11, [x0, #4] 576 WORD $0x53187d4c // lsr w12, w10, #24 577 WORD $0x138a7d6a // extr w10, w11, w10, #31 578 WORD $0x0e040d67 // dup v7.2s, w11 579 WORD $0x1e270190 // fmov s16, w12 580 WORD $0x2ea144e7 // ushl v7.2s, v7.2s, v1.2s 581 WORD $0x4e0c1d50 // mov v16.s[1], w10 582 WORD $0x6e1804f0 // mov v16.d[1], v7.d[0] 583 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 584 WORD $0x3c9e0127 // stur q7, [x9, #-32] 585 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 586 WORD $0x53147d4c // lsr w12, w10, #20 587 WORD $0x138a6d6a // extr w10, w11, w10, #27 588 WORD $0x0e040d67 // dup v7.2s, w11 589 WORD $0x1e270190 // fmov s16, w12 590 WORD $0x2ea244e7 // ushl v7.2s, v7.2s, v2.2s 591 WORD $0x4e0c1d50 // mov v16.s[1], w10 592 WORD $0x6e1804f0 // mov v16.d[1], v7.d[0] 593 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 594 WORD $0x3c9f0127 // stur q7, [x9, #-16] 595 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 596 WORD $0x9100600c // add x12, x0, #24 597 WORD $0x0e040d47 // dup v7.2s, w10 598 WORD $0x138a796a // extr w10, w11, w10, #30 599 WORD $0x2ea344e7 // ushl v7.2s, v7.2s, v3.2s 600 WORD $0x53057d6b // lsr w11, w11, #5 601 WORD $0x4e141d47 // mov v7.s[2], w10 602 WORD $0x4e1c1d67 // mov v7.s[3], w11 603 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 604 WORD $0x3d800127 // str q7, [x9] 605 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 606 WORD $0x0e040d47 // dup v7.2s, w10 607 WORD $0x138a696a // extr w10, w11, w10, #26 608 WORD $0x2ea444e7 // ushl v7.2s, v7.2s, v4.2s 609 WORD $0x53017d6b // lsr w11, w11, #1 610 WORD $0x4e141d47 // mov v7.s[2], w10 611 WORD $0x4e1c1d67 // mov v7.s[3], w11 612 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 613 WORD $0x3d800527 // str q7, [x9, #16] 614 WORD $0x2942ac0a // ldp w10, w11, [x0, #20] 615 WORD $0x91007000 // add x0, x0, #28 616 WORD $0x0e040d47 // dup v7.2s, w10 617 WORD 
$0x53167d4d // lsr w13, w10, #22 618 WORD $0x2ea544e7 // ushl v7.2s, v7.2s, v5.2s 619 WORD $0x138a756a // extr w10, w11, w10, #29 620 WORD $0x4e141da7 // mov v7.s[2], w13 621 WORD $0x4e1c1d47 // mov v7.s[3], w10 622 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 623 WORD $0x3d800927 // str q7, [x9, #32] 624 WORD $0x4d40c987 // ld1r { v7.4s }, [x12] 625 WORD $0x6ea644e7 // ushl v7.4s, v7.4s, v6.4s 626 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 627 WORD $0x3d800d27 // str q7, [x9, #48] 628 WORD $0x91020129 // add x9, x9, #128 629 BNE LBB0_24 630 JMP LBB0_99 631 LBB0_25: 632 WORD $0x7100805f // cmp w2, #32 633 BLT LBB0_99 634 WORD $0x91010029 // add x9, x1, #64 635 WORD $0x6f00e620 // movi v0.2d, #0x0000ff000000ff 636 LBB0_27: 637 WORD $0xb940000a // ldr w10, [x0] 638 WORD $0xf1000508 // subs x8, x8, #1 639 WORD $0x53087d4b // lsr w11, w10, #8 640 WORD $0x1e270141 // fmov s1, w10 641 WORD $0x53107d4c // lsr w12, w10, #16 642 WORD $0x4e0c1d61 // mov v1.s[1], w11 643 WORD $0x53187d4d // lsr w13, w10, #24 644 WORD $0x4e141d81 // mov v1.s[2], w12 645 WORD $0x4e1c1da1 // mov v1.s[3], w13 646 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 647 WORD $0x3c9c0121 // stur q1, [x9, #-64] 648 WORD $0xb940040a // ldr w10, [x0, #4] 649 WORD $0x53087d4b // lsr w11, w10, #8 650 WORD $0x1e270141 // fmov s1, w10 651 WORD $0x53107d4c // lsr w12, w10, #16 652 WORD $0x4e0c1d61 // mov v1.s[1], w11 653 WORD $0x53187d4d // lsr w13, w10, #24 654 WORD $0x4e141d81 // mov v1.s[2], w12 655 WORD $0x4e1c1da1 // mov v1.s[3], w13 656 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 657 WORD $0x3c9d0121 // stur q1, [x9, #-48] 658 WORD $0xb940080a // ldr w10, [x0, #8] 659 WORD $0x53087d4b // lsr w11, w10, #8 660 WORD $0x1e270141 // fmov s1, w10 661 WORD $0x53107d4c // lsr w12, w10, #16 662 WORD $0x4e0c1d61 // mov v1.s[1], w11 663 WORD $0x53187d4d // lsr w13, w10, #24 664 WORD $0x4e141d81 // mov v1.s[2], w12 665 WORD $0x4e1c1da1 // mov v1.s[3], w13 666 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 667 
WORD $0x3c9e0121 // stur q1, [x9, #-32] 668 WORD $0xb9400c0a // ldr w10, [x0, #12] 669 WORD $0x53087d4b // lsr w11, w10, #8 670 WORD $0x1e270141 // fmov s1, w10 671 WORD $0x53107d4c // lsr w12, w10, #16 672 WORD $0x4e0c1d61 // mov v1.s[1], w11 673 WORD $0x53187d4d // lsr w13, w10, #24 674 WORD $0x4e141d81 // mov v1.s[2], w12 675 WORD $0x4e1c1da1 // mov v1.s[3], w13 676 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 677 WORD $0x3c9f0121 // stur q1, [x9, #-16] 678 WORD $0xb940100a // ldr w10, [x0, #16] 679 WORD $0x53087d4b // lsr w11, w10, #8 680 WORD $0x1e270141 // fmov s1, w10 681 WORD $0x53107d4c // lsr w12, w10, #16 682 WORD $0x4e0c1d61 // mov v1.s[1], w11 683 WORD $0x53187d4d // lsr w13, w10, #24 684 WORD $0x4e141d81 // mov v1.s[2], w12 685 WORD $0x4e1c1da1 // mov v1.s[3], w13 686 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 687 WORD $0x3d800121 // str q1, [x9] 688 WORD $0xb940140a // ldr w10, [x0, #20] 689 WORD $0x53087d4b // lsr w11, w10, #8 690 WORD $0x1e270141 // fmov s1, w10 691 WORD $0x53107d4c // lsr w12, w10, #16 692 WORD $0x4e0c1d61 // mov v1.s[1], w11 693 WORD $0x53187d4d // lsr w13, w10, #24 694 WORD $0x4e141d81 // mov v1.s[2], w12 695 WORD $0x4e1c1da1 // mov v1.s[3], w13 696 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 697 WORD $0x3d800521 // str q1, [x9, #16] 698 WORD $0xb940180a // ldr w10, [x0, #24] 699 WORD $0x53087d4b // lsr w11, w10, #8 700 WORD $0x1e270141 // fmov s1, w10 701 WORD $0x53107d4c // lsr w12, w10, #16 702 WORD $0x4e0c1d61 // mov v1.s[1], w11 703 WORD $0x53187d4d // lsr w13, w10, #24 704 WORD $0x4e141d81 // mov v1.s[2], w12 705 WORD $0x4e1c1da1 // mov v1.s[3], w13 706 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 707 WORD $0x3d800921 // str q1, [x9, #32] 708 WORD $0xb9401c0a // ldr w10, [x0, #28] 709 WORD $0x91008000 // add x0, x0, #32 710 WORD $0x53087d4b // lsr w11, w10, #8 711 WORD $0x1e270141 // fmov s1, w10 712 WORD $0x53107d4c // lsr w12, w10, #16 713 WORD $0x4e0c1d61 // mov v1.s[1], w11 714 WORD $0x53187d4d // lsr w13, 
w10, #24 715 WORD $0x4e141d81 // mov v1.s[2], w12 716 WORD $0x4e1c1da1 // mov v1.s[3], w13 717 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 718 WORD $0x3d800d21 // str q1, [x9, #48] 719 WORD $0x91020129 // add x9, x9, #128 720 BNE LBB0_27 721 JMP LBB0_99 722 LBB0_28: 723 WORD $0x7100805f // cmp w2, #32 724 BLT LBB0_99 725 MOVD $0x0000001100000008, R1 // LCPI0_73 726 MOVD $0x000000150000000C, R2 // LCPI0_75 727 MOVD $0x0000000b00000002, R3 // LCPI0_77 728 MOVD $0x0000000f00000006, R4 // LCPI0_79 729 MOVD $0x0000000a00000001, R5 // LCPI0_81 730 MOVD $0x0000000e00000005, R6 // LCPI0_83 731 WORD $0x91010029 // add x9, x1, #64 732 WORD $0x4f00c420 // movi v0.4s, #1, msl #8 733 WORD $0x2ea0b821 // neg v1.2s, v1.2s 734 WORD $0x2ea0b842 // neg v2.2s, v2.2s 735 WORD $0x2ea0b863 // neg v3.2s, v3.2s 736 WORD $0x2ea0b884 // neg v4.2s, v4.2s 737 WORD $0x2ea0b8a5 // neg v5.2s, v5.2s 738 WORD $0x2ea0b8c6 // neg v6.2s, v6.2s 739 LBB0_30: 740 WORD $0x29402c0a // ldp w10, w11, [x0] 741 WORD $0xf1000508 // subs x8, x8, #1 742 WORD $0x53097d4c // lsr w12, w10, #9 743 WORD $0x1e270147 // fmov s7, w10 744 WORD $0x53127d4d // lsr w13, w10, #18 745 WORD $0x4e0c1d87 // mov v7.s[1], w12 746 WORD $0x138a6d6b // extr w11, w11, w10, #27 747 WORD $0x4e141da7 // mov v7.s[2], w13 748 WORD $0x4e1c1d67 // mov v7.s[3], w11 749 WORD $0x4e201cf0 // and v16.16b, v7.16b, v0.16b 750 WORD $0x3c9c0130 // stur q16, [x9, #-64] 751 WORD $0x2940ac0a // ldp w10, w11, [x0, #4] 752 WORD $0x530d7d4c // lsr w12, w10, #13 753 WORD $0x53167d4d // lsr w13, w10, #22 754 WORD $0x4e0c1d87 // mov v7.s[1], w12 755 WORD $0x138a7d6a // extr w10, w11, w10, #31 756 WORD $0x4e141da7 // mov v7.s[2], w13 757 WORD $0x4e1c1d47 // mov v7.s[3], w10 758 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 759 WORD $0x3c9d0127 // stur q7, [x9, #-48] 760 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 761 WORD $0x0e040d47 // dup v7.2s, w10 762 WORD $0x138a696a // extr w10, w11, w10, #26 763 WORD $0x2ea144e7 // ushl v7.2s, v7.2s, v1.2s 764 WORD 
$0x53037d6b // lsr w11, w11, #3 765 WORD $0x4e141d47 // mov v7.s[2], w10 766 WORD $0x4e1c1d67 // mov v7.s[3], w11 767 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 768 WORD $0x3c9e0127 // stur q7, [x9, #-32] 769 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 770 WORD $0x0e040d47 // dup v7.2s, w10 771 WORD $0x138a796a // extr w10, w11, w10, #30 772 WORD $0x2ea244e7 // ushl v7.2s, v7.2s, v2.2s 773 WORD $0x53077d6b // lsr w11, w11, #7 774 WORD $0x4e141d47 // mov v7.s[2], w10 775 WORD $0x4e1c1d67 // mov v7.s[3], w11 776 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 777 WORD $0x3c9f0127 // stur q7, [x9, #-16] 778 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 779 WORD $0x53107d4c // lsr w12, w10, #16 780 WORD $0x138a656a // extr w10, w11, w10, #25 781 WORD $0x0e040d67 // dup v7.2s, w11 782 WORD $0x1e270190 // fmov s16, w12 783 WORD $0x2ea344e7 // ushl v7.2s, v7.2s, v3.2s 784 WORD $0x4e0c1d50 // mov v16.s[1], w10 785 WORD $0x6e1804f0 // mov v16.d[1], v7.d[0] 786 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 787 WORD $0x3d800127 // str q7, [x9] 788 WORD $0x2942ac0a // ldp w10, w11, [x0, #20] 789 WORD $0x53147d4c // lsr w12, w10, #20 790 WORD $0x138a756a // extr w10, w11, w10, #29 791 WORD $0x0e040d67 // dup v7.2s, w11 792 WORD $0x1e270190 // fmov s16, w12 793 WORD $0x2ea444e7 // ushl v7.2s, v7.2s, v4.2s 794 WORD $0x4e0c1d50 // mov v16.s[1], w10 795 WORD $0x6e1804f0 // mov v16.d[1], v7.d[0] 796 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 797 WORD $0x3d800527 // str q7, [x9, #16] 798 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 799 WORD $0x138a616a // extr w10, w11, w10, #24 800 WORD $0x0e040d67 // dup v7.2s, w11 801 WORD $0x2ea544e7 // ushl v7.2s, v7.2s, v5.2s 802 WORD $0x1e270150 // fmov s16, w10 803 WORD $0x6e0c04f0 // mov v16.s[1], v7.s[0] 804 WORD $0x53137d6b // lsr w11, w11, #19 805 WORD $0x6e1424f0 // mov v16.s[2], v7.s[1] 806 WORD $0x4e1c1d70 // mov v16.s[3], w11 807 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 808 WORD $0x3d800927 // str q7, [x9, #32] 809 WORD 
$0x2943ac0a // ldp w10, w11, [x0, #28]
    // Tail of loop LBB0_30: last 4-word store of the iteration, then loop back.
    WORD $0x91009000 // add x0, x0, #36
    WORD $0x138a716a // extr w10, w11, w10, #28
    WORD $0x0e040d67 // dup v7.2s, w11
    WORD $0x2ea644e7 // ushl v7.2s, v7.2s, v6.2s
    WORD $0x1e270150 // fmov s16, w10
    WORD $0x6e0c04f0 // mov v16.s[1], v7.s[0]
    WORD $0x53177d6b // lsr w11, w11, #23
    WORD $0x6e1424f0 // mov v16.s[2], v7.s[1]
    WORD $0x4e1c1d70 // mov v16.s[3], w11
    WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b
    WORD $0x3d800d27 // str q7, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128
    BNE LBB0_30
    JMP LBB0_99

// LBB0_31: setup for the unpack loop LBB0_33. The lane mask built here is
// 0x3ff (movi #3, msl #8), i.e. 10 significant bits per 32-bit output word.
// Register roles, as established at function entry:
//   x0 = input cursor, x1 = output base, w2 = batchSize, x8 = iteration count.
// x9 is the output cursor biased by +64 so the eight 16-byte stores of one
// iteration use offsets -64..+48.
LBB0_31:
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99                      // batchSize < 32: skip the loop
    // Per-lane right-shift counts go into the low 64 bits of V1..V3. They must
    // target the vector registers: the neg/ushl instructions below read v1..v3,
    // and x1 has to stay intact for the add that derives x9.
    VMOVD $0x0000000e00000004, V1 // LCPI0_87: lanes {4, 14}
    VMOVD $0x0000000c00000002, V2 // LCPI0_89: lanes {2, 12}
    VMOVD $0x0000001200000008, V3 // LCPI0_85: lanes {8, 18}
    WORD $0x91010029 // add x9, x1, #64
    WORD $0x4f00c460 // movi v0.4s, #3, msl #8
    // USHL shifts right for negative counts, so negate the counts once up front.
    WORD $0x2ea0b821 // neg v1.2s, v1.2s
    WORD $0x2ea0b842 // neg v2.2s, v2.2s
    WORD $0x2ea0b863 // neg v3.2s, v3.2s
LBB0_33:
    // Store group 1 -> [x9, #-64]
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0xf1000508 // subs x8, x8, #1   (flags are consumed by BNE LBB0_33 at the loop bottom)
    WORD $0x530a7d4c // lsr w12, w10, #10
    WORD $0x1e270144 // fmov s4, w10
    WORD $0x53147d4d // lsr w13, w10, #20
    WORD $0x4e0c1d84 // mov v4.s[1], w12
    WORD $0x138a796b // extr w11, w11, w10, #30
    WORD $0x4e141da4 // mov v4.s[2], w13
    WORD $0x4e1c1d64 // mov v4.s[3], w11
    WORD $0x4e201c85 // and v5.16b, v4.16b, v0.16b
    WORD $0x3c9c0125 // stur q5, [x9, #-64]
    // Store group 2 -> [x9, #-48]
    // NOTE(review): nothing in this group writes v4.s[0]; it still holds the
    // word placed there by "fmov s4, w10" in group 1. Confirm that lane 0 of
    // this store is what is intended.
    WORD $0x2940ac0a // ldp w10, w11, [x0, #4]
    WORD $0x53127d4c // lsr w12, w10, #18
    WORD $0x138a716a // extr w10, w11, w10, #28
    WORD $0x4e0c1d84 // mov v4.s[1], w12
    WORD $0x53067d6b // lsr w11, w11, #6
    WORD $0x4e141d44 // mov v4.s[2], w10
    WORD $0x4e1c1d64 // mov v4.s[3], w11
    WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b
    WORD $0x3c9d0124 // stur q4, [x9, #-48]
    // Store group 3 -> [x9, #-32] (continues on the following line)
    WORD $0x29412c0a // ldp w10, w11, [x0, #8]
    WORD $0x53107d4c // lsr w12, w10, #16
    WORD $0x138a696a //
extr w10, w11, w10, #26 859 WORD $0x0e040d64 // dup v4.2s, w11 860 WORD $0x1e270185 // fmov s5, w12 861 WORD $0x2ea14484 // ushl v4.2s, v4.2s, v1.2s 862 WORD $0x4e0c1d45 // mov v5.s[1], w10 863 WORD $0x6e180485 // mov v5.d[1], v4.d[0] 864 WORD $0x4e201ca4 // and v4.16b, v5.16b, v0.16b 865 WORD $0x3c9e0124 // stur q4, [x9, #-32] 866 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 867 WORD $0x138a616a // extr w10, w11, w10, #24 868 WORD $0x0e040d64 // dup v4.2s, w11 869 WORD $0x2ea24484 // ushl v4.2s, v4.2s, v2.2s 870 WORD $0x1e270145 // fmov s5, w10 871 WORD $0x6e0c0485 // mov v5.s[1], v4.s[0] 872 WORD $0x53167d6b // lsr w11, w11, #22 873 WORD $0x6e142485 // mov v5.s[2], v4.s[1] 874 WORD $0x4e1c1d65 // mov v5.s[3], w11 875 WORD $0x4e201ca4 // and v4.16b, v5.16b, v0.16b 876 WORD $0x3c9f0124 // stur q4, [x9, #-16] 877 WORD $0x2942ac0a // ldp w10, w11, [x0, #20] 878 WORD $0x530a7d4c // lsr w12, w10, #10 879 WORD $0x1e270144 // fmov s4, w10 880 WORD $0x53147d4d // lsr w13, w10, #20 881 WORD $0x4e0c1d84 // mov v4.s[1], w12 882 WORD $0x138a796b // extr w11, w11, w10, #30 883 WORD $0x4e141da4 // mov v4.s[2], w13 884 WORD $0x4e1c1d64 // mov v4.s[3], w11 885 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 886 WORD $0x3d800124 // str q4, [x9] 887 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 888 WORD $0x0e040d44 // dup v4.2s, w10 889 WORD $0x138a716a // extr w10, w11, w10, #28 890 WORD $0x2ea34484 // ushl v4.2s, v4.2s, v3.2s 891 WORD $0x53067d6b // lsr w11, w11, #6 892 WORD $0x4e141d44 // mov v4.s[2], w10 893 WORD $0x4e1c1d64 // mov v4.s[3], w11 894 WORD $0x4e201c84 // and v4.16b, v4.16b, v0.16b 895 WORD $0x3d800524 // str q4, [x9, #16] 896 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 897 WORD $0x53107d4c // lsr w12, w10, #16 898 WORD $0x138a696a // extr w10, w11, w10, #26 899 WORD $0x0e040d64 // dup v4.2s, w11 900 WORD $0x1e270185 // fmov s5, w12 901 WORD $0x2ea14484 // ushl v4.2s, v4.2s, v1.2s 902 WORD $0x4e0c1d45 // mov v5.s[1], w10 903 WORD $0x6e180485 // mov v5.d[1], v4.d[0] 904 
WORD $0x4e201ca4 // and v4.16b, v5.16b, v0.16b
    // Tail of loop LBB0_33: stores at [x9, #32] and [x9, #48], then loop back.
    WORD $0x3d800924 // str q4, [x9, #32]
    WORD $0x29442c0a // ldp w10, w11, [x0, #32]
    WORD $0x9100a000 // add x0, x0, #40
    WORD $0x138a616a // extr w10, w11, w10, #24
    WORD $0x0e040d64 // dup v4.2s, w11
    WORD $0x2ea24484 // ushl v4.2s, v4.2s, v2.2s
    WORD $0x1e270145 // fmov s5, w10
    WORD $0x6e0c0485 // mov v5.s[1], v4.s[0]
    WORD $0x53167d6b // lsr w11, w11, #22
    WORD $0x6e142485 // mov v5.s[2], v4.s[1]
    WORD $0x4e1c1d65 // mov v5.s[3], w11
    WORD $0x4e201ca4 // and v4.16b, v5.16b, v0.16b
    WORD $0x3d800d24 // str q4, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128
    BNE LBB0_33
    JMP LBB0_99

// LBB0_34: setup for the unpack loop LBB0_36. The lane mask built here is
// 0x7ff (movi #7, msl #8), i.e. 11 significant bits per 32-bit output word.
// Register roles, as established at function entry:
//   x0 = input cursor, x1 = output base, w2 = batchSize, x8 = iteration count.
// x9 is the output cursor biased by +64 so the eight 16-byte stores of one
// iteration use offsets -64..+48. Each iteration consumes 44 input bytes.
LBB0_34:
    WORD $0x7100805f // cmp w2, #32
    // Conditional branch on the compare above, like every sibling entry point.
    // A plain BL would be an unconditional branch-with-link.
    BLT LBB0_99                      // batchSize < 32: skip the loop
    // Per-lane right-shift counts go into the low 64 bits of V1..V6. They must
    // target the vector registers: the neg/ushl instructions below read v1..v6,
    // and x1 has to stay intact for the add that derives x9.
    VMOVD $0x0000000e00000003, V1 // LCPI0_93:  lanes {3, 14}
    VMOVD $0x0000000f00000004, V2 // LCPI0_95:  lanes {4, 15}
    VMOVD $0x0000001100000006, V3 // LCPI0_97:  lanes {6, 17}
    VMOVD $0x0000001200000007, V4 // LCPI0_99:  lanes {7, 18}
    VMOVD $0x0000001300000008, V5 // LCPI0_101: lanes {8, 19}
    VMOVD $0x000000150000000a, V6 // LCPI0_114: lanes {10, 21}
    WORD $0x91010029 // add x9, x1, #64
    WORD $0x4f00c4e0 // movi v0.4s, #7, msl #8
    // USHL shifts right for negative counts, so negate the counts once up front.
    WORD $0x2ea0b821 // neg v1.2s, v1.2s
    WORD $0x2ea0b842 // neg v2.2s, v2.2s
    WORD $0x2ea0b863 // neg v3.2s, v3.2s
    WORD $0x2ea0b884 // neg v4.2s, v4.2s
    WORD $0x2ea0b8a5 // neg v5.2s, v5.2s
    WORD $0x2ea0b8c6 // neg v6.2s, v6.2s
LBB0_36:
    // Store group 1 -> [x9, #-64]
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0xf1000508 // subs x8, x8, #1   (flags are consumed by BNE LBB0_36 at the loop bottom)
    WORD $0x530b7d4c // lsr w12, w10, #11
    WORD $0x1e270147 // fmov s7, w10
    WORD $0x138a596d // extr w13, w11, w10, #22
    WORD $0x4e0c1d87 // mov v7.s[1], w12
    WORD $0x53017d6b // lsr w11, w11, #1
    WORD $0x4e141da7 // mov v7.s[2], w13
    WORD $0x4e1c1d67 // mov v7.s[3], w11
    WORD $0x4e201cf0 // and v16.16b, v7.16b, v0.16b
    WORD $0x3c9c0130 // stur q16, [x9, #-64]
    // Store group 2 -> [x9, #-48]
    // NOTE(review): nothing in this group writes v7.s[0]; it still holds the
    // word placed there by "fmov s7, w10" in group 1. Confirm that lane 0 of
    // this store is what is intended.
    WORD $0x2940ac0a // ldp w10, w11, [x0, #4]
    WORD $0x138a5d6a // extr w10, w11, w10, #23
    WORD $0x53027d6c // lsr w12, w11, #2
    WORD $0x4e0c1d47 // mov v7.s[1], w10
    WORD $0x530d7d6b // lsr w11, w11, #13
    WORD $0x4e141d87 // mov v7.s[2], w12
    WORD $0x4e1c1d67 // mov v7.s[3], w11
    WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b
    WORD $0x3c9d0127 // stur q7, [x9, #-48]
    // Store group 3 -> [x9, #-32]
    WORD $0x29412c0a // ldp w10, w11, [x0, #8]
    WORD $0xb940100c // ldr w12, [x0, #16]
    WORD $0x138a616a // extr w10, w11, w10, #24
    WORD $0x0e040d67 // dup v7.2s, w11
    WORD $0x2ea144e7 // ushl v7.2s, v7.2s, v1.2s
    WORD $0x1e270150 // fmov s16, w10
    WORD $0x6e0c04f0 // mov v16.s[1], v7.s[0]
    WORD $0x138b658b // extr w11, w12, w11, #25
    WORD $0x6e1424f0 // mov v16.s[2], v7.s[1]
    WORD $0x4e1c1d70 // mov v16.s[3], w11
    WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b
    WORD $0x3c9e0127 // stur q7, [x9, #-32]
    // Store group 4 -> [x9, #-16]
    WORD $0x29422c0a // ldp w10, w11, [x0, #16]
    WORD $0x0e040d47 // dup v7.2s, w10
    WORD $0x138a696a // extr w10, w11, w10, #26
    WORD $0x2ea244e7 // ushl v7.2s, v7.2s, v2.2s
    WORD $0x53057d6b // lsr w11, w11, #5
    WORD $0x4e141d47 // mov v7.s[2], w10
    WORD $0x4e1c1d67 // mov v7.s[3], w11
    WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b
    WORD $0x3c9f0127 // stur q7, [x9, #-16]
    // Store group 5 -> [x9]
    WORD $0x2942ac0a // ldp w10, w11, [x0, #20]
    WORD $0x53107d4c // lsr w12, w10, #16
    WORD $0x138a6d6a // extr w10, w11, w10, #27
    WORD $0x0e040d67 // dup v7.2s, w11
    WORD $0x1e270190 // fmov s16, w12
    WORD $0x2ea344e7 // ushl v7.2s, v7.2s, v3.2s
    WORD $0x4e0c1d50 // mov v16.s[1], w10
    WORD $0x6e1804f0 // mov v16.d[1], v7.d[0]
    WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b
    WORD $0x3d800127 // str q7, [x9]
    // Store group 6 -> [x9, #16]
    WORD $0x29432c0a // ldp w10, w11, [x0, #24]
    WORD $0xb940200c // ldr w12, [x0, #32]
    WORD $0x138a716a // extr w10, w11, w10, #28
    WORD $0x0e040d67 // dup v7.2s, w11
    WORD $0x2ea444e7 // ushl v7.2s, v7.2s, v4.2s
    WORD $0x1e270150 // fmov s16, w10
    WORD $0x6e0c04f0 // mov v16.s[1], v7.s[0]
    WORD $0x138b758b // extr w11, w12, w11, #29
    WORD $0x6e1424f0 // mov v16.s[2], v7.s[1]
    WORD $0x4e1c1d70 // mov v16.s[3], w11
    WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b
    WORD $0x3d800527 // str q7, [x9, #16]
    // Store group 7 -> [x9, #32]
    WORD $0x29442c0a // ldp w10, w11, [x0, #32]
    WORD $0x0e040d47 // dup v7.2s, w10
    WORD $0x138a796a // extr w10, w11, w10, #30
    WORD $0x2ea544e7 // ushl v7.2s, v7.2s, v5.2s
    WORD $0x53097d6b // lsr w11, w11, #9
    WORD $0x4e141d47 // mov v7.s[2], w10
    WORD $0x4e1c1d67 // mov v7.s[3], w11
    WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b
    WORD $0x3d800927 // str q7, [x9, #32]
    // Store group 8 -> [x9, #48]; the input cursor advances right after the load.
    WORD $0x2944ac0a // ldp w10, w11, [x0, #36]
    WORD $0x9100b000 // add x0, x0, #44
    WORD $0x53147d4c // lsr w12, w10, #20
    WORD $0x138a7d6a // extr w10, w11, w10, #31
    WORD $0x0e040d67 // dup v7.2s, w11
    WORD $0x1e270190 // fmov s16, w12
    WORD $0x2ea644e7 // ushl v7.2s, v7.2s, v6.2s
    WORD $0x4e0c1d50 // mov v16.s[1], w10
    WORD $0x6e1804f0 // mov v16.d[1], v7.d[0]
    WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b
    WORD $0x3d800d27 // str q7, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128   (advance output cursor by 32 words)
    BNE LBB0_36
    JMP LBB0_99

// LBB0_37: setup for the unpack loop LBB0_39. The lane mask built here is
// 0xfff (movi #15, msl #8), i.e. 12 significant bits per 32-bit output word.
// Same register roles as the other entry points: x0 = input cursor,
// x1 = output base, w2 = batchSize, x8 = iteration count, x9 = x1 + 64.
LBB0_37:
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99                      // batchSize < 32: skip the loop
    // Shift counts for the "dup w11 / ushl ..., v1" sites in LBB0_39. Those
    // sites extract the fields at bits 8 and 20 of the second loaded word, the
    // same fields the scalar path of this loop takes with lsr #8 / lsr #20.
    // The constant goes to V1 (not x1): v1 is what neg/ushl read, and x1 is
    // still needed by the add below.
    VMOVD $0x0000001400000008, V1 // LCPI0_113: lanes {8, 20}
    WORD $0x91010029 // add x9, x1, #64
    WORD $0x4f00c5e0 // movi v0.4s, #15, msl #8
    WORD $0x2ea0b821 // neg v1.2s, v1.2s   (negative count => USHL shifts right)
LBB0_39:
    // Store group 1 -> [x9, #-64]
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0xf1000508 // subs x8, x8, #1   (flags are consumed by BNE LBB0_39 at the loop bottom)
    WORD $0x530c7d4c // lsr w12, w10, #12
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138a616d // extr w13, w11, w10, #24
    WORD $0x4e0c1d82 // mov v2.s[1], w12
    WORD $0x53047d6b // lsr w11, w11, #4
    WORD $0x4e141da2 // mov v2.s[2], w13
    WORD $0x4e1c1d62 // mov v2.s[3], w11
    WORD $0x4e201c43 // and v3.16b, v2.16b, v0.16b
    WORD $0x3c9c0123 // stur q3, [x9, #-64]
    // Store group 2 -> [x9, #-48] (continues on the following line)
    WORD $0x2940ac0a // ldp w10, w11, [x0, #4]
    WORD $0x138a716a // extr w10, w11, w10, #28
WORD $0x53087d6c // lsr w12, w11, #8 1047 WORD $0x4e0c1d42 // mov v2.s[1], w10 1048 WORD $0x53147d6b // lsr w11, w11, #20 1049 WORD $0x4e141d82 // mov v2.s[2], w12 1050 WORD $0x4e1c1d62 // mov v2.s[3], w11 1051 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1052 WORD $0x3c9d0122 // stur q2, [x9, #-48] 1053 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 1054 WORD $0x530c7d4c // lsr w12, w10, #12 1055 WORD $0x1e270142 // fmov s2, w10 1056 WORD $0x138a616d // extr w13, w11, w10, #24 1057 WORD $0x4e0c1d82 // mov v2.s[1], w12 1058 WORD $0x53047d6b // lsr w11, w11, #4 1059 WORD $0x4e141da2 // mov v2.s[2], w13 1060 WORD $0x4e1c1d62 // mov v2.s[3], w11 1061 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1062 WORD $0x3c9e0122 // stur q2, [x9, #-32] 1063 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 1064 WORD $0x53107d4c // lsr w12, w10, #16 1065 WORD $0x138a716a // extr w10, w11, w10, #28 1066 WORD $0x0e040d62 // dup v2.2s, w11 1067 WORD $0x1e270183 // fmov s3, w12 1068 WORD $0x2ea14442 // ushl v2.2s, v2.2s, v1.2s 1069 WORD $0x4e0c1d43 // mov v3.s[1], w10 1070 WORD $0x6e180443 // mov v3.d[1], v2.d[0] 1071 WORD $0x4e201c62 // and v2.16b, v3.16b, v0.16b 1072 WORD $0x3c9f0122 // stur q2, [x9, #-16] 1073 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 1074 WORD $0x530c7d4c // lsr w12, w10, #12 1075 WORD $0x1e270142 // fmov s2, w10 1076 WORD $0x138a616d // extr w13, w11, w10, #24 1077 WORD $0x4e0c1d82 // mov v2.s[1], w12 1078 WORD $0x53047d6b // lsr w11, w11, #4 1079 WORD $0x4e141da2 // mov v2.s[2], w13 1080 WORD $0x4e1c1d62 // mov v2.s[3], w11 1081 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1082 WORD $0x3d800122 // str q2, [x9] 1083 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 1084 WORD $0x53107d4c // lsr w12, w10, #16 1085 WORD $0x138a716a // extr w10, w11, w10, #28 1086 WORD $0x0e040d62 // dup v2.2s, w11 1087 WORD $0x1e270183 // fmov s3, w12 1088 WORD $0x2ea14442 // ushl v2.2s, v2.2s, v1.2s 1089 WORD $0x4e0c1d43 // mov v3.s[1], w10 1090 WORD $0x6e180443 // mov v3.d[1], v2.d[0] 1091 
WORD $0x4e201c62 // and v2.16b, v3.16b, v0.16b
    // Tail of loop LBB0_39: stores at [x9, #16], [x9, #32], [x9, #48], then loop back.
    WORD $0x3d800522 // str q2, [x9, #16]
    WORD $0x2944ac0a // ldp w10, w11, [x0, #36]
    WORD $0x530c7d4c // lsr w12, w10, #12
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138a616d // extr w13, w11, w10, #24
    WORD $0x4e0c1d82 // mov v2.s[1], w12
    WORD $0x53047d6b // lsr w11, w11, #4
    WORD $0x4e141da2 // mov v2.s[2], w13
    WORD $0x4e1c1d62 // mov v2.s[3], w11
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3d800922 // str q2, [x9, #32]
    WORD $0x29452c0a // ldp w10, w11, [x0, #40]
    WORD $0x9100c000 // add x0, x0, #48
    WORD $0x53107d4c // lsr w12, w10, #16
    WORD $0x138a716a // extr w10, w11, w10, #28
    WORD $0x0e040d62 // dup v2.2s, w11
    WORD $0x1e270183 // fmov s3, w12
    WORD $0x2ea14442 // ushl v2.2s, v2.2s, v1.2s
    WORD $0x4e0c1d43 // mov v3.s[1], w10
    WORD $0x6e180443 // mov v3.d[1], v2.d[0]
    WORD $0x4e201c62 // and v2.16b, v3.16b, v0.16b
    WORD $0x3d800d22 // str q2, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128   (advance output cursor by 32 words)
    BNE LBB0_39
    JMP LBB0_99

// LBB0_40: setup for the unpack loop LBB0_42. The lane mask built here is
// 0x1fff (movi #31, msl #8), i.e. 13 significant bits per 32-bit output word.
// Register roles, as established at function entry:
//   x0 = input cursor, x1 = output base, w2 = batchSize, x8 = iteration count.
// x9 is the output cursor biased by +64 so the eight 16-byte stores of one
// iteration use offsets -64..+48.
LBB0_40:
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99                      // batchSize < 32: skip the loop
    // Per-lane right-shift counts go into the low 64 bits of V1..V4. They must
    // target the vector registers: the neg/ushl instructions of this path read
    // v1..v4, and x1 has to stay intact for the add that derives x9.
    VMOVD $0x0000000f00000002, V1 // LCPI0_105: lanes {2, 15}
    VMOVD $0x0000001100000004, V2 // LCPI0_107: lanes {4, 17}
    VMOVD $0x0000001200000005, V3 // LCPI0_109: lanes {5, 18}
    VMOVD $0x0000001300000006, V4 // LCPI0_112: lanes {6, 19}
    WORD $0x91010029 // add x9, x1, #64
    WORD $0x4f00c7e0 // movi v0.4s, #31, msl #8
    // USHL shifts right for negative counts, so negate the counts once up front.
    WORD $0x2ea0b821 // neg v1.2s, v1.2s
    WORD $0x2ea0b842 // neg v2.2s, v2.2s
    WORD $0x2ea0b863 // neg v3.2s, v3.2s
    WORD $0x2ea0b884 // neg v4.2s, v4.2s
LBB0_42:
    // Store group 1 -> [x9, #-64] (continues on the following line)
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0xf1000508 // subs x8, x8, #1   (sets the flags the loop's closing BNE tests)
    WORD $0x530d7d4c // lsr w12, w10, #13
    WORD $0x1e270145 // fmov s5, w10
    WORD $0x138a696d // extr w13, w11, w10, #26
    WORD $0x4e0c1d85 // mov v5.s[1], w12
    WORD $0x53077d6b // lsr w11, w11, #7
    WORD $0x4e141da5 // mov v5.s[2], w13
    WORD $0x4e1c1d65 // mov
v5.s[3], w11 1140 WORD $0x4e201ca6 // and v6.16b, v5.16b, v0.16b 1141 WORD $0x3c9c0126 // stur q6, [x9, #-64] 1142 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 1143 WORD $0x53017d4c // lsr w12, w10, #1 1144 WORD $0x530e7d4d // lsr w13, w10, #14 1145 WORD $0x4e0c1d85 // mov v5.s[1], w12 1146 WORD $0x138a6d6a // extr w10, w11, w10, #27 1147 WORD $0x4e141da5 // mov v5.s[2], w13 1148 WORD $0x4e1c1d45 // mov v5.s[3], w10 1149 WORD $0x4e201ca5 // and v5.16b, v5.16b, v0.16b 1150 WORD $0x3c9d0125 // stur q5, [x9, #-48] 1151 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 1152 WORD $0x53087d4c // lsr w12, w10, #8 1153 WORD $0x138a556a // extr w10, w11, w10, #21 1154 WORD $0x0e040d65 // dup v5.2s, w11 1155 WORD $0x1e270186 // fmov s6, w12 1156 WORD $0x2ea144a5 // ushl v5.2s, v5.2s, v1.2s 1157 WORD $0x4e0c1d46 // mov v6.s[1], w10 1158 WORD $0x6e1804a6 // mov v6.d[1], v5.d[0] 1159 WORD $0x4e201cc5 // and v5.16b, v6.16b, v0.16b 1160 WORD $0x3c9e0125 // stur q5, [x9, #-32] 1161 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 1162 WORD $0xb940180c // ldr w12, [x0, #24] 1163 WORD $0x138a716a // extr w10, w11, w10, #28 1164 WORD $0x53097d6d // lsr w13, w11, #9 1165 WORD $0x1e270145 // fmov s5, w10 1166 WORD $0x138b598b // extr w11, w12, w11, #22 1167 WORD $0x4e0c1da5 // mov v5.s[1], w13 1168 WORD $0x53037d8c // lsr w12, w12, #3 1169 WORD $0x4e141d65 // mov v5.s[2], w11 1170 WORD $0x4e1c1d85 // mov v5.s[3], w12 1171 WORD $0x4e201ca5 // and v5.16b, v5.16b, v0.16b 1172 WORD $0x3c9f0125 // stur q5, [x9, #-16] 1173 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 1174 WORD $0xb940200c // ldr w12, [x0, #32] 1175 WORD $0x53107d4d // lsr w13, w10, #16 1176 WORD $0x138a756a // extr w10, w11, w10, #29 1177 WORD $0x1e2701a5 // fmov s5, w13 1178 WORD $0x530a7d6e // lsr w14, w11, #10 1179 WORD $0x4e0c1d45 // mov v5.s[1], w10 1180 WORD $0x138b5d8b // extr w11, w12, w11, #23 1181 WORD $0x4e141dc5 // mov v5.s[2], w14 1182 WORD $0x4e1c1d65 // mov v5.s[3], w11 1183 WORD $0x4e201ca5 // and v5.16b, v5.16b, v0.16b 
WORD $0x3d800125 // str q5, [x9]
    // Tail of loop LBB0_42: stores at [x9, #16], [x9, #32], [x9, #48], then loop back.
    WORD $0x29442c0a // ldp w10, w11, [x0, #32]
    WORD $0x0e040d45 // dup v5.2s, w10
    WORD $0x138a796a // extr w10, w11, w10, #30
    WORD $0x2ea244a5 // ushl v5.2s, v5.2s, v2.2s
    WORD $0x530b7d6b // lsr w11, w11, #11
    WORD $0x4e141d45 // mov v5.s[2], w10
    WORD $0x4e1c1d65 // mov v5.s[3], w11
    WORD $0x4e201ca5 // and v5.16b, v5.16b, v0.16b
    WORD $0x3d800525 // str q5, [x9, #16]
    WORD $0x2944ac0a // ldp w10, w11, [x0, #36]
    WORD $0xb9402c0c // ldr w12, [x0, #44]
    WORD $0x138a616a // extr w10, w11, w10, #24
    WORD $0x0e040d65 // dup v5.2s, w11
    WORD $0x2ea344a5 // ushl v5.2s, v5.2s, v3.2s
    WORD $0x1e270146 // fmov s6, w10
    WORD $0x6e0c04a6 // mov v6.s[1], v5.s[0]
    WORD $0x138b7d8b // extr w11, w12, w11, #31
    WORD $0x6e1424a6 // mov v6.s[2], v5.s[1]
    WORD $0x4e1c1d66 // mov v6.s[3], w11
    WORD $0x4e201cc5 // and v5.16b, v6.16b, v0.16b
    WORD $0x3d800925 // str q5, [x9, #32]
    WORD $0x2945ac0a // ldp w10, w11, [x0, #44]
    WORD $0x9100d000 // add x0, x0, #52
    WORD $0x530c7d4c // lsr w12, w10, #12
    WORD $0x138a656a // extr w10, w11, w10, #25
    WORD $0x0e040d65 // dup v5.2s, w11
    WORD $0x1e270186 // fmov s6, w12
    WORD $0x2ea444a5 // ushl v5.2s, v5.2s, v4.2s
    WORD $0x4e0c1d46 // mov v6.s[1], w10
    WORD $0x6e1804a6 // mov v6.d[1], v5.d[0]
    WORD $0x4e201cc5 // and v5.16b, v6.16b, v0.16b
    WORD $0x3d800d25 // str q5, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128   (advance output cursor by 32 words)
    BNE LBB0_42
    JMP LBB0_99

// LBB0_43: setup for the unpack loop LBB0_45. The lane mask built here is
// 0x3fff (movi #63, msl #8), i.e. 14 significant bits per 32-bit output word.
// Register roles, as established at function entry:
//   x0 = input cursor, x1 = output base, w2 = batchSize, x8 = iteration count.
// x9 is the output cursor biased by +64 so the eight 16-byte stores of one
// iteration use offsets -64..+48.
LBB0_43:
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99                      // batchSize < 32: skip the loop
    // Per-lane right-shift counts go into the low 64 bits of V1. They must
    // target the vector register: the neg/ushl instructions of this path read
    // v1, and x1 has to stay intact for the add that derives x9.
    VMOVD $0x0000001200000004, V1 // LCPI0_111: lanes {4, 18}
    WORD $0x91010029 // add x9, x1, #64
    WORD $0x4f01c7e0 // movi v0.4s, #63, msl #8
    WORD $0x2ea0b821 // neg v1.2s, v1.2s   (negative count => USHL shifts right)
LBB0_45:
    // Store group 1 -> [x9, #-64] (continues on the following line)
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0xf1000508 // subs x8, x8, #1   (sets the flags the loop's closing BNE tests)
    WORD $0x530e7d4c // lsr w12, w10, #14
    WORD $0x1e270142 // fmov s2, w10
WORD $0x138a716d // extr w13, w11, w10, #28 1233 WORD $0x4e0c1d82 // mov v2.s[1], w12 1234 WORD $0x530a7d6b // lsr w11, w11, #10 1235 WORD $0x4e141da2 // mov v2.s[2], w13 1236 WORD $0x4e1c1d62 // mov v2.s[3], w11 1237 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1238 WORD $0x3c9c0122 // stur q2, [x9, #-64] 1239 WORD $0x2940ac0a // ldp w10, w11, [x0, #4] 1240 WORD $0xb9400c0c // ldr w12, [x0, #12] 1241 WORD $0x138a616a // extr w10, w11, w10, #24 1242 WORD $0x53067d6d // lsr w13, w11, #6 1243 WORD $0x1e270142 // fmov s2, w10 1244 WORD $0x138b518b // extr w11, w12, w11, #20 1245 WORD $0x4e0c1da2 // mov v2.s[1], w13 1246 WORD $0x53027d8c // lsr w12, w12, #2 1247 WORD $0x4e141d62 // mov v2.s[2], w11 1248 WORD $0x4e1c1d82 // mov v2.s[3], w12 1249 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1250 WORD $0x3c9d0122 // stur q2, [x9, #-48] 1251 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 1252 WORD $0xb940140c // ldr w12, [x0, #20] 1253 WORD $0x53107d4d // lsr w13, w10, #16 1254 WORD $0x138a796a // extr w10, w11, w10, #30 1255 WORD $0x1e2701a2 // fmov s2, w13 1256 WORD $0x530c7d6e // lsr w14, w11, #12 1257 WORD $0x4e0c1d42 // mov v2.s[1], w10 1258 WORD $0x138b698b // extr w11, w12, w11, #26 1259 WORD $0x4e141dc2 // mov v2.s[2], w14 1260 WORD $0x4e1c1d62 // mov v2.s[3], w11 1261 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1262 WORD $0x3c9e0122 // stur q2, [x9, #-32] 1263 WORD $0x2942ac0a // ldp w10, w11, [x0, #20] 1264 WORD $0x53087d4c // lsr w12, w10, #8 1265 WORD $0x138a596a // extr w10, w11, w10, #22 1266 WORD $0x0e040d62 // dup v2.2s, w11 1267 WORD $0x1e270183 // fmov s3, w12 1268 WORD $0x2ea14442 // ushl v2.2s, v2.2s, v1.2s 1269 WORD $0x4e0c1d43 // mov v3.s[1], w10 1270 WORD $0x6e180443 // mov v3.d[1], v2.d[0] 1271 WORD $0x4e201c62 // and v2.16b, v3.16b, v0.16b 1272 WORD $0x3c9f0122 // stur q2, [x9, #-16] 1273 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 1274 WORD $0x530e7d4c // lsr w12, w10, #14 1275 WORD $0x1e270142 // fmov s2, w10 1276 WORD $0x138a716d // extr 
// w13, w11, w10, #28
    // Tail of loop LBB0_45: stores at [x9] .. [x9, #48], then loop back.
    WORD $0x4e0c1d82 // mov v2.s[1], w12
    WORD $0x530a7d6b // lsr w11, w11, #10
    WORD $0x4e141da2 // mov v2.s[2], w13
    WORD $0x4e1c1d62 // mov v2.s[3], w11
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3d800122 // str q2, [x9]
    WORD $0x29442c0a // ldp w10, w11, [x0, #32]
    WORD $0xb940280c // ldr w12, [x0, #40]
    WORD $0x138a616a // extr w10, w11, w10, #24
    WORD $0x53067d6d // lsr w13, w11, #6
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138b518b // extr w11, w12, w11, #20
    WORD $0x4e0c1da2 // mov v2.s[1], w13
    WORD $0x53027d8c // lsr w12, w12, #2
    WORD $0x4e141d62 // mov v2.s[2], w11
    WORD $0x4e1c1d82 // mov v2.s[3], w12
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3d800522 // str q2, [x9, #16]
    WORD $0x29452c0a // ldp w10, w11, [x0, #40]
    WORD $0xb940300c // ldr w12, [x0, #48]
    WORD $0x53107d4d // lsr w13, w10, #16
    WORD $0x138a796a // extr w10, w11, w10, #30
    WORD $0x1e2701a2 // fmov s2, w13
    WORD $0x530c7d6e // lsr w14, w11, #12
    WORD $0x4e0c1d42 // mov v2.s[1], w10
    WORD $0x138b698b // extr w11, w12, w11, #26
    WORD $0x4e141dc2 // mov v2.s[2], w14
    WORD $0x4e1c1d62 // mov v2.s[3], w11
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3d800922 // str q2, [x9, #32]
    WORD $0x29462c0a // ldp w10, w11, [x0, #48]
    WORD $0x9100e000 // add x0, x0, #56
    WORD $0x53087d4c // lsr w12, w10, #8
    WORD $0x138a596a // extr w10, w11, w10, #22
    WORD $0x0e040d62 // dup v2.2s, w11
    WORD $0x1e270183 // fmov s3, w12
    WORD $0x2ea14442 // ushl v2.2s, v2.2s, v1.2s
    WORD $0x4e0c1d43 // mov v3.s[1], w10
    WORD $0x6e180443 // mov v3.d[1], v2.d[0]
    WORD $0x4e201c62 // and v2.16b, v3.16b, v0.16b
    WORD $0x3d800d22 // str q2, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128   (advance output cursor by 32 words)
    BNE LBB0_45
    JMP LBB0_99

// LBB0_46: setup for the unpack loop LBB0_48. The lane mask built here is
// 0x7fff (movi #127, msl #8), i.e. 15 significant bits per 32-bit output word.
// Register roles, as established at function entry:
//   x0 = input cursor, x1 = output base, w2 = batchSize, x8 = iteration count.
// x9 is the output cursor biased by +64 so the eight 16-byte stores of one
// iteration use offsets -64..+48.
LBB0_46:
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99                      // batchSize < 32: skip the loop
    // Per-lane right-shift counts go into the low 64 bits of V1. They must
    // target the vector register: the neg/ushl instructions of this path read
    // v1, and x1 has to stay intact for the add that derives x9.
    VMOVD $0x0000001100000002, V1 // LCPI0_110: lanes {2, 17}
    WORD $0x91010029 // add x9, x1, #64
    WORD $0x4f03c7e0 // movi v0.4s, #127, msl #8
    WORD $0x2ea0b821 // neg v1.2s, v1.2s   (negative count => USHL shifts right)
LBB0_48:
    // Store group 1 -> [x9, #-64]
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0xf1000508 // subs x8, x8, #1   (sets the flags the loop's closing BNE tests)
    WORD $0x530f7d4c // lsr w12, w10, #15
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138a796d // extr w13, w11, w10, #30
    WORD $0x4e0c1d82 // mov v2.s[1], w12
    WORD $0x530d7d6b // lsr w11, w11, #13
    WORD $0x4e141da2 // mov v2.s[2], w13
    WORD $0x4e1c1d62 // mov v2.s[3], w11
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3c9c0122 // stur q2, [x9, #-64]
    // Store group 2 -> [x9, #-48]
    WORD $0x2940ac0a // ldp w10, w11, [x0, #4]
    WORD $0xb9400c0c // ldr w12, [x0, #12]
    WORD $0x138a716a // extr w10, w11, w10, #28
    WORD $0x530b7d6d // lsr w13, w11, #11
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138b698b // extr w11, w12, w11, #26
    WORD $0x4e0c1da2 // mov v2.s[1], w13
    WORD $0x53097d8c // lsr w12, w12, #9
    WORD $0x4e141d62 // mov v2.s[2], w11
    WORD $0x4e1c1d82 // mov v2.s[3], w12
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3c9d0122 // stur q2, [x9, #-48]
    // Store group 3 -> [x9, #-32]
    WORD $0x2941ac0a // ldp w10, w11, [x0, #12]
    WORD $0xb940140c // ldr w12, [x0, #20]
    WORD $0x138a616a // extr w10, w11, w10, #24
    WORD $0x53077d6d // lsr w13, w11, #7
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138b598b // extr w11, w12, w11, #22
    WORD $0x4e0c1da2 // mov v2.s[1], w13
    WORD $0x53057d8c // lsr w12, w12, #5
    WORD $0x4e141d62 // mov v2.s[2], w11
    WORD $0x4e1c1d82 // mov v2.s[3], w12
    WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b
    WORD $0x3c9e0122 // stur q2, [x9, #-32]
    // Store group 4 -> [x9, #-16] (continues on the following line)
    WORD $0x2942ac0a // ldp w10, w11, [x0, #20]
    WORD $0xb9401c0c // ldr w12, [x0, #28]
    WORD $0x138a516a // extr w10, w11, w10, #20
    WORD $0x53037d6d // lsr w13, w11, #3
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138b498b // extr w11, w12,
w11, #18 1370 WORD $0x4e0c1da2 // mov v2.s[1], w13 1371 WORD $0x53017d8c // lsr w12, w12, #1 1372 WORD $0x4e141d62 // mov v2.s[2], w11 1373 WORD $0x4e1c1d82 // mov v2.s[3], w12 1374 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1375 WORD $0x3c9f0122 // stur q2, [x9, #-16] 1376 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 1377 WORD $0xb940240c // ldr w12, [x0, #36] 1378 WORD $0x53107d4d // lsr w13, w10, #16 1379 WORD $0x138a7d6a // extr w10, w11, w10, #31 1380 WORD $0x1e2701a2 // fmov s2, w13 1381 WORD $0x530e7d6e // lsr w14, w11, #14 1382 WORD $0x4e0c1d42 // mov v2.s[1], w10 1383 WORD $0x138b758b // extr w11, w12, w11, #29 1384 WORD $0x4e141dc2 // mov v2.s[2], w14 1385 WORD $0x4e1c1d62 // mov v2.s[3], w11 1386 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1387 WORD $0x3d800122 // str q2, [x9] 1388 WORD $0x2944ac0a // ldp w10, w11, [x0, #36] 1389 WORD $0xb9402c0c // ldr w12, [x0, #44] 1390 WORD $0x530c7d4d // lsr w13, w10, #12 1391 WORD $0x138a6d6a // extr w10, w11, w10, #27 1392 WORD $0x1e2701a2 // fmov s2, w13 1393 WORD $0x530a7d6e // lsr w14, w11, #10 1394 WORD $0x4e0c1d42 // mov v2.s[1], w10 1395 WORD $0x138b658b // extr w11, w12, w11, #25 1396 WORD $0x4e141dc2 // mov v2.s[2], w14 1397 WORD $0x4e1c1d62 // mov v2.s[3], w11 1398 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1399 WORD $0x3d800522 // str q2, [x9, #16] 1400 WORD $0x2945ac0a // ldp w10, w11, [x0, #44] 1401 WORD $0xb940340c // ldr w12, [x0, #52] 1402 WORD $0x53087d4d // lsr w13, w10, #8 1403 WORD $0x138a5d6a // extr w10, w11, w10, #23 1404 WORD $0x1e2701a2 // fmov s2, w13 1405 WORD $0x53067d6e // lsr w14, w11, #6 1406 WORD $0x4e0c1d42 // mov v2.s[1], w10 1407 WORD $0x138b558b // extr w11, w12, w11, #21 1408 WORD $0x4e141dc2 // mov v2.s[2], w14 1409 WORD $0x4e1c1d62 // mov v2.s[3], w11 1410 WORD $0x4e201c42 // and v2.16b, v2.16b, v0.16b 1411 WORD $0x3d800922 // str q2, [x9, #32] 1412 WORD $0x2946ac0a // ldp w10, w11, [x0, #52] 1413 WORD $0x9100f000 // add x0, x0, #60 1414 WORD $0x53047d4c // lsr w12, 
w10, #4 1415 WORD $0x138a4d6a // extr w10, w11, w10, #19 1416 WORD $0x0e040d62 // dup v2.2s, w11 1417 WORD $0x1e270183 // fmov s3, w12 1418 WORD $0x2ea14442 // ushl v2.2s, v2.2s, v1.2s 1419 WORD $0x4e0c1d43 // mov v3.s[1], w10 1420 WORD $0x6e180443 // mov v3.d[1], v2.d[0] 1421 WORD $0x4e201c62 // and v2.16b, v3.16b, v0.16b 1422 WORD $0x3d800d22 // str q2, [x9, #48] 1423 WORD $0x91020129 // add x9, x9, #128 1424 BNE LBB0_48 1425 JMP LBB0_99 1426 LBB0_49: 1427 WORD $0x7100805f // cmp w2, #32 1428 BLT LBB0_99 1429 WORD $0x91010029 // add x9, x1, #64 1430 WORD $0x6f01e660 // movi v0.2d, #0x00ffff0000ffff 1431 LBB0_51: 1432 WORD $0x29402c0a // ldp w10, w11, [x0] 1433 WORD $0xf1000508 // subs x8, x8, #1 1434 WORD $0x53107d4c // lsr w12, w10, #16 1435 WORD $0x1e270141 // fmov s1, w10 1436 WORD $0x4e0c1d81 // mov v1.s[1], w12 1437 WORD $0x53107d6d // lsr w13, w11, #16 1438 WORD $0x4e141d61 // mov v1.s[2], w11 1439 WORD $0x4e1c1da1 // mov v1.s[3], w13 1440 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1441 WORD $0x3c9c0121 // stur q1, [x9, #-64] 1442 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 1443 WORD $0x53107d4c // lsr w12, w10, #16 1444 WORD $0x1e270141 // fmov s1, w10 1445 WORD $0x4e0c1d81 // mov v1.s[1], w12 1446 WORD $0x53107d6d // lsr w13, w11, #16 1447 WORD $0x4e141d61 // mov v1.s[2], w11 1448 WORD $0x4e1c1da1 // mov v1.s[3], w13 1449 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1450 WORD $0x3c9d0121 // stur q1, [x9, #-48] 1451 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 1452 WORD $0x53107d4c // lsr w12, w10, #16 1453 WORD $0x1e270141 // fmov s1, w10 1454 WORD $0x4e0c1d81 // mov v1.s[1], w12 1455 WORD $0x53107d6d // lsr w13, w11, #16 1456 WORD $0x4e141d61 // mov v1.s[2], w11 1457 WORD $0x4e1c1da1 // mov v1.s[3], w13 1458 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1459 WORD $0x3c9e0121 // stur q1, [x9, #-32] 1460 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 1461 WORD $0x53107d4c // lsr w12, w10, #16 1462 WORD $0x1e270141 // fmov s1, w10 1463 WORD $0x4e0c1d81 // 
mov v1.s[1], w12 1464 WORD $0x53107d6d // lsr w13, w11, #16 1465 WORD $0x4e141d61 // mov v1.s[2], w11 1466 WORD $0x4e1c1da1 // mov v1.s[3], w13 1467 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1468 WORD $0x3c9f0121 // stur q1, [x9, #-16] 1469 WORD $0x29442c0a // ldp w10, w11, [x0, #32] 1470 WORD $0x53107d4c // lsr w12, w10, #16 1471 WORD $0x1e270141 // fmov s1, w10 1472 WORD $0x4e0c1d81 // mov v1.s[1], w12 1473 WORD $0x53107d6d // lsr w13, w11, #16 1474 WORD $0x4e141d61 // mov v1.s[2], w11 1475 WORD $0x4e1c1da1 // mov v1.s[3], w13 1476 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1477 WORD $0x3d800121 // str q1, [x9] 1478 WORD $0x29452c0a // ldp w10, w11, [x0, #40] 1479 WORD $0x53107d4c // lsr w12, w10, #16 1480 WORD $0x1e270141 // fmov s1, w10 1481 WORD $0x4e0c1d81 // mov v1.s[1], w12 1482 WORD $0x53107d6d // lsr w13, w11, #16 1483 WORD $0x4e141d61 // mov v1.s[2], w11 1484 WORD $0x4e1c1da1 // mov v1.s[3], w13 1485 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1486 WORD $0x3d800521 // str q1, [x9, #16] 1487 WORD $0x29462c0a // ldp w10, w11, [x0, #48] 1488 WORD $0x53107d4c // lsr w12, w10, #16 1489 WORD $0x1e270141 // fmov s1, w10 1490 WORD $0x4e0c1d81 // mov v1.s[1], w12 1491 WORD $0x53107d6d // lsr w13, w11, #16 1492 WORD $0x4e141d61 // mov v1.s[2], w11 1493 WORD $0x4e1c1da1 // mov v1.s[3], w13 1494 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1495 WORD $0x3d800921 // str q1, [x9, #32] 1496 WORD $0x29472c0a // ldp w10, w11, [x0, #56] 1497 WORD $0x91010000 // add x0, x0, #64 1498 WORD $0x53107d4c // lsr w12, w10, #16 1499 WORD $0x1e270141 // fmov s1, w10 1500 WORD $0x4e0c1d81 // mov v1.s[1], w12 1501 WORD $0x53107d6d // lsr w13, w11, #16 1502 WORD $0x4e141d61 // mov v1.s[2], w11 1503 WORD $0x4e1c1da1 // mov v1.s[3], w13 1504 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1505 WORD $0x3d800d21 // str q1, [x9, #48] 1506 WORD $0x91020129 // add x9, x9, #128 1507 BNE LBB0_51 1508 JMP LBB0_99 1509 LBB0_52: 1510 WORD $0x7100805f // cmp w2, #32 1511 BLT LBB0_99 
1512 WORD $0x91010029 // add x9, x1, #64 1513 WORD $0x4f00d420 // movi v0.4s, #1, msl #16 1514 LBB0_54: 1515 WORD $0x29402c0a // ldp w10, w11, [x0] 1516 WORD $0xb940080c // ldr w12, [x0, #8] 1517 WORD $0xf1000508 // subs x8, x8, #1 1518 WORD $0x138a456d // extr w13, w11, w10, #17 1519 WORD $0x1e270141 // fmov s1, w10 1520 WORD $0x53027d6e // lsr w14, w11, #2 1521 WORD $0x4e0c1da1 // mov v1.s[1], w13 1522 WORD $0x138b4d8b // extr w11, w12, w11, #19 1523 WORD $0x4e141dc1 // mov v1.s[2], w14 1524 WORD $0x4e1c1d61 // mov v1.s[3], w11 1525 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1526 WORD $0x3c9c0121 // stur q1, [x9, #-64] 1527 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 1528 WORD $0xb940100c // ldr w12, [x0, #16] 1529 WORD $0x53047d4d // lsr w13, w10, #4 1530 WORD $0x138a556a // extr w10, w11, w10, #21 1531 WORD $0x1e2701a1 // fmov s1, w13 1532 WORD $0x53067d6e // lsr w14, w11, #6 1533 WORD $0x4e0c1d41 // mov v1.s[1], w10 1534 WORD $0x138b5d8b // extr w11, w12, w11, #23 1535 WORD $0x4e141dc1 // mov v1.s[2], w14 1536 WORD $0x4e1c1d61 // mov v1.s[3], w11 1537 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1538 WORD $0x3c9d0121 // stur q1, [x9, #-48] 1539 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 1540 WORD $0xb940180c // ldr w12, [x0, #24] 1541 WORD $0x53087d4d // lsr w13, w10, #8 1542 WORD $0x138a656a // extr w10, w11, w10, #25 1543 WORD $0x1e2701a1 // fmov s1, w13 1544 WORD $0x530a7d6e // lsr w14, w11, #10 1545 WORD $0x4e0c1d41 // mov v1.s[1], w10 1546 WORD $0x138b6d8b // extr w11, w12, w11, #27 1547 WORD $0x4e141dc1 // mov v1.s[2], w14 1548 WORD $0x4e1c1d61 // mov v1.s[3], w11 1549 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1550 WORD $0x3c9e0121 // stur q1, [x9, #-32] 1551 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 1552 WORD $0xb940200c // ldr w12, [x0, #32] 1553 WORD $0x530c7d4d // lsr w13, w10, #12 1554 WORD $0x138a756a // extr w10, w11, w10, #29 1555 WORD $0x1e2701a1 // fmov s1, w13 1556 WORD $0x530e7d6e // lsr w14, w11, #14 1557 WORD $0x4e0c1d41 // mov 
v1.s[1], w10 1558 WORD $0x138b7d8b // extr w11, w12, w11, #31 1559 WORD $0x4e141dc1 // mov v1.s[2], w14 1560 WORD $0x4e1c1d61 // mov v1.s[3], w11 1561 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1562 WORD $0x3c9f0121 // stur q1, [x9, #-16] 1563 WORD $0x29442c0a // ldp w10, w11, [x0, #32] 1564 WORD $0xb940280c // ldr w12, [x0, #40] 1565 WORD $0x138a416a // extr w10, w11, w10, #16 1566 WORD $0x53017d6d // lsr w13, w11, #1 1567 WORD $0x1e270141 // fmov s1, w10 1568 WORD $0x138b498b // extr w11, w12, w11, #18 1569 WORD $0x4e0c1da1 // mov v1.s[1], w13 1570 WORD $0x53037d8c // lsr w12, w12, #3 1571 WORD $0x4e141d61 // mov v1.s[2], w11 1572 WORD $0x4e1c1d81 // mov v1.s[3], w12 1573 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1574 WORD $0x3d800121 // str q1, [x9] 1575 WORD $0x29452c0a // ldp w10, w11, [x0, #40] 1576 WORD $0xb940300c // ldr w12, [x0, #48] 1577 WORD $0x138a516a // extr w10, w11, w10, #20 1578 WORD $0x53057d6d // lsr w13, w11, #5 1579 WORD $0x1e270141 // fmov s1, w10 1580 WORD $0x138b598b // extr w11, w12, w11, #22 1581 WORD $0x4e0c1da1 // mov v1.s[1], w13 1582 WORD $0x53077d8c // lsr w12, w12, #7 1583 WORD $0x4e141d61 // mov v1.s[2], w11 1584 WORD $0x4e1c1d81 // mov v1.s[3], w12 1585 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1586 WORD $0x3d800521 // str q1, [x9, #16] 1587 WORD $0x29462c0a // ldp w10, w11, [x0, #48] 1588 WORD $0xb940380c // ldr w12, [x0, #56] 1589 WORD $0x138a616a // extr w10, w11, w10, #24 1590 WORD $0x53097d6d // lsr w13, w11, #9 1591 WORD $0x1e270141 // fmov s1, w10 1592 WORD $0x138b698b // extr w11, w12, w11, #26 1593 WORD $0x4e0c1da1 // mov v1.s[1], w13 1594 WORD $0x530b7d8c // lsr w12, w12, #11 1595 WORD $0x4e141d61 // mov v1.s[2], w11 1596 WORD $0x4e1c1d81 // mov v1.s[3], w12 1597 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1598 WORD $0x3d800921 // str q1, [x9, #32] 1599 WORD $0x29472c0a // ldp w10, w11, [x0, #56] 1600 WORD $0xb940400c // ldr w12, [x0, #64] 1601 WORD $0x91011000 // add x0, x0, #68 1602 WORD $0x138a716a // 
extr w10, w11, w10, #28 1603 WORD $0x530d7d6d // lsr w13, w11, #13 1604 WORD $0x1e270141 // fmov s1, w10 1605 WORD $0x138b798b // extr w11, w12, w11, #30 1606 WORD $0x4e0c1da1 // mov v1.s[1], w13 1607 WORD $0x530f7d8c // lsr w12, w12, #15 1608 WORD $0x4e141d61 // mov v1.s[2], w11 1609 WORD $0x4e1c1d81 // mov v1.s[3], w12 1610 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1611 WORD $0x3d800d21 // str q1, [x9, #48] 1612 WORD $0x91020129 // add x9, x9, #128 1613 BNE LBB0_54 1614 JMP LBB0_99 1615 LBB0_55: 1616 WORD $0x7100805f // cmp w2, #32 1617 BLT LBB0_99 1618 WORD $0x91010029 // add x9, x1, #64 1619 WORD $0x4f00d460 // movi v0.4s, #3, msl #16 1620 LBB0_57: 1621 WORD $0x29402c0a // ldp w10, w11, [x0] 1622 WORD $0xb940080c // ldr w12, [x0, #8] 1623 WORD $0xf1000508 // subs x8, x8, #1 1624 WORD $0x138a496d // extr w13, w11, w10, #18 1625 WORD $0x1e270141 // fmov s1, w10 1626 WORD $0x53047d6e // lsr w14, w11, #4 1627 WORD $0x4e0c1da1 // mov v1.s[1], w13 1628 WORD $0x138b598b // extr w11, w12, w11, #22 1629 WORD $0x4e141dc1 // mov v1.s[2], w14 1630 WORD $0x4e1c1d61 // mov v1.s[3], w11 1631 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1632 WORD $0x3c9c0121 // stur q1, [x9, #-64] 1633 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 1634 WORD $0xb940100c // ldr w12, [x0, #16] 1635 WORD $0x53087d4d // lsr w13, w10, #8 1636 WORD $0x138a696a // extr w10, w11, w10, #26 1637 WORD $0x1e2701a1 // fmov s1, w13 1638 WORD $0x530c7d6e // lsr w14, w11, #12 1639 WORD $0x4e0c1d41 // mov v1.s[1], w10 1640 WORD $0x138b798b // extr w11, w12, w11, #30 1641 WORD $0x4e141dc1 // mov v1.s[2], w14 1642 WORD $0x4e1c1d61 // mov v1.s[3], w11 1643 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1644 WORD $0x3c9d0121 // stur q1, [x9, #-48] 1645 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 1646 WORD $0xb940180c // ldr w12, [x0, #24] 1647 WORD $0x138a416a // extr w10, w11, w10, #16 1648 WORD $0x53027d6d // lsr w13, w11, #2 1649 WORD $0x1e270141 // fmov s1, w10 1650 WORD $0x138b518b // extr w11, w12, w11, 
#20 1651 WORD $0x4e0c1da1 // mov v1.s[1], w13 1652 WORD $0x53067d8c // lsr w12, w12, #6 1653 WORD $0x4e141d61 // mov v1.s[2], w11 1654 WORD $0x4e1c1d81 // mov v1.s[3], w12 1655 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1656 WORD $0x3c9e0121 // stur q1, [x9, #-32] 1657 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 1658 WORD $0xb940200c // ldr w12, [x0, #32] 1659 WORD $0x138a616a // extr w10, w11, w10, #24 1660 WORD $0x530a7d6d // lsr w13, w11, #10 1661 WORD $0x1e270141 // fmov s1, w10 1662 WORD $0x138b718b // extr w11, w12, w11, #28 1663 WORD $0x4e0c1da1 // mov v1.s[1], w13 1664 WORD $0x530e7d8c // lsr w12, w12, #14 1665 WORD $0x4e141d61 // mov v1.s[2], w11 1666 WORD $0x4e1c1d81 // mov v1.s[3], w12 1667 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1668 WORD $0x3c9f0121 // stur q1, [x9, #-16] 1669 WORD $0x2944ac0a // ldp w10, w11, [x0, #36] 1670 WORD $0xb9402c0c // ldr w12, [x0, #44] 1671 WORD $0x138a496d // extr w13, w11, w10, #18 1672 WORD $0x1e270141 // fmov s1, w10 1673 WORD $0x53047d6e // lsr w14, w11, #4 1674 WORD $0x4e0c1da1 // mov v1.s[1], w13 1675 WORD $0x138b598b // extr w11, w12, w11, #22 1676 WORD $0x4e141dc1 // mov v1.s[2], w14 1677 WORD $0x4e1c1d61 // mov v1.s[3], w11 1678 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1679 WORD $0x3d800121 // str q1, [x9] 1680 WORD $0x2945ac0a // ldp w10, w11, [x0, #44] 1681 WORD $0xb940340c // ldr w12, [x0, #52] 1682 WORD $0x53087d4d // lsr w13, w10, #8 1683 WORD $0x138a696a // extr w10, w11, w10, #26 1684 WORD $0x1e2701a1 // fmov s1, w13 1685 WORD $0x530c7d6e // lsr w14, w11, #12 1686 WORD $0x4e0c1d41 // mov v1.s[1], w10 1687 WORD $0x138b798b // extr w11, w12, w11, #30 1688 WORD $0x4e141dc1 // mov v1.s[2], w14 1689 WORD $0x4e1c1d61 // mov v1.s[3], w11 1690 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1691 WORD $0x3d800521 // str q1, [x9, #16] 1692 WORD $0x2946ac0a // ldp w10, w11, [x0, #52] 1693 WORD $0xb9403c0c // ldr w12, [x0, #60] 1694 WORD $0x138a416a // extr w10, w11, w10, #16 1695 WORD $0x53027d6d // lsr 
w13, w11, #2 1696 WORD $0x1e270141 // fmov s1, w10 1697 WORD $0x138b518b // extr w11, w12, w11, #20 1698 WORD $0x4e0c1da1 // mov v1.s[1], w13 1699 WORD $0x53067d8c // lsr w12, w12, #6 1700 WORD $0x4e141d61 // mov v1.s[2], w11 1701 WORD $0x4e1c1d81 // mov v1.s[3], w12 1702 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1703 WORD $0x3d800921 // str q1, [x9, #32] 1704 WORD $0x2947ac0a // ldp w10, w11, [x0, #60] 1705 WORD $0xb940440c // ldr w12, [x0, #68] 1706 WORD $0x91012000 // add x0, x0, #72 1707 WORD $0x138a616a // extr w10, w11, w10, #24 1708 WORD $0x530a7d6d // lsr w13, w11, #10 1709 WORD $0x1e270141 // fmov s1, w10 1710 WORD $0x138b718b // extr w11, w12, w11, #28 1711 WORD $0x4e0c1da1 // mov v1.s[1], w13 1712 WORD $0x530e7d8c // lsr w12, w12, #14 1713 WORD $0x4e141d61 // mov v1.s[2], w11 1714 WORD $0x4e1c1d81 // mov v1.s[3], w12 1715 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1716 WORD $0x3d800d21 // str q1, [x9, #48] 1717 WORD $0x91020129 // add x9, x9, #128 1718 BNE LBB0_57 1719 JMP LBB0_99 1720 LBB0_58: // 19-bit path: v0 = 0x7ffff mask; v1-v4 hold per-lane left-shift counts, v5/v6/v7/v16 hold right-shift counts (negated below for USHL). Constants must be loaded into V registers: the WORD-encoded NEON ops read v1-v7/v16, and R1 still holds `out` for the add below. 1721 WORD $0x7100805f // cmp w2, #32 1722 BLT LBB0_99 1723 VMOVD $0x000000120000001f, V5 // LCPI0_102 1724 VMOVD $0x0000000e00000001, V1 // LCPI0_103 1725 VMOVD $0x000000110000001e, V6 // LCPI0_104 1726 VMOVD $0x0000000f00000002, V2 // LCPI0_105 1727 VMOVD $0x0000000f0000001c, V7 // LCPI0_106 1728 VMOVD $0x0000001100000004, V3 // LCPI0_107 1729 VMOVD $0x0000000e0000001b, V16 // LCPI0_108 1730 VMOVD $0x0000001200000005, V4 // LCPI0_109 1731 WORD $0x91010029 // add x9, x1, #64 1732 WORD $0x4f00d4e0 // movi v0.4s, #7, msl #16 1733 WORD $0x2ea0b8a5 // neg v5.2s, v5.2s 1734 WORD $0x2ea0b8c6 // neg v6.2s, v6.2s 1735 WORD $0x2ea0b8e7 // neg v7.2s, v7.2s 1736 WORD $0x2ea0ba10 // neg v16.2s, v16.2s 1737 LBB0_60: 1738 WORD $0x29402c0a // ldp w10, w11, [x0] 1739 WORD $0xb940080c // ldr w12, [x0, #8] 1740 WORD $0xf1000508 // subs x8, x8, #1 1741 WORD $0x138a4d6d // extr w13, w11, w10, #19 1742 WORD $0x1e270151 // fmov s17, w10 1743 WORD $0x53067d6e // lsr w14, w11, #6 1744 
WORD $0x4e0c1db1 // mov v17.s[1], w13 1745 WORD $0x138b658b // extr w11, w12, w11, #25 1746 WORD $0x4e141dd1 // mov v17.s[2], w14 1747 WORD $0x4e1c1d71 // mov v17.s[3], w11 1748 WORD $0x4e201e31 // and v17.16b, v17.16b, v0.16b 1749 WORD $0x3c9c0131 // stur q17, [x9, #-64] 1750 WORD $0xb940080a // ldr w10, [x0, #8] 1751 WORD $0xfc40c011 // ldur d17, [x0, #12] 1752 WORD $0x1e270152 // fmov s18, w10 1753 WORD $0x0e913a52 // zip1 v25.2s, v25.2s, v17.2s 1754 WORD $0x530c7d4b // lsr w11, w10, #12 1755 WORD $0x2ea14633 // ushl v19.2s, v17.2s, v1.2s 1756 WORD $0x0e0c3e2a // mov w10, v17.s[1] 1757 WORD $0x2ea54651 // ushl v17.2s, v25.2s, v5.2s 1758 WORD $0x1e270172 // fmov s18, w11 1759 WORD $0x0eb11e71 // orr v17.8b, v19.8b, v17.8b 1760 WORD $0x6e0c0632 // mov v25.s[1], v17.s[0] 1761 WORD $0x53057d4a // lsr w10, w10, #5 1762 WORD $0x6e142632 // mov v25.s[2], v17.s[1] 1763 WORD $0x4e1c1d52 // mov v25.s[3], w10 1764 WORD $0x4e201e51 // and v17.16b, v25.16b, v0.16b 1765 WORD $0x3c9d0131 // stur q17, [x9, #-48] 1766 WORD $0x29422c0a // ldp w10, w11, [x0, #16] 1767 WORD $0xfd400c11 // ldr d17, [x0, #24] 1768 WORD $0x1e270172 // fmov s18, w11 1769 WORD $0x138a616a // extr w10, w11, w10, #24 1770 WORD $0x0e913a52 // zip1 v25.2s, v25.2s, v17.2s 1771 WORD $0x530b7d6c // lsr w12, w11, #11 1772 WORD $0x2ea24631 // ushl v17.2s, v17.2s, v2.2s 1773 WORD $0x1e270153 // fmov s19, w10 1774 WORD $0x2ea64652 // ushl v25.2s, v25.2s, v6.2s 1775 WORD $0x4e0c1d93 // mov v19.s[1], w12 1776 WORD $0x0eb21e31 // orr v17.8b, v17.8b, v25.8b 1777 WORD $0x6e180633 // mov v19.d[1], v17.d[0] 1778 WORD $0x4e201e71 // and v17.16b, v19.16b, v0.16b 1779 WORD $0x3c9e0131 // stur q17, [x9, #-32] 1780 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 1781 WORD $0xb940240c // ldr w12, [x0, #36] 1782 WORD $0x53047d4d // lsr w13, w10, #4 1783 WORD $0x138a5d6a // extr w10, w11, w10, #23 1784 WORD $0x1e2701b1 // fmov s17, w13 1785 WORD $0x530a7d6e // lsr w14, w11, #10 1786 WORD $0x4e0c1d51 // mov v17.s[1], w10 1787 WORD 
$0x138b758b // extr w11, w12, w11, #29 1788 WORD $0x4e141dd1 // mov v17.s[2], w14 1789 WORD $0x4e1c1d71 // mov v17.s[3], w11 1790 WORD $0x4e201e31 // and v17.16b, v17.16b, v0.16b 1791 WORD $0x3c9f0131 // stur q17, [x9, #-16] 1792 WORD $0x2944ac0a // ldp w10, w11, [x0, #36] 1793 WORD $0xb9402c0c // ldr w12, [x0, #44] 1794 WORD $0x138a416a // extr w10, w11, w10, #16 1795 WORD $0x53037d6d // lsr w13, w11, #3 1796 WORD $0x1e270151 // fmov s17, w10 1797 WORD $0x138b598b // extr w11, w12, w11, #22 1798 WORD $0x4e0c1db1 // mov v17.s[1], w13 1799 WORD $0x53097d8c // lsr w12, w12, #9 1800 WORD $0x4e141d71 // mov v17.s[2], w11 1801 WORD $0x4e1c1d91 // mov v17.s[3], w12 1802 WORD $0x4e201e31 // and v17.16b, v17.16b, v0.16b 1803 WORD $0x3d800131 // str q17, [x9] 1804 WORD $0xfd401811 // ldr d17, [x0, #48] 1805 WORD $0xbd402c12 // ldr s18, [x0, #44] 1806 WORD $0xb940380a // ldr w10, [x0, #56] 1807 WORD $0x2ea34633 // ushl v19.2s, v17.2s, v3.2s 1808 WORD $0x0e913a52 // zip1 v25.2s, v25.2s, v17.2s 1809 WORD $0x0e0c3e2b // mov w11, v17.s[1] 1810 WORD $0x2ea74651 // ushl v17.2s, v25.2s, v7.2s 1811 WORD $0x53027d6c // lsr w12, w11, #2 1812 WORD $0x0eb11e71 // orr v17.8b, v19.8b, v17.8b 1813 WORD $0x138b554a // extr w10, w10, w11, #21 1814 WORD $0x4e141d91 // mov v17.s[2], w12 1815 WORD $0x4e1c1d51 // mov v17.s[3], w10 1816 WORD $0x4e201e31 // and v17.16b, v17.16b, v0.16b 1817 WORD $0x3d800531 // str q17, [x9, #16] 1818 WORD $0xb940380a // ldr w10, [x0, #56] 1819 WORD $0xfc43c011 // ldur d17, [x0, #60] 1820 WORD $0x1e270152 // fmov s18, w10 1821 WORD $0x0e913a52 // zip1 v25.2s, v25.2s, v17.2s 1822 WORD $0x53087d4b // lsr w11, w10, #8 1823 WORD $0x2ea44633 // ushl v19.2s, v17.2s, v4.2s 1824 WORD $0x0e0c3e2a // mov w10, v17.s[1] 1825 WORD $0x2eb04651 // ushl v17.2s, v25.2s, v16.2s 1826 WORD $0x1e270172 // fmov s18, w11 1827 WORD $0x0eb11e71 // orr v17.8b, v19.8b, v17.8b 1828 WORD $0x6e0c0632 // mov v25.s[1], v17.s[0] 1829 WORD $0x53017d4a // lsr w10, w10, #1 1830 WORD $0x6e142632 // 
mov v25.s[2], v17.s[1] 1831 WORD $0x4e1c1d52 // mov v25.s[3], w10 1832 WORD $0x4e201e51 // and v17.16b, v25.16b, v0.16b 1833 WORD $0x3d800931 // str q17, [x9, #32] 1834 WORD $0x29482c0a // ldp w10, w11, [x0, #64] 1835 WORD $0xb940480c // ldr w12, [x0, #72] 1836 WORD $0x91013000 // add x0, x0, #76 1837 WORD $0x138a516a // extr w10, w11, w10, #20 1838 WORD $0x53077d6d // lsr w13, w11, #7 1839 WORD $0x1e270151 // fmov s17, w10 1840 WORD $0x138b698b // extr w11, w12, w11, #26 1841 WORD $0x4e0c1db1 // mov v17.s[1], w13 1842 WORD $0x530d7d8c // lsr w12, w12, #13 1843 WORD $0x4e141d71 // mov v17.s[2], w11 1844 WORD $0x4e1c1d91 // mov v17.s[3], w12 1845 WORD $0x4e201e31 // and v17.16b, v17.16b, v0.16b 1846 WORD $0x3d800d31 // str q17, [x9, #48] 1847 WORD $0x91020129 // add x9, x9, #128 1848 BNE LBB0_60 1849 JMP LBB0_99 1850 LBB0_61: 1851 WORD $0x7100805f // cmp w2, #32 1852 BLT LBB0_99 1853 WORD $0x91010029 // add x9, x1, #64 1854 WORD $0x4f00d5e0 // movi v0.4s, #15, msl #16 1855 LBB0_63: 1856 WORD $0x29402c0a // ldp w10, w11, [x0] 1857 WORD $0xb940080c // ldr w12, [x0, #8] 1858 WORD $0xf1000508 // subs x8, x8, #1 1859 WORD $0x138a516d // extr w13, w11, w10, #20 1860 WORD $0x1e270141 // fmov s1, w10 1861 WORD $0x53087d6e // lsr w14, w11, #8 1862 WORD $0x4e0c1da1 // mov v1.s[1], w13 1863 WORD $0x138b718b // extr w11, w12, w11, #28 1864 WORD $0x4e141dc1 // mov v1.s[2], w14 1865 WORD $0x4e1c1d61 // mov v1.s[3], w11 1866 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1867 WORD $0x3c9c0121 // stur q1, [x9, #-64] 1868 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 1869 WORD $0xb940100c // ldr w12, [x0, #16] 1870 WORD $0x138a416a // extr w10, w11, w10, #16 1871 WORD $0x53047d6d // lsr w13, w11, #4 1872 WORD $0x1e270141 // fmov s1, w10 1873 WORD $0x138b618b // extr w11, w12, w11, #24 1874 WORD $0x4e0c1da1 // mov v1.s[1], w13 1875 WORD $0x530c7d8c // lsr w12, w12, #12 1876 WORD $0x4e141d61 // mov v1.s[2], w11 1877 WORD $0x4e1c1d81 // mov v1.s[3], w12 1878 WORD $0x4e201c21 // and 
v1.16b, v1.16b, v0.16b 1879 WORD $0x3c9d0121 // stur q1, [x9, #-48] 1880 WORD $0x2942ac0a // ldp w10, w11, [x0, #20] 1881 WORD $0xb9401c0c // ldr w12, [x0, #28] 1882 WORD $0x138a516d // extr w13, w11, w10, #20 1883 WORD $0x1e270141 // fmov s1, w10 1884 WORD $0x53087d6e // lsr w14, w11, #8 1885 WORD $0x4e0c1da1 // mov v1.s[1], w13 1886 WORD $0x138b718b // extr w11, w12, w11, #28 1887 WORD $0x4e141dc1 // mov v1.s[2], w14 1888 WORD $0x4e1c1d61 // mov v1.s[3], w11 1889 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1890 WORD $0x3c9e0121 // stur q1, [x9, #-32] 1891 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 1892 WORD $0xb940240c // ldr w12, [x0, #36] 1893 WORD $0x138a416a // extr w10, w11, w10, #16 1894 WORD $0x53047d6d // lsr w13, w11, #4 1895 WORD $0x1e270141 // fmov s1, w10 1896 WORD $0x138b618b // extr w11, w12, w11, #24 1897 WORD $0x4e0c1da1 // mov v1.s[1], w13 1898 WORD $0x530c7d8c // lsr w12, w12, #12 1899 WORD $0x4e141d61 // mov v1.s[2], w11 1900 WORD $0x4e1c1d81 // mov v1.s[3], w12 1901 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1902 WORD $0x3c9f0121 // stur q1, [x9, #-16] 1903 WORD $0x29452c0a // ldp w10, w11, [x0, #40] 1904 WORD $0xb940300c // ldr w12, [x0, #48] 1905 WORD $0x138a516d // extr w13, w11, w10, #20 1906 WORD $0x1e270141 // fmov s1, w10 1907 WORD $0x53087d6e // lsr w14, w11, #8 1908 WORD $0x4e0c1da1 // mov v1.s[1], w13 1909 WORD $0x138b718b // extr w11, w12, w11, #28 1910 WORD $0x4e141dc1 // mov v1.s[2], w14 1911 WORD $0x4e1c1d61 // mov v1.s[3], w11 1912 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1913 WORD $0x3d800121 // str q1, [x9] 1914 WORD $0x29462c0a // ldp w10, w11, [x0, #48] 1915 WORD $0xb940380c // ldr w12, [x0, #56] 1916 WORD $0x138a416a // extr w10, w11, w10, #16 1917 WORD $0x53047d6d // lsr w13, w11, #4 1918 WORD $0x1e270141 // fmov s1, w10 1919 WORD $0x138b618b // extr w11, w12, w11, #24 1920 WORD $0x4e0c1da1 // mov v1.s[1], w13 1921 WORD $0x530c7d8c // lsr w12, w12, #12 1922 WORD $0x4e141d61 // mov v1.s[2], w11 1923 WORD 
$0x4e1c1d81 // mov v1.s[3], w12 1924 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1925 WORD $0x3d800521 // str q1, [x9, #16] 1926 WORD $0x2947ac0a // ldp w10, w11, [x0, #60] 1927 WORD $0xb940440c // ldr w12, [x0, #68] 1928 WORD $0x138a516d // extr w13, w11, w10, #20 1929 WORD $0x1e270141 // fmov s1, w10 1930 WORD $0x53087d6e // lsr w14, w11, #8 1931 WORD $0x4e0c1da1 // mov v1.s[1], w13 1932 WORD $0x138b718b // extr w11, w12, w11, #28 1933 WORD $0x4e141dc1 // mov v1.s[2], w14 1934 WORD $0x4e1c1d61 // mov v1.s[3], w11 1935 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1936 WORD $0x3d800921 // str q1, [x9, #32] 1937 WORD $0x2948ac0a // ldp w10, w11, [x0, #68] 1938 WORD $0xb9404c0c // ldr w12, [x0, #76] 1939 WORD $0x91014000 // add x0, x0, #80 1940 WORD $0x138a416a // extr w10, w11, w10, #16 1941 WORD $0x53047d6d // lsr w13, w11, #4 1942 WORD $0x1e270141 // fmov s1, w10 1943 WORD $0x138b618b // extr w11, w12, w11, #24 1944 WORD $0x4e0c1da1 // mov v1.s[1], w13 1945 WORD $0x530c7d8c // lsr w12, w12, #12 1946 WORD $0x4e141d61 // mov v1.s[2], w11 1947 WORD $0x4e1c1d81 // mov v1.s[3], w12 1948 WORD $0x4e201c21 // and v1.16b, v1.16b, v0.16b 1949 WORD $0x3d800d21 // str q1, [x9, #48] 1950 WORD $0x91020129 // add x9, x9, #128 1951 BNE LBB0_63 1952 JMP LBB0_99 1953 LBB0_64: // 21-bit path: v0 = 0x1fffff mask; v1-v6 hold per-lane left-shift counts, v7/v16/v17/v18/v19/v20 hold right-shift counts (negated below for USHL). Constants are loaded into V registers because the WORD-encoded NEON ops read them there; LCPI0_96 lives in V18, the register the consuming ushl word (0x2eb246d6) encodes. 1954 WORD $0x7100805f // cmp w2, #32 1955 BLT LBB0_99 1956 VMOVD $0x000000130000001e, V7 // LCPI0_90 1957 VMOVD $0x0000000d00000002, V1 // LCPI0_91 1958 VMOVD $0x000000120000001d, V16 // LCPI0_92 1959 VMOVD $0x0000000e00000003, V2 // LCPI0_93 1960 VMOVD $0x000000110000001c, V17 // LCPI0_94 1961 VMOVD $0x0000000f00000004, V3 // LCPI0_95 1962 VMOVD $0x0000000f0000001a, V18 // LCPI0_96 1963 VMOVD $0x0000001100000006, V4 // LCPI0_97 1964 VMOVD $0x0000000e00000019, V19 // LCPI0_98 1965 VMOVD $0x0000001200000007, V5 // LCPI0_99 1966 VMOVD $0x0000000d00000018, V20 // LCPI0_100 1967 VMOVD $0x0000001300000008, V6 // LCPI0_101 1968 WORD $0x91010029 // add x9, x1, #64 1969 WORD $0x4f00d7e0 // movi v0.4s, #31, msl #16 1970 WORD 
$0x2ea0b8e7 // neg v7.2s, v7.2s 1971 WORD $0x2ea0ba10 // neg v16.2s, v16.2s 1972 WORD $0x2ea0ba31 // neg v17.2s, v17.2s 1973 WORD $0x2ea0ba52 // neg v18.2s, v18.2s 1974 WORD $0x2ea0ba73 // neg v19.2s, v19.2s 1975 WORD $0x2ea0ba94 // neg v20.2s, v20.2s 1976 LBB0_66: 1977 WORD $0x29402c0a // ldp w10, w11, [x0] 1978 WORD $0xb940080c // ldr w12, [x0, #8] 1979 WORD $0xf1000508 // subs x8, x8, #1 1980 WORD $0x138a556d // extr w13, w11, w10, #21 1981 WORD $0x1e270155 // fmov s21, w10 1982 WORD $0x530a7d6e // lsr w14, w11, #10 1983 WORD $0x4e0c1db5 // mov v21.s[1], w13 1984 WORD $0x138b7d8b // extr w11, w12, w11, #31 1985 WORD $0x4e141dd5 // mov v21.s[2], w14 1986 WORD $0x4e1c1d75 // mov v21.s[3], w11 1987 WORD $0x4e201eb5 // and v21.16b, v21.16b, v0.16b 1988 WORD $0x3c9c0135 // stur q21, [x9, #-64] 1989 WORD $0x29412c0a // ldp w10, w11, [x0, #8] 1990 WORD $0xfd400815 // ldr d21, [x0, #16] 1991 WORD $0x1e270176 // fmov s22, w11 1992 WORD $0x138a516a // extr w10, w11, w10, #20 1993 WORD $0x0e953ad6 // zip1 v22.2s, v22.2s, v21.2s 1994 WORD $0x53097d6c // lsr w12, w11, #9 1995 WORD $0x2ea146b5 // ushl v21.2s, v21.2s, v1.2s 1996 WORD $0x1e270157 // fmov s23, w10 1997 WORD $0x2ea746d6 // ushl v22.2s, v22.2s, v7.2s 1998 WORD $0x4e0c1d97 // mov v23.s[1], w12 1999 WORD $0x0eb61eb5 // orr v21.8b, v21.8b, v22.8b 2000 WORD $0x6e1806b7 // mov v23.d[1], v21.d[0] 2001 WORD $0x4e201ef5 // and v21.16b, v23.16b, v0.16b 2002 WORD $0x3c9d0135 // stur q21, [x9, #-48] 2003 WORD $0xb940140a // ldr w10, [x0, #20] 2004 WORD $0xfd400c15 // ldr d21, [x0, #24] 2005 WORD $0x1e270156 // fmov s22, w10 2006 WORD $0x0e953ad6 // zip1 v22.2s, v22.2s, v21.2s 2007 WORD $0x53087d4b // lsr w11, w10, #8 2008 WORD $0x2ea246b7 // ushl v23.2s, v21.2s, v2.2s 2009 WORD $0x0e0c3eaa // mov w10, v21.s[1] 2010 WORD $0x2eb046d5 // ushl v21.2s, v22.2s, v16.2s 2011 WORD $0x1e270176 // fmov s22, w11 2012 WORD $0x0eb51ef5 // orr v21.8b, v23.8b, v21.8b 2013 WORD $0x6e0c06b6 // mov v22.s[1], v21.s[0] 2014 WORD $0x53077d4a // 
lsr w10, w10, #7 2015 WORD $0x6e1426b6 // mov v22.s[2], v21.s[1] 2016 WORD $0x4e1c1d56 // mov v22.s[3], w10 2017 WORD $0x4e201ed5 // and v21.16b, v22.16b, v0.16b 2018 WORD $0x3c9e0135 // stur q21, [x9, #-32] 2019 WORD $0xfd401015 // ldr d21, [x0, #32] 2020 WORD $0xbd401c16 // ldr s22, [x0, #28] 2021 WORD $0xb940280a // ldr w10, [x0, #40] 2022 WORD $0x2ea346b7 // ushl v23.2s, v21.2s, v3.2s 2023 WORD $0x0e953ad6 // zip1 v22.2s, v22.2s, v21.2s 2024 WORD $0x0e0c3eab // mov w11, v21.s[1] 2025 WORD $0x2eb146d5 // ushl v21.2s, v22.2s, v17.2s 2026 WORD $0x53067d6c // lsr w12, w11, #6 2027 WORD $0x0eb51ef5 // orr v21.8b, v23.8b, v21.8b 2028 WORD $0x138b6d4a // extr w10, w10, w11, #27 2029 WORD $0x4e141d95 // mov v21.s[2], w12 2030 WORD $0x4e1c1d55 // mov v21.s[3], w10 2031 WORD $0x4e201eb5 // and v21.16b, v21.16b, v0.16b 2032 WORD $0x3c9f0135 // stur q21, [x9, #-16] 2033 WORD $0x29452c0a // ldp w10, w11, [x0, #40] 2034 WORD $0xfd401815 // ldr d21, [x0, #48] 2035 WORD $0x1e270176 // fmov s22, w11 2036 WORD $0x138a416a // extr w10, w11, w10, #16 2037 WORD $0x0e953ad6 // zip1 v22.2s, v22.2s, v21.2s 2038 WORD $0x53057d6c // lsr w12, w11, #5 2039 WORD $0x2ea446b5 // ushl v21.2s, v21.2s, v4.2s 2040 WORD $0x1e270157 // fmov s23, w10 2041 WORD $0x2eb246d6 // ushl v22.2s, v22.2s, v18.2s 2042 WORD $0x4e0c1d97 // mov v23.s[1], w12 2043 WORD $0x0eb61eb5 // orr v21.8b, v21.8b, v22.8b 2044 WORD $0x6e1806b7 // mov v23.d[1], v21.d[0] 2045 WORD $0x4e201ef5 // and v21.16b, v23.16b, v0.16b 2046 WORD $0x3d800135 // str q21, [x9] 2047 WORD $0xb940340a // ldr w10, [x0, #52] 2048 WORD $0xfd401c15 // ldr d21, [x0, #56] 2049 WORD $0x1e270156 // fmov s22, w10 2050 WORD $0x0e953ad6 // zip1 v22.2s, v22.2s, v21.2s 2051 WORD $0x53047d4b // lsr w11, w10, #4 2052 WORD $0x2ea546b7 // ushl v23.2s, v21.2s, v5.2s 2053 WORD $0x0e0c3eaa // mov w10, v21.s[1] 2054 WORD $0x2eb346d5 // ushl v21.2s, v22.2s, v19.2s 2055 WORD $0x1e270176 // fmov s22, w11 2056 WORD $0x0eb51ef5 // orr v21.8b, v23.8b, v21.8b 2057 WORD 
$0x6e0c06b6 // mov v22.s[1], v21.s[0] 2058 WORD $0x53037d4a // lsr w10, w10, #3 2059 WORD $0x6e1426b6 // mov v22.s[2], v21.s[1] 2060 WORD $0x4e1c1d56 // mov v22.s[3], w10 2061 WORD $0x4e201ed5 // and v21.16b, v22.16b, v0.16b 2062 WORD $0x3d800535 // str q21, [x9, #16] 2063 WORD $0xfd402015 // ldr d21, [x0, #64] 2064 WORD $0xbd403c16 // ldr s22, [x0, #60] 2065 WORD $0xb940480a // ldr w10, [x0, #72] 2066 WORD $0x2ea646b7 // ushl v23.2s, v21.2s, v6.2s 2067 WORD $0x0e953ad6 // zip1 v22.2s, v22.2s, v21.2s 2068 WORD $0x0e0c3eab // mov w11, v21.s[1] 2069 WORD $0x2eb446d5 // ushl v21.2s, v22.2s, v20.2s 2070 WORD $0x53027d6c // lsr w12, w11, #2 2071 WORD $0x0eb51ef5 // orr v21.8b, v23.8b, v21.8b 2072 WORD $0x138b5d4a // extr w10, w10, w11, #23 2073 WORD $0x4e141d95 // mov v21.s[2], w12 2074 WORD $0x4e1c1d55 // mov v21.s[3], w10 2075 WORD $0x4e201eb5 // and v21.16b, v21.16b, v0.16b 2076 WORD $0x3d800935 // str q21, [x9, #32] 2077 WORD $0x29492c0a // ldp w10, w11, [x0, #72] 2078 WORD $0xb940500c // ldr w12, [x0, #80] 2079 WORD $0x91015000 // add x0, x0, #84 2080 WORD $0x138a316a // extr w10, w11, w10, #12 2081 WORD $0x53017d6d // lsr w13, w11, #1 2082 WORD $0x1e270155 // fmov s21, w10 2083 WORD $0x138b598b // extr w11, w12, w11, #22 2084 WORD $0x4e0c1db5 // mov v21.s[1], w13 2085 WORD $0x530b7d8c // lsr w12, w12, #11 2086 WORD $0x4e141d75 // mov v21.s[2], w11 2087 WORD $0x4e1c1d95 // mov v21.s[3], w12 2088 WORD $0x4e201eb5 // and v21.16b, v21.16b, v0.16b 2089 WORD $0x3d800d35 // str q21, [x9, #48] 2090 WORD $0x91020129 // add x9, x9, #128 2091 2092 BNE LBB0_66 2093 JMP LBB0_99 2094 LBB0_67: // 22-bit path: v1-v3 hold per-lane left-shift counts, v4-v6 hold right-shift counts (negated after the mask setup for USHL). Constants must be loaded into V registers: the WORD-encoded NEON ops read v1-v6, and R1 still holds `out` for the add below. 2095 WORD $0x7100805f // cmp w2, #32 2096 BLT LBB0_99 2097 VMOVD $0x0000000e00000018, V4 // LCPI0_84 2098 VMOVD $0x0000001200000008, V1 // LCPI0_85 2099 VMOVD $0x000000120000001c, V5 // LCPI0_86 2100 VMOVD $0x0000000e00000004, V2 // LCPI0_87 2101 VMOVD $0x000000140000001e, V6 // LCPI0_88 2102 VMOVD $0x0000000c00000002, V3 // LCPI0_89 2103 2104 WORD $0x91010029 // add x9, x1, #64 2105 WORD 
$0x4f01d7e0 // movi v0.4s, #63, msl #16 2106 WORD $0x2ea0b884 // neg v4.2s, v4.2s 2107 WORD $0x2ea0b8a5 // neg v5.2s, v5.2s 2108 WORD $0x2ea0b8c6 // neg v6.2s, v6.2s 2109 LBB0_69: 2110 WORD $0x29402c0a // ldp w10, w11, [x0] 2111 WORD $0xb940080c // ldr w12, [x0, #8] 2112 WORD $0xf1000508 // subs x8, x8, #1 2113 WORD $0x138a596d // extr w13, w11, w10, #22 2114 WORD $0x1e270147 // fmov s7, w10 2115 WORD $0x138b318b // extr w11, w12, w11, #12 2116 WORD $0x4e0c1da7 // mov v7.s[1], w13 2117 WORD $0x53027d8c // lsr w12, w12, #2 2118 WORD $0x4e141d67 // mov v7.s[2], w11 2119 WORD $0x4e1c1d87 // mov v7.s[3], w12 2120 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 2121 WORD $0x3c9c0127 // stur q7, [x9, #-64] 2122 WORD $0xfc40c007 // ldur d7, [x0, #12] 2123 WORD $0xbd400810 // ldr s16, [x0, #8] 2124 WORD $0xb940140a // ldr w10, [x0, #20] 2125 WORD $0x2ea144f1 // ushl v17.2s, v7.2s, v1.2s 2126 WORD $0x0e873a10 // zip1 v16.2s, v16.2s, v7.2s 2127 WORD $0x0e0c3ceb // mov w11, v7.s[1] 2128 WORD $0x2ea44607 // ushl v7.2s, v16.2s, v4.2s 2129 WORD $0x53047d6c // lsr w12, w11, #4 2130 WORD $0x0ea71e27 // orr v7.8b, v17.8b, v7.8b 2131 WORD $0x138b694a // extr w10, w10, w11, #26 2132 WORD $0x4e141d87 // mov v7.s[2], w12 2133 WORD $0x4e1c1d47 // mov v7.s[3], w10 2134 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 2135 WORD $0x3c9d0127 // stur q7, [x9, #-48] 2136 WORD $0x2942ac0a // ldp w10, w11, [x0, #20] 2137 WORD $0xfc41c007 // ldur d7, [x0, #28] 2138 WORD $0x1e270170 // fmov s16, w11 2139 WORD $0x138a416a // extr w10, w11, w10, #16 2140 WORD $0x0e873a10 // zip1 v16.2s, v16.2s, v7.2s 2141 WORD $0x53067d6c // lsr w12, w11, #6 2142 WORD $0x2ea244e7 // ushl v7.2s, v7.2s, v2.2s 2143 WORD $0x1e270151 // fmov s17, w10 2144 WORD $0x2ea54610 // ushl v16.2s, v16.2s, v5.2s 2145 WORD $0x4e0c1d91 // mov v17.s[1], w12 2146 WORD $0x0eb01ce7 // orr v7.8b, v7.8b, v16.8b 2147 WORD $0x6e1804f1 // mov v17.d[1], v7.d[0] 2148 WORD $0x4e201e27 // and v7.16b, v17.16b, v0.16b 2149 WORD $0x3c9e0127 // stur 
q7, [x9, #-32] 2150 WORD $0xb940200a // ldr w10, [x0, #32] 2151 WORD $0xfc424007 // ldur d7, [x0, #36] 2152 WORD $0x1e270150 // fmov s16, w10 2153 WORD $0x0e873a10 // zip1 v16.2s, v16.2s, v7.2s 2154 WORD $0x53087d4b // lsr w11, w10, #8 2155 WORD $0x2ea344f1 // ushl v17.2s, v7.2s, v3.2s 2156 WORD $0x0e0c3cea // mov w10, v7.s[1] 2157 WORD $0x2ea64607 // ushl v7.2s, v16.2s, v6.2s 2158 WORD $0x1e270170 // fmov s16, w11 2159 WORD $0x0ea71e27 // orr v7.8b, v17.8b, v7.8b 2160 WORD $0x6e0c04f0 // mov v16.s[1], v7.s[0] 2161 WORD $0x530a7d4a // lsr w10, w10, #10 2162 WORD $0x6e1424f0 // mov v16.s[2], v7.s[1] 2163 WORD $0x4e1c1d50 // mov v16.s[3], w10 2164 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 2165 WORD $0x3c9f0127 // stur q7, [x9, #-16] 2166 WORD $0x2945ac0a // ldp w10, w11, [x0, #44] WORD $0xb940340c // ldr w12, [x0, #52] (third input word of this group; without this load w12 still holds a value from an earlier group) 2167 WORD $0x138a596d // extr w13, w11, w10, #22 2168 WORD $0x1e270147 // fmov s7, w10 2169 WORD $0x138b318b // extr w11, w12, w11, #12 2170 WORD $0x4e0c1da7 // mov v7.s[1], w13 2171 WORD $0x53027d8c // lsr w12, w12, #2 2172 WORD $0x4e141d67 // mov v7.s[2], w11 2173 WORD $0x4e1c1d87 // mov v7.s[3], w12 2174 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 2175 WORD $0x3d800127 // str q7, [x9] 2176 WORD $0xfd401c07 // ldr d7, [x0, #56] 2177 WORD $0xbd403410 // ldr s16, [x0, #52] 2178 WORD $0xb940400a // ldr w10, [x0, #64] 2179 WORD $0x2ea144f1 // ushl v17.2s, v7.2s, v1.2s 2180 WORD $0x0e873a10 // zip1 v16.2s, v16.2s, v7.2s 2181 WORD $0x0e0c3ceb // mov w11, v7.s[1] 2182 WORD $0x2ea44607 // ushl v7.2s, v16.2s, v4.2s 2183 WORD $0x53047d6c // lsr w12, w11, #4 2184 WORD $0x0ea71e27 // orr v7.8b, v17.8b, v7.8b 2185 WORD $0x138b694a // extr w10, w10, w11, #26 2186 WORD $0x4e141d87 // mov v7.s[2], w12 2187 WORD $0x4e1c1d47 // mov v7.s[3], w10 2188 WORD $0x4e201ce7 // and v7.16b, v7.16b, v0.16b 2189 WORD $0x3d800527 // str q7, [x9, #16] 2190 WORD $0x29482c0a // ldp w10, w11, [x0, #64] 2191 WORD $0xfd402407 // ldr d7, [x0, #72] 2192 WORD $0x1e270170 // fmov s16, w11 2193 WORD $0x138a416a // extr 
w10, w11, w10, #16 2194 WORD $0x0e873a10 // zip1 v16.2s, v16.2s, v7.2s 2195 WORD $0x53067d6c // lsr w12, w11, #6 2196 WORD $0x2ea244e7 // ushl v7.2s, v7.2s, v2.2s 2197 WORD $0x1e270151 // fmov s17, w10 2198 WORD $0x2ea54610 // ushl v16.2s, v16.2s, v5.2s 2199 WORD $0x4e0c1d91 // mov v17.s[1], w12 2200 WORD $0x0eb01ce7 // orr v7.8b, v7.8b, v16.8b 2201 WORD $0x6e1804f1 // mov v17.d[1], v7.d[0] 2202 WORD $0x4e201e27 // and v7.16b, v17.16b, v0.16b 2203 WORD $0x3d800927 // str q7, [x9, #32] 2204 WORD $0xb9404c0a // ldr w10, [x0, #76] 2205 WORD $0xfd402807 // ldr d7, [x0, #80] 2206 WORD $0x91016000 // add x0, x0, #88 2207 WORD $0x1e270150 // fmov s16, w10 2208 WORD $0x0e873a10 // zip1 v16.2s, v16.2s, v7.2s 2209 WORD $0x53087d4b // lsr w11, w10, #8 2210 WORD $0x2ea344f1 // ushl v17.2s, v7.2s, v3.2s 2211 WORD $0x0e0c3cea // mov w10, v7.s[1] 2212 WORD $0x2ea64607 // ushl v7.2s, v16.2s, v6.2s 2213 WORD $0x1e270170 // fmov s16, w11 2214 WORD $0x0ea71e27 // orr v7.8b, v17.8b, v7.8b 2215 WORD $0x6e0c04f0 // mov v16.s[1], v7.s[0] 2216 WORD $0x530a7d4a // lsr w10, w10, #10 2217 WORD $0x6e1424f0 // mov v16.s[2], v7.s[1] 2218 WORD $0x4e1c1d50 // mov v16.s[3], w10 2219 WORD $0x4e201e07 // and v7.16b, v16.16b, v0.16b 2220 WORD $0x3d800d27 // str q7, [x9, #48] 2221 WORD $0x91020129 // add x9, x9, #128 2222 2223 BNE LBB0_69 2224 JMP LBB0_99 2225 LBB0_70: // 23-bit path: v0 = 0x7fffff mask; v1-v7 hold per-lane left-shift counts, v16-v22 hold right-shift counts (negated below for USHL). Constants are loaded into V registers because the WORD-encoded NEON ops read them there. LCPI0_74 goes in V18: the consuming ushl word (0x2eb24717) reads v18, and v25 is a scratch register inside the loop. 2226 WORD $0x7100805f // cmp w2, #32 2227 BLT LBB0_99 2228 VMOVD $0x000000130000001c, V16 // LCPI0_70 2229 VMOVD $0x0000000d00000004, V1 // LCPI0_71 2230 VMOVD $0x0000000f00000018, V17 // LCPI0_72 2231 VMOVD $0x0000001100000008, V2 // LCPI0_73 2232 VMOVD $0x0000000b00000014, V18 // LCPI0_74 2233 VMOVD $0x000000150000000c, V3 // LCPI0_75 2234 VMOVD $0x000000150000001e, V19 // LCPI0_76 2235 VMOVD $0x0000000b00000002, V4 // LCPI0_77 2236 VMOVD $0x000000110000001a, V20 // LCPI0_78 2237 VMOVD $0x0000000f00000006, V5 // LCPI0_79 2238 VMOVD $0x000000160000001f, V21 // LCPI0_80 2239 VMOVD $0x0000000a00000001, V6 // LCPI0_81 2240 VMOVD $0x000000120000001b, 
V22 // LCPI0_82 2241 VMOVD $0x0000000e00000005, V7 // LCPI0_83 2242 2243 WORD $0x91010029 // add x9, x1, #64 2244 WORD $0x4f03d7e0 // movi v0.4s, #127, msl #16 2245 WORD $0x2ea0ba10 // neg v16.2s, v16.2s 2246 WORD $0x2ea0ba31 // neg v17.2s, v17.2s 2247 WORD $0x2ea0ba52 // neg v18.2s, v18.2s 2248 WORD $0x2ea0ba73 // neg v19.2s, v19.2s 2249 WORD $0x2ea0ba94 // neg v20.2s, v20.2s 2250 WORD $0x2ea0bab5 // neg v21.2s, v21.2s 2251 WORD $0x2ea0bad6 // neg v22.2s, v22.2s 2252 LBB0_72: 2253 WORD $0x29402c0a // ldp w10, w11, [x0] 2254 WORD $0xb940080c // ldr w12, [x0, #8] 2255 WORD $0xf1000508 // subs x8, x8, #1 2256 WORD $0x138a5d6d // extr w13, w11, w10, #23 2257 WORD $0x1e270157 // fmov s23, w10 2258 WORD $0x138b398b // extr w11, w12, w11, #14 2259 WORD $0x4e0c1db7 // mov v23.s[1], w13 2260 WORD $0x53057d8c // lsr w12, w12, #5 2261 WORD $0x4e141d77 // mov v23.s[2], w11 2262 WORD $0x4e1c1d97 // mov v23.s[3], w12 2263 WORD $0x4e201ef7 // and v23.16b, v23.16b, v0.16b 2264 WORD $0x3c9c0137 // stur q23, [x9, #-64] 2265 WORD $0xfc40c017 // ldur d23, [x0, #12] 2266 WORD $0xbd400818 // ldr s24, [x0, #8] 2267 WORD $0xb940140a // ldr w10, [x0, #20] 2268 WORD $0x2ea146f9 // ushl v25.2s, v23.2s, v1.2s 2269 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2270 WORD $0x0e0c3eeb // mov w11, v23.s[1] 2271 WORD $0x2eb04717 // ushl v23.2s, v24.2s, v16.2s 2272 WORD $0x53017d4c // lsr w12, w10, #1 2273 WORD $0x138b294a // extr w10, w10, w11, #10 2274 WORD $0x0eb71f37 // orr v23.8b, v25.8b, v23.8b 2275 WORD $0x4e141d57 // mov v23.s[2], w10 2276 WORD $0x4e1c1d97 // mov v23.s[3], w12 2277 WORD $0x4e201ef7 // and v23.16b, v23.16b, v0.16b 2278 WORD $0x3c9d0137 // stur q23, [x9, #-48] 2279 WORD $0xfd400c17 // ldr d23, [x0, #24] 2280 WORD $0xbd401418 // ldr s24, [x0, #20] 2281 WORD $0xb940200a // ldr w10, [x0, #32] 2282 WORD $0x2ea246f9 // ushl v25.2s, v23.2s, v2.2s 2283 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2284 WORD $0x0e0c3eeb // mov w11, v23.s[1] 2285 WORD $0x2eb14717 // ushl v23.2s, 
v24.2s, v17.2s 2286 WORD $0x53067d6c // lsr w12, w11, #6 2287 WORD $0x0eb71f37 // orr v23.8b, v25.8b, v23.8b 2288 WORD $0x138b754a // extr w10, w10, w11, #29 2289 WORD $0x4e141d97 // mov v23.s[2], w12 2290 WORD $0x4e1c1d57 // mov v23.s[3], w10 2291 WORD $0x4e201ef7 // and v23.16b, v23.16b, v0.16b 2292 WORD $0x3c9e0137 // stur q23, [x9, #-32] 2293 WORD $0xfc424017 // ldur d23, [x0, #36] 2294 WORD $0xbd402018 // ldr s24, [x0, #32] 2295 WORD $0xb9402c0a // ldr w10, [x0, #44] 2296 WORD $0x2ea346f9 // ushl v25.2s, v23.2s, v3.2s 2297 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2298 WORD $0x0e0c3eeb // mov w11, v23.s[1] 2299 WORD $0x2eb24717 // ushl v23.2s, v24.2s, v18.2s 2300 WORD $0x53027d6c // lsr w12, w11, #2 2301 WORD $0x0eb71f37 // orr v23.8b, v25.8b, v23.8b 2302 WORD $0x138b654a // extr w10, w10, w11, #25 2303 WORD $0x4e141d97 // mov v23.s[2], w12 2304 WORD $0x4e1c1d57 // mov v23.s[3], w10 2305 WORD $0x4e201ef7 // and v23.16b, v23.16b, v0.16b 2306 WORD $0x3c9f0137 // stur q23, [x9, #-16] 2307 WORD $0x2945ac0a // ldp w10, w11, [x0, #44] 2308 WORD $0xfc434017 // ldur d23, [x0, #52] 2309 WORD $0x1e270178 // fmov s24, w11 2310 WORD $0x138a416a // extr w10, w11, w10, #16 2311 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2312 WORD $0x53077d6c // lsr w12, w11, #7 2313 WORD $0x2ea446f7 // ushl v23.2s, v23.2s, v4.2s 2314 WORD $0x1e270159 // fmov s25, w10 2315 WORD $0x2eb34718 // ushl v24.2s, v24.2s, v19.2s 2316 WORD $0x4e0c1d99 // mov v25.s[1], w12 2317 WORD $0x0eb81ef7 // orr v23.8b, v23.8b, v24.8b 2318 WORD $0x6e1806f9 // mov v25.d[1], v23.d[0] 2319 WORD $0x4e201f37 // and v23.16b, v25.16b, v0.16b 2320 WORD $0x3d800137 // str q23, [x9] 2321 WORD $0x29472c0a // ldp w10, w11, [x0, #56] 2322 WORD $0xfd402017 // ldr d23, [x0, #64] 2323 WORD $0x1e270178 // fmov s24, w11 2324 WORD $0x138a316a // extr w10, w11, w10, #12 2325 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2326 WORD $0x53037d6c // lsr w12, w11, #3 2327 WORD $0x2ea546f7 // ushl v23.2s, v23.2s, v5.2s 2328 
WORD $0x1e270159 // fmov s25, w10 2329 WORD $0x2eb44718 // ushl v24.2s, v24.2s, v20.2s 2330 WORD $0x4e0c1d99 // mov v25.s[1], w12 2331 WORD $0x0eb81ef7 // orr v23.8b, v23.8b, v24.8b 2332 WORD $0x6e1806f9 // mov v25.d[1], v23.d[0] 2333 WORD $0x4e201f37 // and v23.16b, v25.16b, v0.16b 2334 WORD $0x3d800537 // str q23, [x9, #16] 2335 WORD $0xb940440a // ldr w10, [x0, #68] 2336 WORD $0xfd402417 // ldr d23, [x0, #72] 2337 WORD $0xb940500b // ldr w11, [x0, #80] 2338 WORD $0x1e270158 // fmov s24, w10 2339 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2340 WORD $0x53087d4c // lsr w12, w10, #8 2341 WORD $0x2ea646f9 // ushl v25.2s, v23.2s, v6.2s 2342 WORD $0x0e0c3eea // mov w10, v23.s[1] 2343 WORD $0x2eb54717 // ushl v23.2s, v24.2s, v21.2s 2344 WORD $0x1e270198 // fmov s24, w12 2345 WORD $0x0eb71f37 // orr v23.8b, v25.8b, v23.8b 2346 WORD $0x6e0c06f8 // mov v24.s[1], v23.s[0] 2347 WORD $0x138a356a // extr w10, w11, w10, #13 2348 WORD $0x6e1426f8 // mov v24.s[2], v23.s[1] 2349 WORD $0x4e1c1d58 // mov v24.s[3], w10 2350 WORD $0x4e201f17 // and v23.16b, v24.16b, v0.16b 2351 WORD $0x3d800937 // str q23, [x9, #32] 2352 WORD $0xb940500a // ldr w10, [x0, #80] 2353 WORD $0xfc454017 // ldur d23, [x0, #84] 2354 WORD $0x91017000 // add x0, x0, #92 2355 WORD $0x1e270158 // fmov s24, w10 2356 WORD $0x0e973b18 // zip1 v24.2s, v24.2s, v23.2s 2357 WORD $0x53047d4b // lsr w11, w10, #4 2358 WORD $0x2ea746f9 // ushl v25.2s, v23.2s, v7.2s 2359 WORD $0x0e0c3eea // mov w10, v23.s[1] 2360 WORD $0x2eb64717 // ushl v23.2s, v24.2s, v22.2s 2361 WORD $0x1e270178 // fmov s24, w11 2362 WORD $0x0eb71f37 // orr v23.8b, v25.8b, v23.8b 2363 WORD $0x6e0c06f8 // mov v24.s[1], v23.s[0] 2364 WORD $0x53097d4a // lsr w10, w10, #9 2365 WORD $0x6e1426f8 // mov v24.s[2], v23.s[1] 2366 WORD $0x4e1c1d58 // mov v24.s[3], w10 2367 WORD $0x4e201f17 // and v23.16b, v24.16b, v0.16b 2368 WORD $0x3d800d37 // str q23, [x9, #48] 2369 WORD $0x91020129 // add x9, x9, #128 2370 2371 BNE LBB0_72 2372 JMP LBB0_99 2373 LBB0_73: 
2374 WORD $0x7100805f // cmp w2, #32 2375 BLT LBB0_99 2376 WORD $0x91010029 // add x9, x1, #64 (x9 = out + 64; each iteration stores eight q-registers at [x9, #-64] .. [x9, #48], so the first store lands at out) 2377 LBB0_75: 2378 WORD $0x29402c0a // ldp w10, w11, [x0] 2379 WORD $0xb940080c // ldr w12, [x0, #8] 2380 WORD $0xf1000508 // subs x8, x8, #1 2381 WORD $0x138a616d // extr w13, w11, w10, #24 2382 WORD $0x1e270140 // fmov s0, w10 2383 WORD $0x138b418b // extr w11, w12, w11, #16 2384 WORD $0x4e0c1da0 // mov v0.s[1], w13 2385 WORD $0x53087d8c // lsr w12, w12, #8 2386 WORD $0x4e141d60 // mov v0.s[2], w11 2387 WORD $0x4e1c1d80 // mov v0.s[3], w12 2388 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2389 WORD $0x3c9c0120 // stur q0, [x9, #-64] 2390 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 2391 WORD $0xb940140c // ldr w12, [x0, #20] 2392 WORD $0x138a616d // extr w13, w11, w10, #24 2393 WORD $0x1e270140 // fmov s0, w10 2394 WORD $0x138b418b // extr w11, w12, w11, #16 2395 WORD $0x4e0c1da0 // mov v0.s[1], w13 2396 WORD $0x53087d8c // lsr w12, w12, #8 2397 WORD $0x4e141d60 // mov v0.s[2], w11 2398 WORD $0x4e1c1d80 // mov v0.s[3], w12 2399 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2400 WORD $0x3c9d0120 // stur q0, [x9, #-48] 2401 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 2402 WORD $0xb940200c // ldr w12, [x0, #32] 2403 WORD $0x138a616d // extr w13, w11, w10, #24 2404 WORD $0x1e270140 // fmov s0, w10 2405 WORD $0x138b418b // extr w11, w12, w11, #16 2406 WORD $0x4e0c1da0 // mov v0.s[1], w13 2407 WORD $0x53087d8c // lsr w12, w12, #8 2408 WORD $0x4e141d60 // mov v0.s[2], w11 2409 WORD $0x4e1c1d80 // mov v0.s[3], w12 2410 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2411 WORD $0x3c9e0120 // stur q0, [x9, #-32] 2412 WORD $0x2944ac0a // ldp w10, w11, [x0, #36] 2413 WORD $0xb9402c0c // ldr w12, [x0, #44] 2414 WORD $0x138a616d // extr w13, w11, w10, #24 2415 WORD $0x1e270140 // fmov s0, w10 2416 WORD $0x138b418b // extr w11, w12, w11, #16 2417 WORD $0x4e0c1da0 // mov v0.s[1], w13 2418 WORD $0x53087d8c // lsr w12, w12, #8 2419 WORD $0x4e141d60 // mov v0.s[2], w11 2420 WORD $0x4e1c1d80 // 
mov v0.s[3], w12 2421 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2422 WORD $0x3c9f0120 // stur q0, [x9, #-16] 2423 WORD $0x29462c0a // ldp w10, w11, [x0, #48] 2424 WORD $0xb940380c // ldr w12, [x0, #56] 2425 WORD $0x138a616d // extr w13, w11, w10, #24 2426 WORD $0x1e270140 // fmov s0, w10 2427 WORD $0x138b418b // extr w11, w12, w11, #16 2428 WORD $0x4e0c1da0 // mov v0.s[1], w13 2429 WORD $0x53087d8c // lsr w12, w12, #8 2430 WORD $0x4e141d60 // mov v0.s[2], w11 2431 WORD $0x4e1c1d80 // mov v0.s[3], w12 2432 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2433 WORD $0x3d800120 // str q0, [x9] 2434 WORD $0x2947ac0a // ldp w10, w11, [x0, #60] 2435 WORD $0xb940440c // ldr w12, [x0, #68] 2436 WORD $0x138a616d // extr w13, w11, w10, #24 2437 WORD $0x1e270140 // fmov s0, w10 2438 WORD $0x138b418b // extr w11, w12, w11, #16 2439 WORD $0x4e0c1da0 // mov v0.s[1], w13 2440 WORD $0x53087d8c // lsr w12, w12, #8 2441 WORD $0x4e141d60 // mov v0.s[2], w11 2442 WORD $0x4e1c1d80 // mov v0.s[3], w12 2443 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2444 WORD $0x3d800520 // str q0, [x9, #16] 2445 WORD $0x29492c0a // ldp w10, w11, [x0, #72] 2446 WORD $0xb940500c // ldr w12, [x0, #80] 2447 WORD $0x138a616d // extr w13, w11, w10, #24 2448 WORD $0x1e270140 // fmov s0, w10 2449 WORD $0x138b418b // extr w11, w12, w11, #16 2450 WORD $0x4e0c1da0 // mov v0.s[1], w13 2451 WORD $0x53087d8c // lsr w12, w12, #8 2452 WORD $0x4e141d60 // mov v0.s[2], w11 2453 WORD $0x4e1c1d80 // mov v0.s[3], w12 2454 WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24 2455 WORD $0x3d800920 // str q0, [x9, #32] 2456 WORD $0x294aac0a // ldp w10, w11, [x0, #84] 2457 WORD $0xb9405c0c // ldr w12, [x0, #92] 2458 WORD $0x91018000 // add x0, x0, #96 2459 WORD $0x138a616d // extr w13, w11, w10, #24 2460 WORD $0x1e270140 // fmov s0, w10 2461 WORD $0x138b418b // extr w11, w12, w11, #16 2462 WORD $0x4e0c1da0 // mov v0.s[1], w13 2463 WORD $0x53087d8c // lsr w12, w12, #8 2464 WORD $0x4e141d60 // mov v0.s[2], w11 2465 WORD $0x4e1c1d80 // 
mov v0.s[3], w12
    WORD $0x6f0777e0 // bic v0.4s, #255, lsl #24
    WORD $0x3d800d20 // str q0, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128

    BNE LBB0_75
    JMP LBB0_99
LBB0_76:
    // 25-bit case: skip when fewer than 32 values were requested.
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99
    // Per-lane shift counts for the ushl instructions in the loop. They must be
    // loaded into vector registers (VMOVD Vn): a plain MOVD into R0/R1/R2 overwrites
    // the in/out pointers that the loop and the add below still read, and leaves the
    // vector registers that neg/ushl operate on unloaded.
    // LCPI0_62 goes to V18 because the ushl that consumes it in this loop is encoded
    // with v18 as its shift operand (0x2eb246f6).
    VMOVD $0x000000160000001d, V7 // LCPI0_56
    VMOVD $0x0000000a00000003, V0 // LCPI0_57
    VMOVD $0x000000130000001a, V16 // LCPI0_58
    VMOVD $0x0000000d00000006, V1 // LCPI0_59
    VMOVD $0x000000170000001e, V17 // LCPI0_60
    VMOVD $0x0000000900000002, V2 // LCPI0_61
    VMOVD $0x0000000900000010, V18 // LCPI0_62
    VMOVD $0x0000001700000010, V3 // LCPI0_63
    VMOVD $0x0000000d00000014, V19 // LCPI0_64
    VMOVD $0x000000130000000c, V4 // LCPI0_65
    VMOVD $0x0000001100000018, V20 // LCPI0_66
    VMOVD $0x0000000f00000008, V5 // LCPI0_67
    VMOVD $0x000000150000001c, V21 // LCPI0_68
    VMOVD $0x0000000b00000004, V6 // LCPI0_69

    WORD $0x91010029 // add x9, x1, #64
    // Negated counts turn ushl into a right shift for the low part of each lane.
    WORD $0x2ea0b8e7 // neg v7.2s, v7.2s
    WORD $0x2ea0ba10 // neg v16.2s, v16.2s
    WORD $0x2ea0ba31 // neg v17.2s, v17.2s
    WORD $0x2ea0ba52 // neg v18.2s, v18.2s
    WORD $0x2ea0ba73 // neg v19.2s, v19.2s
    WORD $0x2ea0ba94 // neg v20.2s, v20.2s
    WORD $0x2ea0bab5 // neg v21.2s, v21.2s
LBB0_78:
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0x2941340c // ldp w12, w13, [x0, #8]
    WORD $0xf1000508 // subs x8, x8, #1
    WORD $0x138a656e // extr w14, w11, w10, #25
    WORD $0x1e270156 // fmov s22, w10
    WORD $0x138b498b // extr w11, w12, w11, #18
    WORD $0x4e0c1dd6 // mov v22.s[1], w14
    WORD $0x138c2dac // extr w12, w13, w12, #11
    WORD $0x4e141d76 // mov v22.s[2], w11
    WORD $0x4e1c1d96 // mov v22.s[3], w12
    WORD $0x6f0777d6 // bic v22.4s, #254, lsl #24
    WORD $0x3c9c0136 // stur q22, [x9, #-64]
    WORD $0xb9400c0a // ldr w10, [x0, #12]
    WORD $0xfd400816 // ldr d22, [x0, #16]
    WORD $0xb940180b // ldr w11, [x0, #24]
    WORD $0x1e270157 // fmov s23, w10
WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2516 WORD $0x53047d4c // lsr w12, w10, #4 2517 WORD $0x2ea046d8 // ushl v24.2s, v22.2s, v0.2s 2518 WORD $0x0e0c3eca // mov w10, v22.s[1] 2519 WORD $0x2ea746f6 // ushl v22.2s, v23.2s, v7.2s 2520 WORD $0x1e270197 // fmov s23, w12 2521 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2522 WORD $0x6e0c06d7 // mov v23.s[1], v22.s[0] 2523 WORD $0x138a3d6a // extr w10, w11, w10, #15 2524 WORD $0x6e1426d7 // mov v23.s[2], v22.s[1] 2525 WORD $0x4e1c1d57 // mov v23.s[3], w10 2526 WORD $0x6f0777d7 // bic v23.4s, #254, lsl #24 2527 WORD $0x3c9d0137 // stur q23, [x9, #-48] 2528 WORD $0x29432c0a // ldp w10, w11, [x0, #24] 2529 WORD $0xfd401016 // ldr d22, [x0, #32] 2530 WORD $0x1e270177 // fmov s23, w11 2531 WORD $0x138a216a // extr w10, w11, w10, #8 2532 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2533 WORD $0x53017d6c // lsr w12, w11, #1 2534 WORD $0x2ea146d6 // ushl v22.2s, v22.2s, v1.2s 2535 WORD $0x1e270158 // fmov s24, w10 2536 WORD $0x2eb046f7 // ushl v23.2s, v23.2s, v16.2s 2537 WORD $0x4e0c1d98 // mov v24.s[1], w12 2538 WORD $0x0eb71ed6 // orr v22.8b, v22.8b, v23.8b 2539 WORD $0x6e1806d8 // mov v24.d[1], v22.d[0] 2540 WORD $0x6f0777d8 // bic v24.4s, #254, lsl #24 2541 WORD $0x3c9e0138 // stur q24, [x9, #-32] 2542 WORD $0x2944ac0a // ldp w10, w11, [x0, #36] 2543 WORD $0xfc42c016 // ldur d22, [x0, #44] 2544 WORD $0x1e270177 // fmov s23, w11 2545 WORD $0x138a316a // extr w10, w11, w10, #12 2546 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2547 WORD $0x53057d6c // lsr w12, w11, #5 2548 WORD $0x2ea246d6 // ushl v22.2s, v22.2s, v2.2s 2549 WORD $0x1e270158 // fmov s24, w10 2550 WORD $0x2eb146f7 // ushl v23.2s, v23.2s, v17.2s 2551 WORD $0x4e0c1d98 // mov v24.s[1], w12 2552 WORD $0x0eb71ed6 // orr v22.8b, v22.8b, v23.8b 2553 WORD $0x6e1806d8 // mov v24.d[1], v22.d[0] 2554 WORD $0x6f0777d8 // bic v24.4s, #254, lsl #24 2555 WORD $0x3c9f0138 // stur q24, [x9, #-16] 2556 WORD $0xfc434016 // ldur d22, [x0, #52] 2557 WORD $0xbd403017 
// ldr s23, [x0, #48] 2558 WORD $0xb9403c0a // ldr w10, [x0, #60] 2559 WORD $0x2ea346d8 // ushl v24.2s, v22.2s, v3.2s 2560 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2561 WORD $0x0e0c3ecb // mov w11, v22.s[1] 2562 WORD $0x2eb246f6 // ushl v22.2s, v23.2s, v25.2s 2563 WORD $0x53027d6c // lsr w12, w11, #2 2564 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2565 WORD $0x138b6d4a // extr w10, w10, w11, #27 2566 WORD $0x4e141d96 // mov v22.s[2], w12 2567 WORD $0x4e1c1d56 // mov v22.s[3], w10 2568 WORD $0x6f0777d6 // bic v22.4s, #254, lsl #24 2569 WORD $0x3d800136 // str q22, [x9] 2570 WORD $0xfd402016 // ldr d22, [x0, #64] 2571 WORD $0xbd403c17 // ldr s23, [x0, #60] 2572 WORD $0xb940480a // ldr w10, [x0, #72] 2573 WORD $0x2ea446d8 // ushl v24.2s, v22.2s, v4.2s 2574 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2575 WORD $0x0e0c3ecb // mov w11, v22.s[1] 2576 WORD $0x2eb346f6 // ushl v22.2s, v23.2s, v19.2s 2577 WORD $0x53067d6c // lsr w12, w11, #6 2578 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2579 WORD $0x138b7d4a // extr w10, w10, w11, #31 2580 WORD $0x4e141d96 // mov v22.s[2], w12 2581 WORD $0x4e1c1d56 // mov v22.s[3], w10 2582 WORD $0x6f0777d6 // bic v22.4s, #254, lsl #24 2583 WORD $0x3d800536 // str q22, [x9, #16] 2584 WORD $0xfc44c016 // ldur d22, [x0, #76] 2585 WORD $0xbd404817 // ldr s23, [x0, #72] 2586 WORD $0xb940540a // ldr w10, [x0, #84] 2587 WORD $0x2ea546d8 // ushl v24.2s, v22.2s, v5.2s 2588 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2589 WORD $0x0e0c3ecb // mov w11, v22.s[1] 2590 WORD $0x2eb446f6 // ushl v22.2s, v23.2s, v20.2s 2591 WORD $0x53037d4c // lsr w12, w10, #3 2592 WORD $0x138b294a // extr w10, w10, w11, #10 2593 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2594 WORD $0x4e141d56 // mov v22.s[2], w10 2595 WORD $0x4e1c1d96 // mov v22.s[3], w12 2596 WORD $0x6f0777d6 // bic v22.4s, #254, lsl #24 2597 WORD $0x3d800936 // str q22, [x9, #32] 2598 WORD $0xfd402c16 // ldr d22, [x0, #88] 2599 WORD $0xbd405417 // ldr s23, [x0, #84] 2600 WORD 
$0xb940600a // ldr w10, [x0, #96]
    WORD $0x91019000 // add x0, x0, #100
    WORD $0x2ea646d8 // ushl v24.2s, v22.2s, v6.2s
    WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s
    WORD $0x0e0c3ecb // mov w11, v22.s[1]
    WORD $0x2eb546f6 // ushl v22.2s, v23.2s, v21.2s
    // Lane 3 of this last group is the word loaded from [x0, #96] shifted right by 7.
    // It has to be computed before extr overwrites w10; without it w12 still holds the
    // previous group's value.
    WORD $0x53077d4c // lsr w12, w10, #7
    WORD $0x138b394a // extr w10, w10, w11, #14
    WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b
    WORD $0x4e141d56 // mov v22.s[2], w10
    WORD $0x4e1c1d96 // mov v22.s[3], w12
    WORD $0x6f0777d6 // bic v22.4s, #254, lsl #24
    WORD $0x3d800d36 // str q22, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128

    BNE LBB0_78
    JMP LBB0_99
LBB0_79:
    // 27-bit case: skip when fewer than 32 values were requested.
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99
    // Per-lane shift counts for the ushl instructions in the loop. The 64-bit tables
    // must be loaded with VMOVD into vector registers: MOVD into R0/R2/... overwrites
    // the input pointer the loop reads through and leaves the vector operands unloaded.
    // LCPI0_42 goes to V18 because the ushl that consumes it in this loop is encoded
    // with v18 as its shift operand (0x2eb246f6).
    VMOVD $0x000000070000000c, V7 // LCPI0_36
    VMOVD $0x0000001900000014, V0 // LCPI0_37
    VMOVQ $0x0000001300000018, $0x000000090000000e, V16 // LCPI0_38
    VMOVQ $0x0000000d00000008, $0x0000001700000012, V1 // LCPI0_39
    VMOVD $0x0000001a0000001f, V17 // LCPI0_40
    VMOVD $0x0000000600000001, V2 // LCPI0_41
    VMOVD $0x0000000b00000010, V18 // LCPI0_42
    VMOVD $0x0000001500000010, V3 // LCPI0_43
    VMOVQ $0x000000170000001c, $0x0000000d00000012, V19 // LCPI0_44
    VMOVQ $0x0000000900000004, $0x000000130000000e, V4 // LCPI0_45
    VMOVD $0x000000190000001e, V20 // LCPI0_46
    VMOVD $0x0000000700000002, V5 // LCPI0_47
    VMOVD $0x0000000f00000014, V21 // LCPI0_48
    VMOVD $0x000000110000000c, V6 // LCPI0_49

    WORD $0x91010029 // add x9, x1, #64
    // Negated counts turn ushl into a right shift for the low part of each lane.
    WORD $0x2ea0b8e7 // neg v7.2s, v7.2s
    WORD $0x6ea0ba10 // neg v16.4s, v16.4s
    WORD $0x2ea0ba31 // neg v17.2s, v17.2s
    WORD $0x2ea0ba52 // neg v18.2s, v18.2s
    WORD $0x6ea0ba73 // neg v19.4s, v19.4s
    WORD $0x2ea0ba94 // neg v20.2s, v20.2s
    WORD $0x2ea0bab5 // neg v21.2s, v21.2s
LBB0_81:
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0x2941340c // ldp w12, w13, [x0, #8]
    WORD $0xf1000508 // subs x8, x8, #1
    WORD $0x138a6d6e // extr w14, w11, w10, #27
WORD $0x1e270156 // fmov s22, w10 2648 WORD $0x138b598b // extr w11, w12, w11, #22 2649 WORD $0x4e0c1dd6 // mov v22.s[1], w14 2650 WORD $0x138c45ac // extr w12, w13, w12, #17 2651 WORD $0x4e141d76 // mov v22.s[2], w11 2652 WORD $0x4e1c1d96 // mov v22.s[3], w12 2653 WORD $0x6f077716 // bic v22.4s, #248, lsl #24 2654 WORD $0x3c9c0136 // stur q22, [x9, #-64] 2655 WORD $0xfd400816 // ldr d22, [x0, #16] 2656 WORD $0xbd400c17 // ldr s23, [x0, #12] 2657 WORD $0xb940180a // ldr w10, [x0, #24] 2658 WORD $0x2ea046d8 // ushl v24.2s, v22.2s, v0.2s 2659 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2660 WORD $0x0e0c3ecb // mov w11, v22.s[1] 2661 WORD $0x2ea746f6 // ushl v22.2s, v23.2s, v7.2s 2662 WORD $0x53027d6c // lsr w12, w11, #2 2663 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2664 WORD $0x138b754a // extr w10, w10, w11, #29 2665 WORD $0x4e141d96 // mov v22.s[2], w12 2666 WORD $0x4e1c1d56 // mov v22.s[3], w10 2667 WORD $0x6f077716 // bic v22.4s, #248, lsl #24 2668 WORD $0x3c9d0136 // stur q22, [x9, #-48] 2669 WORD $0xbd401816 // ldr s22, [x0, #24] 2670 WORD $0x3cc1c017 // ldur q23, [x0, #28] 2671 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 2672 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 2673 WORD $0x6ea146f8 // ushl v24.4s, v23.4s, v1.4s 2674 WORD $0x6eb046d6 // ushl v22.4s, v22.4s, v16.4s 2675 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 2676 WORD $0x6f077716 // bic v22.4s, #248, lsl #24 2677 WORD $0x3c9e0136 // stur q22, [x9, #-32] 2678 WORD $0xb940280a // ldr w10, [x0, #40] 2679 WORD $0xfc42c016 // ldur d22, [x0, #44] 2680 WORD $0xb940340b // ldr w11, [x0, #52] 2681 WORD $0x1e270157 // fmov s23, w10 2682 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2683 WORD $0x53047d4c // lsr w12, w10, #4 2684 WORD $0x2ea246d8 // ushl v24.2s, v22.2s, v2.2s 2685 WORD $0x0e0c3eca // mov w10, v22.s[1] 2686 WORD $0x2eb146f6 // ushl v22.2s, v23.2s, v17.2s 2687 WORD $0x1e270197 // fmov s23, w12 2688 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2689 WORD 
$0x6e0c06d7 // mov v23.s[1], v22.s[0] 2690 WORD $0x138a556a // extr w10, w11, w10, #21 2691 WORD $0x6e1426d7 // mov v23.s[2], v22.s[1] 2692 WORD $0x4e1c1d57 // mov v23.s[3], w10 2693 WORD $0x6f077717 // bic v23.4s, #248, lsl #24 2694 WORD $0x3c9f0137 // stur q23, [x9, #-16] 2695 WORD $0xfd401c16 // ldr d22, [x0, #56] 2696 WORD $0xbd403417 // ldr s23, [x0, #52] 2697 WORD $0xb940400a // ldr w10, [x0, #64] 2698 WORD $0x2ea346d8 // ushl v24.2s, v22.2s, v3.2s 2699 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2700 WORD $0x0e0c3ecb // mov w11, v22.s[1] 2701 WORD $0x2eb246f6 // ushl v22.2s, v23.2s, v25.2s 2702 WORD $0x53017d4c // lsr w12, w10, #1 2703 WORD $0x138b194a // extr w10, w10, w11, #6 2704 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2705 WORD $0x4e141d56 // mov v22.s[2], w10 2706 WORD $0x4e1c1d96 // mov v22.s[3], w12 2707 WORD $0x6f077716 // bic v22.4s, #248, lsl #24 2708 WORD $0x3d800136 // str q22, [x9] 2709 WORD $0xbd404016 // ldr s22, [x0, #64] 2710 WORD $0x3cc44017 // ldur q23, [x0, #68] 2711 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 2712 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 2713 WORD $0x6ea446f8 // ushl v24.4s, v23.4s, v4.4s 2714 WORD $0x6eb346d6 // ushl v22.4s, v22.4s, v19.4s 2715 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 2716 WORD $0x6f077716 // bic v22.4s, #248, lsl #24 2717 WORD $0x3d800536 // str q22, [x9, #16] 2718 WORD $0x294a2c0a // ldp w10, w11, [x0, #80] 2719 WORD $0xfd402c16 // ldr d22, [x0, #88] 2720 WORD $0x1e270177 // fmov s23, w11 2721 WORD $0x138a216a // extr w10, w11, w10, #8 2722 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2723 WORD $0x53037d6c // lsr w12, w11, #3 2724 WORD $0x2ea546d6 // ushl v22.2s, v22.2s, v5.2s 2725 WORD $0x1e270158 // fmov s24, w10 2726 WORD $0x2eb446f7 // ushl v23.2s, v23.2s, v20.2s 2727 WORD $0x4e0c1d98 // mov v24.s[1], w12 2728 WORD $0x0eb71ed6 // orr v22.8b, v22.8b, v23.8b 2729 WORD $0x6e1806d8 // mov v24.d[1], v22.d[0] 2730 WORD $0x6f077718 // bic v24.4s, #248, lsl 
#24
    WORD $0x3d800938 // str q24, [x9, #32]
    WORD $0xfd403016 // ldr d22, [x0, #96]
    WORD $0xbd405c17 // ldr s23, [x0, #92]
    WORD $0xb940680a // ldr w10, [x0, #104]
    WORD $0x9101b000 // add x0, x0, #108
    WORD $0x2ea646d8 // ushl v24.2s, v22.2s, v6.2s
    WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s
    WORD $0x0e0c3ecb // mov w11, v22.s[1]
    WORD $0x2eb546f6 // ushl v22.2s, v23.2s, v21.2s
    WORD $0x53057d4c // lsr w12, w10, #5
    WORD $0x138b294a // extr w10, w10, w11, #10
    WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b
    WORD $0x4e141d56 // mov v22.s[2], w10
    WORD $0x4e1c1d96 // mov v22.s[3], w12
    WORD $0x6f077716 // bic v22.4s, #248, lsl #24
    WORD $0x3d800d36 // str q22, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128

    BNE LBB0_81
    JMP LBB0_99
LBB0_82:
    // 28-bit case: skip when fewer than 32 values were requested.
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99
    // Shift counts for the two-lane ushl pairs in the loop (v1 is negated below and
    // used as the right shift, v0 as the left shift). They must be loaded with VMOVD
    // into vector registers: MOVD into R1/R0 overwrites the out/in pointers that the
    // add below and the loop loads still need.
    VMOVD $0x0000000c00000010, V1 // LCPI0_34
    VMOVD $0x0000001400000010, V0 // LCPI0_35

    WORD $0x91010029 // add x9, x1, #64
    WORD $0x2ea0b821 // neg v1.2s, v1.2s
LBB0_84:
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0x2941340c // ldp w12, w13, [x0, #8]
    WORD $0xf1000508 // subs x8, x8, #1
    WORD $0x138a716e // extr w14, w11, w10, #28
    WORD $0x1e270142 // fmov s2, w10
    WORD $0x138b618b // extr w11, w12, w11, #24
    WORD $0x4e0c1dc2 // mov v2.s[1], w14
    WORD $0x138c51ac // extr w12, w13, w12, #20
    WORD $0x4e141d62 // mov v2.s[2], w11
    WORD $0x4e1c1d82 // mov v2.s[3], w12
    WORD $0x6f077602 // bic v2.4s, #240, lsl #24
    WORD $0x3c9c0122 // stur q2, [x9, #-64]
    WORD $0xfd400802 // ldr d2, [x0, #16]
    WORD $0xbd400c03 // ldr s3, [x0, #12]
    WORD $0xb940180a // ldr w10, [x0, #24]
    WORD $0x2ea04444 // ushl v4.2s, v2.2s, v0.2s
    WORD $0x0e823863 // zip1 v3.2s, v3.2s, v2.2s
    WORD $0x0e0c3c4b // mov w11, v2.s[1]
    WORD $0x2ea14462 // ushl v2.2s, v3.2s, v1.2s
    WORD $0x53047d4c // lsr w12, w10, #4
WORD $0x138b214a // extr w10, w10, w11, #8 2781 WORD $0x0ea21c82 // orr v2.8b, v4.8b, v2.8b 2782 WORD $0x4e141d42 // mov v2.s[2], w10 2783 WORD $0x4e1c1d82 // mov v2.s[3], w12 2784 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2785 WORD $0x3c9d0122 // stur q2, [x9, #-48] 2786 WORD $0x2943ac0a // ldp w10, w11, [x0, #28] 2787 WORD $0x2944b40c // ldp w12, w13, [x0, #36] 2788 WORD $0x138a716e // extr w14, w11, w10, #28 2789 WORD $0x1e270142 // fmov s2, w10 2790 WORD $0x138b618b // extr w11, w12, w11, #24 2791 WORD $0x4e0c1dc2 // mov v2.s[1], w14 2792 WORD $0x138c51ac // extr w12, w13, w12, #20 2793 WORD $0x4e141d62 // mov v2.s[2], w11 2794 WORD $0x4e1c1d82 // mov v2.s[3], w12 2795 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2796 WORD $0x3c9e0122 // stur q2, [x9, #-32] 2797 WORD $0xfc42c002 // ldur d2, [x0, #44] 2798 WORD $0xbd402803 // ldr s3, [x0, #40] 2799 WORD $0xb940340a // ldr w10, [x0, #52] 2800 WORD $0x2ea04444 // ushl v4.2s, v2.2s, v0.2s 2801 WORD $0x0e823863 // zip1 v3.2s, v3.2s, v2.2s 2802 WORD $0x0e0c3c4b // mov w11, v2.s[1] 2803 WORD $0x2ea14462 // ushl v2.2s, v3.2s, v1.2s 2804 WORD $0x53047d4c // lsr w12, w10, #4 2805 WORD $0x138b214a // extr w10, w10, w11, #8 2806 WORD $0x0ea21c82 // orr v2.8b, v4.8b, v2.8b 2807 WORD $0x4e141d42 // mov v2.s[2], w10 2808 WORD $0x4e1c1d82 // mov v2.s[3], w12 2809 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2810 WORD $0x3c9f0122 // stur q2, [x9, #-16] 2811 WORD $0x29472c0a // ldp w10, w11, [x0, #56] 2812 WORD $0x2948340c // ldp w12, w13, [x0, #64] 2813 WORD $0x138a716e // extr w14, w11, w10, #28 2814 WORD $0x1e270142 // fmov s2, w10 2815 WORD $0x138b618b // extr w11, w12, w11, #24 2816 WORD $0x4e0c1dc2 // mov v2.s[1], w14 2817 WORD $0x138c51ac // extr w12, w13, w12, #20 2818 WORD $0x4e141d62 // mov v2.s[2], w11 2819 WORD $0x4e1c1d82 // mov v2.s[3], w12 2820 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2821 WORD $0x3d800122 // str q2, [x9] 2822 WORD $0xfd402402 // ldr d2, [x0, #72] 2823 WORD $0xbd404403 // ldr s3, [x0, #68] 
2824 WORD $0xb940500a // ldr w10, [x0, #80] 2825 WORD $0x2ea04444 // ushl v4.2s, v2.2s, v0.2s 2826 WORD $0x0e823863 // zip1 v3.2s, v3.2s, v2.2s 2827 WORD $0x0e0c3c4b // mov w11, v2.s[1] 2828 WORD $0x2ea14462 // ushl v2.2s, v3.2s, v1.2s 2829 WORD $0x53047d4c // lsr w12, w10, #4 2830 WORD $0x138b214a // extr w10, w10, w11, #8 2831 WORD $0x0ea21c82 // orr v2.8b, v4.8b, v2.8b 2832 WORD $0x4e141d42 // mov v2.s[2], w10 2833 WORD $0x4e1c1d82 // mov v2.s[3], w12 2834 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2835 WORD $0x3d800522 // str q2, [x9, #16] 2836 WORD $0x294aac0a // ldp w10, w11, [x0, #84] 2837 WORD $0x294bb40c // ldp w12, w13, [x0, #92] 2838 WORD $0x138a716e // extr w14, w11, w10, #28 2839 WORD $0x1e270142 // fmov s2, w10 2840 WORD $0x138b618b // extr w11, w12, w11, #24 2841 WORD $0x4e0c1dc2 // mov v2.s[1], w14 2842 WORD $0x138c51ac // extr w12, w13, w12, #20 2843 WORD $0x4e141d62 // mov v2.s[2], w11 2844 WORD $0x4e1c1d82 // mov v2.s[3], w12 2845 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2846 WORD $0x3d800922 // str q2, [x9, #32] 2847 WORD $0xfc464002 // ldur d2, [x0, #100] 2848 WORD $0xbd406003 // ldr s3, [x0, #96] 2849 WORD $0xb9406c0a // ldr w10, [x0, #108] 2850 WORD $0x9101c000 // add x0, x0, #112 2851 WORD $0x2ea04444 // ushl v4.2s, v2.2s, v0.2s 2852 WORD $0x0e823863 // zip1 v3.2s, v3.2s, v2.2s 2853 WORD $0x0e0c3c4b // mov w11, v2.s[1] 2854 WORD $0x2ea14462 // ushl v2.2s, v3.2s, v1.2s 2855 WORD $0x53047d4c // lsr w12, w10, #4 2856 WORD $0x138b214a // extr w10, w10, w11, #8 2857 WORD $0x0ea21c82 // orr v2.8b, v4.8b, v2.8b 2858 WORD $0x4e141d42 // mov v2.s[2], w10 2859 WORD $0x4e1c1d82 // mov v2.s[3], w12 2860 WORD $0x6f077602 // bic v2.4s, #240, lsl #24 2861 WORD $0x3d800d22 // str q2, [x9, #48] 2862 WORD $0x91020129 // add x9, x9, #128 2863 2864 BNE LBB0_84 2865 JMP LBB0_99 2866 LBB0_85: 2867 WORD $0x7100805f // cmp w2, #32 2868 BLT LBB0_99 2869 VMOVQ $0x0000001100000014, $0x0000000b0000000e, V7 // LCPI0_20 2870 VMOVQ $0x0000000f0000000c, 
$0x0000001500000012, V0 // LCPI0_21
    // 29-bit case, remaining shift-count tables. The 64-bit tables must be loaded with
    // VMOVD into vector registers: MOVD into R1/R4/R6/... overwrites the out pointer
    // read by the add below and leaves the vector operands of neg/ushl unloaded.
    // LCPI0_26 goes to V18 because both the neg below (0x6ea0ba52) and the ushl that
    // consumes it in the loop (0x6eb246d6) are encoded with v18.
    VMOVD $0x0000000500000008, V16 // LCPI0_22
    VMOVD $0x0000001b00000018, V1 // LCPI0_23
    VMOVQ $0x000000190000001c, $0x0000001300000016, V17 // LCPI0_24
    VMOVQ $0x0000000700000004, $0x0000000d0000000a, V2 // LCPI0_25
    VMOVQ $0x0000000d00000010, $0x000000070000000a, V18 // LCPI0_26
    VMOVQ $0x0000001300000010, $0x0000001900000016, V3 // LCPI0_27
    VMOVD $0x0000001b0000001e, V19 // LCPI0_28
    VMOVD $0x0000000500000002, V4 // LCPI0_29
    VMOVQ $0x0000001500000018, $0x0000000f00000012, V20 // LCPI0_30
    VMOVQ $0x0000000b00000008, $0x000000110000000e, V5 // LCPI0_31
    VMOVD $0x000000090000000c, V21 // LCPI0_32
    VMOVD $0x0000001700000014, V6 // LCPI0_33

    WORD $0x91010029 // add x9, x1, #64
    // Negated counts turn ushl into a right shift for the low part of each lane.
    WORD $0x6ea0b8e7 // neg v7.4s, v7.4s
    WORD $0x2ea0ba10 // neg v16.2s, v16.2s
    WORD $0x6ea0ba31 // neg v17.4s, v17.4s
    WORD $0x6ea0ba52 // neg v18.4s, v18.4s
    WORD $0x2ea0ba73 // neg v19.2s, v19.2s
    WORD $0x6ea0ba94 // neg v20.4s, v20.4s
    WORD $0x2ea0bab5 // neg v21.2s, v21.2s
LBB0_87:
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0x2941340c // ldp w12, w13, [x0, #8]
    WORD $0xf1000508 // subs x8, x8, #1
    WORD $0x138a756e // extr w14, w11, w10, #29
    WORD $0x1e270156 // fmov s22, w10
    WORD $0x138b698b // extr w11, w12, w11, #26
    WORD $0x4e0c1dd6 // mov v22.s[1], w14
    WORD $0x138c5dac // extr w12, w13, w12, #23
    WORD $0x4e141d76 // mov v22.s[2], w11
    WORD $0x4e1c1d96 // mov v22.s[3], w12
    WORD $0x6f077416 // bic v22.4s, #224, lsl #24
    WORD $0x3c9c0136 // stur q22, [x9, #-64]
    WORD $0xbd400c16 // ldr s22, [x0, #12]
    WORD $0x3dc00417 // ldr q23, [x0, #16]
    WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4
    WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12
    WORD $0x6ea046f8 // ushl v24.4s, v23.4s, v0.4s
    WORD $0x6ea746d6 // ushl v22.4s, v22.4s, v7.4s
    WORD $0x4eb61f16 // orr v22.16b, v24.16b,
v22.16b 2912 WORD $0x6f077416 // bic v22.4s, #224, lsl #24 2913 WORD $0x3c9d0136 // stur q22, [x9, #-48] 2914 WORD $0xfd401016 // ldr d22, [x0, #32] 2915 WORD $0xbd401c17 // ldr s23, [x0, #28] 2916 WORD $0xb940280a // ldr w10, [x0, #40] 2917 WORD $0x2ea146d8 // ushl v24.2s, v22.2s, v1.2s 2918 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2919 WORD $0x0e0c3ecb // mov w11, v22.s[1] 2920 WORD $0x2eb046f6 // ushl v22.2s, v23.2s, v16.2s 2921 WORD $0x53027d6c // lsr w12, w11, #2 2922 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 2923 WORD $0x138b7d4a // extr w10, w10, w11, #31 2924 WORD $0x4e141d96 // mov v22.s[2], w12 2925 WORD $0x4e1c1d56 // mov v22.s[3], w10 2926 WORD $0x6f077416 // bic v22.4s, #224, lsl #24 2927 WORD $0x3c9e0136 // stur q22, [x9, #-32] 2928 WORD $0xbd402816 // ldr s22, [x0, #40] 2929 WORD $0x3cc2c017 // ldur q23, [x0, #44] 2930 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 2931 WORD $0x6ea246f8 // ushl v24.4s, v23.4s, v2.4s 2932 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 2933 WORD $0x6f077418 // bic v24.4s, #224, lsl #24 2934 WORD $0x6eb146d6 // ushl v22.4s, v22.4s, v17.4s 2935 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 2936 WORD $0x3c9f0136 // stur q22, [x9, #-16] 2937 WORD $0xbd403816 // ldr s22, [x0, #56] 2938 WORD $0x3cc3c017 // ldur q23, [x0, #60] 2939 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 2940 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 2941 WORD $0x6ea346f8 // ushl v24.4s, v23.4s, v3.4s 2942 WORD $0x6eb246d6 // ushl v22.4s, v22.4s, v25.4s 2943 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 2944 WORD $0x6f077416 // bic v22.4s, #224, lsl #24 2945 WORD $0x3d800136 // str q22, [x9] 2946 WORD $0x29492c0a // ldp w10, w11, [x0, #72] 2947 WORD $0xfd402816 // ldr d22, [x0, #80] 2948 WORD $0x1e270177 // fmov s23, w11 2949 WORD $0x138a116a // extr w10, w11, w10, #4 2950 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 2951 WORD $0x53017d6c // lsr w12, w11, #1 2952 WORD $0x2ea446d6 // ushl v22.2s, 
v22.2s, v4.2s
    WORD $0x1e270158 // fmov s24, w10
    WORD $0x2eb346f7 // ushl v23.2s, v23.2s, v19.2s
    WORD $0x4e0c1d98 // mov v24.s[1], w12
    WORD $0x0eb71ed6 // orr v22.8b, v22.8b, v23.8b
    WORD $0x6e1806d8 // mov v24.d[1], v22.d[0]
    WORD $0x6f077418 // bic v24.4s, #224, lsl #24
    WORD $0x3d800538 // str q24, [x9, #16]
    WORD $0xbd405416 // ldr s22, [x0, #84]
    WORD $0x3cc58017 // ldur q23, [x0, #88]
    WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4
    WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12
    WORD $0x6ea546f8 // ushl v24.4s, v23.4s, v5.4s
    WORD $0x6eb446d6 // ushl v22.4s, v22.4s, v20.4s
    WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b
    WORD $0x6f077416 // bic v22.4s, #224, lsl #24
    WORD $0x3d800936 // str q22, [x9, #32]
    WORD $0xfd403416 // ldr d22, [x0, #104]
    WORD $0xbd406417 // ldr s23, [x0, #100]
    WORD $0xb940700a // ldr w10, [x0, #112]
    WORD $0x9101d000 // add x0, x0, #116
    WORD $0x2ea646d8 // ushl v24.2s, v22.2s, v6.2s
    WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s
    WORD $0x0e0c3ecb // mov w11, v22.s[1]
    WORD $0x2eb546f6 // ushl v22.2s, v23.2s, v21.2s
    WORD $0x53037d4c // lsr w12, w10, #3
    WORD $0x138b194a // extr w10, w10, w11, #6
    WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b
    WORD $0x4e141d56 // mov v22.s[2], w10
    WORD $0x4e1c1d96 // mov v22.s[3], w12
    WORD $0x6f077416 // bic v22.4s, #224, lsl #24
    WORD $0x3d800d36 // str q22, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128

    BNE LBB0_87
    JMP LBB0_99
LBB0_88:
    // 30-bit case: skip when fewer than 32 values were requested.
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99
    // Per-lane shift counts for the ushl instructions in the loop. The two 64-bit
    // tables must be loaded with VMOVD into v5/v2 like the 128-bit ones above: MOVD
    // into R5/R2 leaves the vector operands of "neg v5" and the v2/v5 ushl unloaded.
    VMOVQ $0x0000001600000018, $0x0000001200000014, V3 // LCPI0_14
    VMOVQ $0x0000000a00000008, $0x0000000e0000000c, V0 // LCPI0_15
    VMOVQ $0x0000000e00000010, $0x0000000a0000000c, V4 // LCPI0_16
    VMOVQ $0x0000001200000010, $0x0000001600000014, V1 // LCPI0_17
    VMOVD $0x0000000600000008, V5 // LCPI0_18
    VMOVD $0x0000001a00000018, V2 // LCPI0_19

    WORD $0x91010029 // add x9, x1, #64
    // Negated counts turn ushl into a right shift for the low part of each lane.
    WORD $0x6ea0b863 // neg v3.4s, v3.4s
    WORD $0x6ea0b884 // neg v4.4s, v4.4s
    WORD $0x2ea0b8a5 // neg v5.2s, v5.2s
LBB0_90:
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0x2941340c // ldp w12, w13, [x0, #8]
    WORD $0xf1000508 // subs x8, x8, #1
    WORD $0x138a796e // extr w14, w11, w10, #30
    WORD $0x1e270146 // fmov s6, w10
    WORD $0x138b718b // extr w11, w12, w11, #28
    WORD $0x4e0c1dc6 // mov v6.s[1], w14
    WORD $0x138c69ac // extr w12, w13, w12, #26
    WORD $0x4e141d66 // mov v6.s[2], w11
    WORD $0x4e1c1d86 // mov v6.s[3], w12
    WORD $0x6f067406 // bic v6.4s, #192, lsl #24
    WORD $0x3c9c0126 // stur q6, [x9, #-64]
    WORD $0xbd400c06 // ldr s6, [x0, #12]
    WORD $0x3dc00407 // ldr q7, [x0, #16]
    WORD $0x6e0620c6 // ext v6.16b, v6.16b, v6.16b, #4
    WORD $0x6ea044f0 // ushl v16.4s, v7.4s, v0.4s
    WORD $0x6e0760c6 // ext v6.16b, v6.16b, v7.16b, #12
    WORD $0x6f067410 // bic v16.4s, #192, lsl #24
    WORD $0x6ea344c6 // ushl v6.4s, v6.4s, v3.4s
    WORD $0x4ea61e06 // orr v6.16b, v16.16b, v6.16b
    WORD $0x3c9d0126 // stur q6, [x9, #-48]
    WORD $0xbd401c06 // ldr s6, [x0, #28]
    WORD $0x3dc00807 // ldr q7, [x0, #32]
    WORD $0x6e0620c6 // ext v6.16b, v6.16b, v6.16b, #4
    WORD $0x6ea144f0 // ushl v16.4s, v7.4s, v1.4s
    WORD $0x6e0760c6 // ext v6.16b, v6.16b, v7.16b, #12
    WORD $0x6f067410 // bic v16.4s, #192, lsl #24
    WORD $0x6ea444c6 // ushl v6.4s, v6.4s, v4.4s
    WORD $0x4ea61e06 // orr v6.16b, v16.16b, v6.16b
    WORD $0x3c9e0126 // stur q6, [x9, #-32]
    WORD $0xfd401806 // ldr d6, [x0, #48]
    WORD $0xbd402c07 // ldr s7, [x0, #44]
    WORD $0xb940380a // ldr w10, [x0, #56]
    WORD $0x2ea244d0 // ushl v16.2s, v6.2s, v2.2s
    WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s
    WORD $0x0e0c3ccb // mov w11, v6.s[1]
    WORD $0x2ea544e6 // ushl v6.2s, v7.2s, v5.2s
    WORD
$0x53027d4c // lsr w12, w10, #2 3041 WORD $0x138b114a // extr w10, w10, w11, #4 3042 WORD $0x0ea61e06 // orr v6.8b, v16.8b, v6.8b 3043 WORD $0x4e141d46 // mov v6.s[2], w10 3044 WORD $0x4e1c1d86 // mov v6.s[3], w12 3045 WORD $0x6f067406 // bic v6.4s, #192, lsl #24 3046 WORD $0x3c9f0126 // stur q6, [x9, #-16] 3047 WORD $0x2947ac0a // ldp w10, w11, [x0, #60] 3048 WORD $0x2948b40c // ldp w12, w13, [x0, #68] 3049 WORD $0x138a796e // extr w14, w11, w10, #30 3050 WORD $0x1e270146 // fmov s6, w10 3051 WORD $0x138b718b // extr w11, w12, w11, #28 3052 WORD $0x4e0c1dc6 // mov v6.s[1], w14 3053 WORD $0x138c69ac // extr w12, w13, w12, #26 3054 WORD $0x4e141d66 // mov v6.s[2], w11 3055 WORD $0x4e1c1d86 // mov v6.s[3], w12 3056 WORD $0x6f067406 // bic v6.4s, #192, lsl #24 3057 WORD $0x3d800126 // str q6, [x9] 3058 WORD $0xbd404806 // ldr s6, [x0, #72] 3059 WORD $0x3cc4c007 // ldur q7, [x0, #76] 3060 WORD $0x6e0620c6 // ext v6.16b, v6.16b, v6.16b, #4 3061 WORD $0x6ea044f0 // ushl v16.4s, v7.4s, v0.4s 3062 WORD $0x6e0760c6 // ext v6.16b, v6.16b, v7.16b, #12 3063 WORD $0x6f067410 // bic v16.4s, #192, lsl #24 3064 WORD $0x6ea344c6 // ushl v6.4s, v6.4s, v3.4s 3065 WORD $0x4ea61e06 // orr v6.16b, v16.16b, v6.16b 3066 WORD $0x3d800526 // str q6, [x9, #16] 3067 WORD $0xbd405806 // ldr s6, [x0, #88] 3068 WORD $0x3cc5c007 // ldur q7, [x0, #92] 3069 WORD $0x6e0620c6 // ext v6.16b, v6.16b, v6.16b, #4 3070 WORD $0x6ea144f0 // ushl v16.4s, v7.4s, v1.4s 3071 WORD $0x6e0760c6 // ext v6.16b, v6.16b, v7.16b, #12 3072 WORD $0x6f067410 // bic v16.4s, #192, lsl #24 3073 WORD $0x6ea444c6 // ushl v6.4s, v6.4s, v4.4s 3074 WORD $0x4ea61e06 // orr v6.16b, v16.16b, v6.16b 3075 WORD $0x3d800926 // str q6, [x9, #32] 3076 WORD $0xfc46c006 // ldur d6, [x0, #108] 3077 WORD $0xbd406807 // ldr s7, [x0, #104] 3078 WORD $0xb940740a // ldr w10, [x0, #116] 3079 WORD $0x9101e000 // add x0, x0, #120 3080 WORD $0x2ea244d0 // ushl v16.2s, v6.2s, v2.2s 3081 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3082 WORD 
$0x0e0c3ccb // mov w11, v6.s[1]
    WORD $0x2ea544e6 // ushl v6.2s, v7.2s, v5.2s
    WORD $0x53027d4c // lsr w12, w10, #2
    WORD $0x138b114a // extr w10, w10, w11, #4
    WORD $0x0ea61e06 // orr v6.8b, v16.8b, v6.8b
    WORD $0x4e141d46 // mov v6.s[2], w10
    WORD $0x4e1c1d86 // mov v6.s[3], w12
    WORD $0x6f067406 // bic v6.4s, #192, lsl #24
    WORD $0x3d800d26 // str q6, [x9, #48]
    WORD $0x91020129 // add x9, x9, #128

    BNE LBB0_90
    JMP LBB0_99
LBB0_91:
    // 31-bit case: skip when fewer than 32 values were requested.
    WORD $0x7100805f // cmp w2, #32
    BLT LBB0_99
    // Per-lane shift counts for the ushl instructions in the loop.
    // LCPI0_6 goes to V18 because both the neg below (0x6ea0ba52) and the ushl that
    // consumes it in the loop (0x6eb246d6) are encoded with v18, not v25.
    // The two 64-bit tables must be loaded with VMOVD into v21/v6: MOVD into R21/R6
    // leaves the vector operands of "neg v21" and the v6/v21 ushl unloaded.
    VMOVQ $0x0000001b0000001c, $0x000000190000001a, V7 // LCPI0_0
    VMOVQ $0x0000000500000004, $0x0000000700000006, V0 // LCPI0_1
    VMOVQ $0x0000001700000018, $0x0000001500000016, V16 // LCPI0_2
    VMOVQ $0x0000000900000008, $0x0000000b0000000a, V1 // LCPI0_3
    VMOVQ $0x0000001300000014, $0x0000001100000012, V17 // LCPI0_4
    VMOVQ $0x0000000d0000000c, $0x0000000f0000000e, V2 // LCPI0_5
    VMOVQ $0x0000000f00000010, $0x0000000d0000000e, V18 // LCPI0_6
    VMOVQ $0x0000001100000010, $0x0000001300000012, V3 // LCPI0_7
    VMOVQ $0x0000000b0000000c, $0x000000090000000a, V19 // LCPI0_8
    VMOVQ $0x0000001500000014, $0x0000001700000016, V4 // LCPI0_9
    VMOVQ $0x0000000700000008, $0x0000000500000006, V20 // LCPI0_10
    VMOVQ $0x0000001900000018, $0x0000001b0000001a, V5 // LCPI0_11
    VMOVD $0x0000000300000004, V21 // LCPI0_12
    VMOVD $0x0000001d0000001c, V6 // LCPI0_13

    WORD $0x91010029 // add x9, x1, #64
    // Negated counts turn ushl into a right shift for the low part of each lane.
    WORD $0x6ea0b8e7 // neg v7.4s, v7.4s
    WORD $0x6ea0ba10 // neg v16.4s, v16.4s
    WORD $0x6ea0ba31 // neg v17.4s, v17.4s
    WORD $0x6ea0ba52 // neg v18.4s, v18.4s
    WORD $0x6ea0ba73 // neg v19.4s, v19.4s
    WORD $0x6ea0ba94 // neg v20.4s, v20.4s
    WORD $0x2ea0bab5 // neg v21.2s, v21.2s
LBB0_93:
    WORD $0x29402c0a // ldp w10, w11, [x0]
    WORD $0x2941340c // ldp w12, w13, [x0, #8]
    WORD $0xf1000508 // subs x8, x8, #1
    WORD $0x138a7d6e // extr w14, w11, w10, #31
3126 WORD $0x1e270156 // fmov s22, w10 3127 WORD $0x4e0c1dd6 // mov v22.s[1], w14 3128 WORD $0x138c75ac // extr w12, w13, w12, #29 3129 WORD $0x4e141d76 // mov v22.s[2], w11 3130 WORD $0x4e1c1d96 // mov v22.s[3], w12 3131 WORD $0x6f047416 // bic v22.4s, #128, lsl #24 3132 WORD $0x3c9c0136 // stur q22, [x9, #-64] 3133 WORD $0xbd400c16 // ldr s22, [x0, #12] 3134 WORD $0x3dc00417 // ldr q23, [x0, #16] 3135 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 3136 WORD $0x6ea046f8 // ushl v24.4s, v23.4s, v0.4s 3137 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 3138 WORD $0x6f047418 // bic v24.4s, #128, lsl #24 3139 WORD $0x6ea746d6 // ushl v22.4s, v22.4s, v7.4s 3140 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 3141 WORD $0x3c9d0136 // stur q22, [x9, #-48] 3142 WORD $0xbd401c16 // ldr s22, [x0, #28] 3143 WORD $0x3dc00817 // ldr q23, [x0, #32] 3144 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 3145 WORD $0x6ea146f8 // ushl v24.4s, v23.4s, v1.4s 3146 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 3147 WORD $0x6f047418 // bic v24.4s, #128, lsl #24 3148 WORD $0x6eb046d6 // ushl v22.4s, v22.4s, v16.4s 3149 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 3150 WORD $0x3c9e0136 // stur q22, [x9, #-32] 3151 WORD $0xbd402c16 // ldr s22, [x0, #44] 3152 WORD $0x3dc00c17 // ldr q23, [x0, #48] 3153 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 3154 WORD $0x6ea246f8 // ushl v24.4s, v23.4s, v2.4s 3155 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 3156 WORD $0x6f047418 // bic v24.4s, #128, lsl #24 3157 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 3158 WORD $0x3c9f0136 // stur q22, [x9, #-16] 3159 WORD $0xbd403c16 // ldr s22, [x0, #60] 3160 WORD $0x3dc01017 // ldr q23, [x0, #64] 3161 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 3162 WORD $0x6ea346f8 // ushl v24.4s, v23.4s, v3.4s 3163 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 3164 WORD $0x6f047418 // bic v24.4s, #128, lsl #24 3165 WORD $0x6eb246d6 // ushl v22.4s, 
v22.4s, v25.4s 3166 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 3167 WORD $0x3d800136 // str q22, [x9] 3168 WORD $0xbd404c16 // ldr s22, [x0, #76] 3169 WORD $0x3dc01417 // ldr q23, [x0, #80] 3170 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 3171 WORD $0x6ea446f8 // ushl v24.4s, v23.4s, v4.4s 3172 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 3173 WORD $0x6f047418 // bic v24.4s, #128, lsl #24 3174 WORD $0x6eb346d6 // ushl v22.4s, v22.4s, v19.4s 3175 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 3176 WORD $0x3d800536 // str q22, [x9, #16] 3177 WORD $0xbd405c16 // ldr s22, [x0, #92] 3178 WORD $0x3dc01817 // ldr q23, [x0, #96] 3179 WORD $0x6e1622d6 // ext v22.16b, v22.16b, v22.16b, #4 3180 WORD $0x6ea546f8 // ushl v24.4s, v23.4s, v5.4s 3181 WORD $0x6e1762d6 // ext v22.16b, v22.16b, v23.16b, #12 3182 WORD $0x6f047418 // bic v24.4s, #128, lsl #24 3183 WORD $0x6eb446d6 // ushl v22.4s, v22.4s, v20.4s 3184 WORD $0x4eb61f16 // orr v22.16b, v24.16b, v22.16b 3185 WORD $0x3d800936 // str q22, [x9, #32] 3186 WORD $0xfd403816 // ldr d22, [x0, #112] 3187 WORD $0xbd406c17 // ldr s23, [x0, #108] 3188 WORD $0xb940780a // ldr w10, [x0, #120] 3189 WORD $0x9101f000 // add x0, x0, #124 3190 WORD $0x2ea646d8 // ushl v24.2s, v22.2s, v6.2s 3191 WORD $0x0e963af7 // zip1 v23.2s, v23.2s, v22.2s 3192 WORD $0x0e0c3ecb // mov w11, v22.s[1] 3193 WORD $0x2eb546f6 // ushl v22.2s, v23.2s, v21.2s 3194 WORD $0x53017d4c // lsr w12, w10, #1 3195 WORD $0x138b094a // extr w10, w10, w11, #2 3196 WORD $0x0eb61f16 // orr v22.8b, v24.8b, v22.8b 3197 WORD $0x4e141d56 // mov v22.s[2], w10 3198 WORD $0x4e1c1d96 // mov v22.s[3], w12 3199 WORD $0x6f047416 // bic v22.4s, #128, lsl #24 3200 WORD $0x3d800d36 // str q22, [x9, #48] 3201 WORD $0x91020129 // add x9, x9, #128 3202 3203 BNE LBB0_93 3204 JMP LBB0_99 3205 LBB0_94: 3206 WORD $0x7100805f // cmp w2, #32 3207 BLT LBB0_99 3208 LBB0_95: 3209 WORD $0xad410400 // ldp q0, q1, [x0, #32] 3210 WORD $0xad400c02 // ldp q2, q3, [x0] 3211 WORD 
$0xf1000508 // subs x8, x8, #1 3212 WORD $0xad010420 // stp q0, q1, [x1, #32] 3213 WORD $0xad000c22 // stp q2, q3, [x1] 3214 WORD $0xad430400 // ldp q0, q1, [x0, #96] 3215 WORD $0xad420c02 // ldp q2, q3, [x0, #64] 3216 WORD $0x91020000 // add x0, x0, #128 3217 WORD $0xad030420 // stp q0, q1, [x1, #96] 3218 WORD $0xad020c22 // stp q2, q3, [x1, #64] 3219 WORD $0x91020021 // add x1, x1, #128 3220 3221 BNE LBB0_95 3222 JMP LBB0_99 3223 LBB0_96: 3224 WORD $0x7100805f // cmp w2, #32 3225 BLT LBB0_99 3226 MOVD $0x000000160000001c, R3 // LCPI0_50 3227 MOVD $0x0000000a00000004, R0 // LCPI0_51 3228 MOVD $0x0000000a00000010, R4 // LCPI0_52 3229 MOVD $0x0000001600000010, R1 // LCPI0_53 3230 MOVD $0x0000001200000018, R5 // LCPI0_54 3231 MOVD $0x0000000e00000008, R2 // LCPI0_55 3232 3233 WORD $0x91010029 // add x9, x1, #64 3234 WORD $0x2ea0b863 // neg v3.2s, v3.2s 3235 WORD $0x2ea0b884 // neg v4.2s, v4.2s 3236 WORD $0x2ea0b8a5 // neg v5.2s, v5.2s 3237 LBB0_98: 3238 WORD $0x29402c0a // ldp w10, w11, [x0] 3239 WORD $0x2941340c // ldp w12, w13, [x0, #8] 3240 WORD $0xf1000508 // subs x8, x8, #1 3241 WORD $0x138a696e // extr w14, w11, w10, #26 3242 WORD $0x1e270146 // fmov s6, w10 3243 WORD $0x138b518b // extr w11, w12, w11, #20 3244 WORD $0x4e0c1dc6 // mov v6.s[1], w14 3245 WORD $0x138c39ac // extr w12, w13, w12, #14 3246 WORD $0x4e141d66 // mov v6.s[2], w11 3247 WORD $0x4e1c1d86 // mov v6.s[3], w12 3248 WORD $0x6f077786 // bic v6.4s, #252, lsl #24 3249 WORD $0x3c9c0126 // stur q6, [x9, #-64] 3250 WORD $0x2941ac0a // ldp w10, w11, [x0, #12] 3251 WORD $0xfc414006 // ldur d6, [x0, #20] 3252 WORD $0x1e270167 // fmov s7, w11 3253 WORD $0x138a216a // extr w10, w11, w10, #8 3254 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3255 WORD $0x53027d6c // lsr w12, w11, #2 3256 WORD $0x2ea044c6 // ushl v6.2s, v6.2s, v0.2s 3257 WORD $0x1e270150 // fmov s16, w10 3258 WORD $0x2ea344e7 // ushl v7.2s, v7.2s, v3.2s 3259 WORD $0x4e0c1d90 // mov v16.s[1], w12 3260 WORD $0x0ea71cc6 // orr v6.8b, v6.8b, 
v7.8b 3261 WORD $0x6e1804d0 // mov v16.d[1], v6.d[0] 3262 WORD $0x6f077790 // bic v16.4s, #252, lsl #24 3263 WORD $0x3c9d0130 // stur q16, [x9, #-48] 3264 WORD $0xfc41c006 // ldur d6, [x0, #28] 3265 WORD $0xbd401807 // ldr s7, [x0, #24] 3266 WORD $0xb940240a // ldr w10, [x0, #36] 3267 WORD $0x2ea144d0 // ushl v16.2s, v6.2s, v1.2s 3268 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3269 WORD $0x0e0c3ccb // mov w11, v6.s[1] 3270 WORD $0x2ea444e6 // ushl v6.2s, v7.2s, v4.2s 3271 WORD $0x53047d6c // lsr w12, w11, #4 3272 WORD $0x0ea61e06 // orr v6.8b, v16.8b, v6.8b 3273 WORD $0x138b794a // extr w10, w10, w11, #30 3274 WORD $0x4e141d86 // mov v6.s[2], w12 3275 WORD $0x4e1c1d46 // mov v6.s[3], w10 3276 WORD $0x6f077786 // bic v6.4s, #252, lsl #24 3277 WORD $0x3c9e0126 // stur q6, [x9, #-32] 3278 WORD $0xfd401406 // ldr d6, [x0, #40] 3279 WORD $0xbd402407 // ldr s7, [x0, #36] 3280 WORD $0xb940300a // ldr w10, [x0, #48] 3281 WORD $0x2ea244d0 // ushl v16.2s, v6.2s, v2.2s 3282 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3283 WORD $0x0e0c3ccb // mov w11, v6.s[1] 3284 WORD $0x2ea544e6 // ushl v6.2s, v7.2s, v5.2s 3285 WORD $0x138b314a // extr w10, w10, w11, #12 3286 WORD $0x0ea61e06 // orr v6.8b, v16.8b, v6.8b 3287 WORD $0x4e141d46 // mov v6.s[2], w10 3288 WORD $0x4e1c1d86 // mov v6.s[3], w12 3289 WORD $0x6f077786 // bic v6.4s, #252, lsl #24 3290 WORD $0x3c9f0126 // stur q6, [x9, #-16] 3291 WORD $0x2946ac0a // ldp w10, w11, [x0, #52] 3292 WORD $0x2947b40c // ldp w12, w13, [x0, #60] 3293 WORD $0x138a696e // extr w14, w11, w10, #26 3294 WORD $0x1e270146 // fmov s6, w10 3295 WORD $0x138b518b // extr w11, w12, w11, #20 3296 WORD $0x4e0c1dc6 // mov v6.s[1], w14 3297 WORD $0x138c39ac // extr w12, w13, w12, #14 3298 WORD $0x4e1c1d86 // mov v6.s[3], w12 3299 WORD $0x6f077786 // bic v6.4s, #252, lsl #24 3300 WORD $0x3d800126 // str q6, [x9] 3301 WORD $0x29482c0a // ldp w10, w11, [x0, #64] 3302 WORD $0xfd402406 // ldr d6, [x0, #72] 3303 WORD $0x1e270167 // fmov s7, w11 3304 WORD 
$0x138a216a // extr w10, w11, w10, #8 3305 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3306 WORD $0x2ea044c6 // ushl v6.2s, v6.2s, v0.2s 3307 WORD $0x1e270150 // fmov s16, w10 3308 WORD $0x2ea344e7 // ushl v7.2s, v7.2s, v3.2s 3309 WORD $0x4e0c1d90 // mov v16.s[1], w12 3310 WORD $0x0ea71cc6 // orr v6.8b, v6.8b, v7.8b 3311 WORD $0x6e1804d0 // mov v16.d[1], v6.d[0] 3312 WORD $0x6f077790 // bic v16.4s, #252, lsl #24 3313 WORD $0x3d800530 // str q16, [x9, #16] 3314 WORD $0xfd402806 // ldr d6, [x0, #80] 3315 WORD $0xbd404c07 // ldr s7, [x0, #76] 3316 WORD $0xb940580a // ldr w10, [x0, #88] 3317 WORD $0x2ea144d0 // ushl v16.2s, v6.2s, v1.2s 3318 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3319 WORD $0x0e0c3ccb // mov w11, v6.s[1] 3320 WORD $0x2ea444e6 // ushl v6.2s, v7.2s, v4.2s 3321 WORD $0x53047d6c // lsr w12, w11, #4 3322 WORD $0x0ea61e06 // orr v6.8b, v16.8b, v6.8b 3323 WORD $0x138b794a // extr w10, w10, w11, #30 3324 WORD $0x4e141d86 // mov v6.s[2], w12 3325 WORD $0x4e1c1d46 // mov v6.s[3], w10 3326 WORD $0x6f077786 // bic v6.4s, #252, lsl #24 3327 WORD $0x3d800926 // str q6, [x9, #32] 3328 WORD $0xfc45c006 // ldur d6, [x0, #92] 3329 WORD $0xbd405807 // ldr s7, [x0, #88] 3330 WORD $0xb940640a // ldr w10, [x0, #100] 3331 WORD $0x9101a000 // add x0, x0, #104 3332 WORD $0x2ea244d0 // ushl v16.2s, v6.2s, v2.2s 3333 WORD $0x0e8638e7 // zip1 v7.2s, v7.2s, v6.2s 3334 WORD $0x0e0c3ccb // mov w11, v6.s[1] 3335 WORD $0x2ea544e6 // ushl v6.2s, v7.2s, v5.2s 3336 WORD $0x53067d4c // lsr w12, w10, #6 3337 WORD $0x138b314a // extr w10, w10, w11, #12 3338 WORD $0x0ea61e06 // orr v6.8b, v16.8b, v6.8b 3339 WORD $0x4e141d46 // mov v6.s[2], w10 3340 WORD $0x4e1c1d86 // mov v6.s[3], w12 3341 WORD $0x6f077786 // bic v6.4s, #252, lsl #24 3342 WORD $0x3d800d26 // str q6, [x9, #48] 3343 WORD $0x91020129 // add x9, x9, #128 3344 3345 BNE LBB0_98 3346 LBB0_99: 3347 MOVD R19, num+32(FP) 3348 WORD $0xf9400bf3 // ldr x19, [sp, #16] 3349 WORD $0xa8c27bfd // ldp x29, x30, [sp], #32 3350 RET 3351