github.com/apache/arrow/go/v10@v10.0.1/parquet/internal/utils/bit_packing_neon_arm64.s (about)

     1  //+build !noasm !appengine
     2  
     3  // ARROW-15336
     4  // (C2GOASM doesn't work correctly for Arm64)
     5  // Partly GENERATED BY asm2plan9s.
     6  
     7  DATA LJTI<>+0x000(SB)/2, $0
     8  DATA LJTI<>+0x002(SB)/2, $9
     9  DATA LJTI<>+0x004(SB)/2, $72
    10  DATA LJTI<>+0x006(SB)/2, $133
    11  DATA LJTI<>+0x008(SB)/2, $210
    12  DATA LJTI<>+0x00a(SB)/2, $291
    13  DATA LJTI<>+0x00c(SB)/2, $390
    14  DATA LJTI<>+0x00e(SB)/2, $474
    15  DATA LJTI<>+0x010(SB)/2, $567
    16  DATA LJTI<>+0x012(SB)/2, $657
    17  DATA LJTI<>+0x014(SB)/2, $758
    18  DATA LJTI<>+0x016(SB)/2, $854
    19  DATA LJTI<>+0x018(SB)/2, $957
    20  DATA LJTI<>+0x01a(SB)/2, $1048
    21  DATA LJTI<>+0x01c(SB)/2, $4601
    22  DATA LJTI<>+0x01e(SB)/2, $1250
    23  DATA LJTI<>+0x020(SB)/2, $1354
    24  DATA LJTI<>+0x022(SB)/2, $1436
    25  DATA LJTI<>+0x024(SB)/2, $1541
    26  DATA LJTI<>+0x026(SB)/2, $1645
    27  DATA LJTI<>+0x028(SB)/2, $1773
    28  DATA LJTI<>+0x02a(SB)/2, $1875
    29  DATA LJTI<>+0x02c(SB)/2, $2015
    30  DATA LJTI<>+0x02e(SB)/2, $2145
    31  DATA LJTI<>+0x030(SB)/2, $2292
    32  DATA LJTI<>+0x032(SB)/2, $2390
    33  DATA LJTI<>+0x034(SB)/2, $3140
    34  DATA LJTI<>+0x036(SB)/2, $2533
    35  DATA LJTI<>+0x038(SB)/2, $2667
    36  DATA LJTI<>+0x03a(SB)/2, $2781
    37  DATA LJTI<>+0x03c(SB)/2, $2902
    38  DATA LJTI<>+0x03e(SB)/2, $3008
    39  DATA LJTI<>+0x040(SB)/2, $3117
    40  GLOBL LJTI<>+0(SB), 8, $66
    41  
    42  
    43  // func _unpack32_neon(in, out unsafe.Pointer, batchSize, nbits int) (num int)
    44  TEXT ยท_unpack32_neon(SB), $24-40
    45  
    46      MOVD    in+0(FP), R0
    47      MOVD    out+8(FP), R1
    48      MOVD    batchSize+16(FP), R2
    49      MOVD    nbits+24(FP), R3
    50  
    51      WORD $0xa9be7bfd // stp    x29, x30, [sp, #-32]!
    52      WORD $0x11007c48 // add    w8, w2, #31
    53      WORD $0x7100005f // cmp    w2, #0
    54      WORD $0x1a82b108 // csel    w8, w8, w2, lt
    55      WORD $0xf9000bf3 // str    x19, [sp, #16]
    56      WORD $0x7100807f // cmp    w3, #32
    57      WORD $0x121b6913 // and    w19, w8, #0xffffffe0
    58      WORD $0x910003fd // mov    x29, sp
    59      BHI LBB0_99
    60      WORD $0x2a0303e9 // mov    w9, w3
    61      MOVD LJTI<>+0x00(SB), R10
    62      WORD $0x1000000b // adr    x11, LBB0_2
    63      WORD $0x7869794c // ldrh    w12, [x10, x9, lsl #1]
    64      WORD $0x8b0c096b // add    x11, x11, x12, lsl #2
    65      WORD $0x13057d08 // asr    w8, w8, #5
    66      WORD $0xd61f0160 // br    x11
    67  LBB0_2:
    68      WORD $0x7100805f // cmp    w2, #32
    69      BLT LBB0_99
    70      WORD $0x51000508 // sub    w8, w8, #1
    71      WORD $0xd379e108 // lsl    x8, x8, #7
    72      WORD $0x91020102 // add    x2, x8, #128
    73      WORD $0xaa0103e0 // mov    x0, x1
    74      WORD $0x2a1f03e1 // mov    w1, wzr
    75      WORD $0x94000000 // bl    memset
    76  	JMP LBB0_99
    77  LBB0_4:
    78      WORD $0x7100805f // cmp    w2, #32
    79  	BLT LBB0_99
    80      VMOVQ $0x0000000500000004, $0x0000000700000006, V1 // LCPI0_1
    81      VMOVQ $0x0000000900000008, $0x0000000b0000000a, V2 // LCPI0_3
    82      VMOVQ $0x0000000d0000000c, $0x0000000f0000000e, V3 // LCPI0_5
    83      VMOVQ $0x0000001100000010, $0x0000001300000012, V4 // LCPI0_7
    84      VMOVQ $0x0000001500000014, $0x0000001700000016, V5 // LCPI0_9
    85      VMOVQ $0x0000001900000018, $0x0000001b0000001a, V6 // LCPI0_11
    86      VMOVQ $0x0000001d0000001c, $0x0000001f0000001e, V7 // LCPI0_121
    87      WORD $0x91010029 // add    x9, x1, #64 
    88      WORD $0x4f000420 // movi    v0.4s, #1
    89      WORD $0x6ea0b821 // neg    v1.4s, v1.4s
    90      WORD $0x6ea0b842 // neg    v2.4s, v2.4s
    91      WORD $0x6ea0b863 // neg    v3.4s, v3.4s
    92      WORD $0x6ea0b884 // neg    v4.4s, v4.4s
    93      WORD $0x6ea0b8a5 // neg    v5.4s, v5.4s
    94      WORD $0x6ea0b8c6 // neg    v6.4s, v6.4s
    95      WORD $0x6ea0b8e7 // neg    v7.4s, v7.4s
    96      WORD $0xaa0003ea // mov    x10, x0
    97  LBB0_6:
    98      WORD $0xb940000b // ldr    w11, [x0]
    99      WORD $0xf1000508 // subs    x8, x8, #1
   100      WORD $0x53017d6c // lsr    w12, w11, #1
   101      WORD $0x1e270170 // fmov    s16, w11
   102      WORD $0x53027d6d // lsr    w13, w11, #2
   103      WORD $0x4e0c1d90 // mov    v16.s[1], w12
   104      WORD $0x53037d6e // lsr    w14, w11, #3
   105      WORD $0x4e141db0 // mov    v16.s[2], w13
   106      WORD $0x4e1c1dd0 // mov    v16.s[3], w14
   107      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   108      WORD $0x3c9c0130 // stur    q16, [x9, #-64]
   109      WORD $0x4ddfc950 // ld1r    { v16.4s }, [x10], #4
   110      WORD $0x6ea14610 // ushl    v16.4s, v16.4s, v1.4s
   111      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   112      WORD $0x3c9d0130 // stur    q16, [x9, #-48]
   113      WORD $0x4d40c810 // ld1r    { v16.4s }, [x0]
   114      WORD $0x6ea24610 // ushl    v16.4s, v16.4s, v2.4s
   115      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   116      WORD $0x3c9e0130 // stur    q16, [x9, #-32]
   117      WORD $0x4d40c810 // ld1r    { v16.4s }, [x0]
   118      WORD $0x6ea34610 // ushl    v16.4s, v16.4s, v3.4s
   119      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   120      WORD $0x3c9f0130 // stur    q16, [x9, #-16]
   121      WORD $0x4d40c810 // ld1r    { v16.4s }, [x0]
   122      WORD $0x6ea44610 // ushl    v16.4s, v16.4s, v4.4s
   123      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   124      WORD $0x3d800130 // str    q16, [x9]
   125      WORD $0x4d40c810 // ld1r    { v16.4s }, [x0]
   126      WORD $0x6ea54610 // ushl    v16.4s, v16.4s, v5.4s
   127      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   128      WORD $0x3d800530 // str    q16, [x9, #16]
   129      WORD $0x4d40c810 // ld1r    { v16.4s }, [x0]
   130      WORD $0x6ea64610 // ushl    v16.4s, v16.4s, v6.4s
   131      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   132      WORD $0x3d800930 // str    q16, [x9, #32]
   133      WORD $0x4d40c810 // ld1r    { v16.4s }, [x0]
   134      WORD $0xaa0a03e0 // mov    x0, x10
   135      WORD $0x6ea74610 // ushl    v16.4s, v16.4s, v7.4s
   136      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   137      WORD $0x3d800d30 // str    q16, [x9, #48]
   138      WORD $0x91020129 // add    x9, x9, #128
   139      BNE LBB0_6
   140      JMP LBB0_99
   141  LBB0_7:
   142      WORD $0x7100805f // cmp    w2, #32
   143      BLT LBB0_99
   144      VMOVQ $0x0000000a00000008, $0x0000000e0000000c, V1 // LCPI0_15
   145      VMOVQ $0x0000001200000010, $0x0000001600000014, V2 // LCPI0_17
   146      VMOVQ $0x0000001a00000018, $0x0000001e0000001c, V3 // LCPI0_120
   147      WORD $0x91010029 // add    x9, x1, #64
   148      WORD $0x4f000460 // movi    v0.4s, #3
   149      WORD $0x6ea0b821 // neg    v1.4s, v1.4s
   150      WORD $0x6ea0b842 // neg    v2.4s, v2.4s
   151      WORD $0x6ea0b863 // neg    v3.4s, v3.4s
   152  LBB0_9:
   153      WORD $0xb940000a // ldr    w10, [x0]
   154      WORD $0xaa0003eb // mov    x11, x0
   155      WORD $0xf1000508 // subs    x8, x8, #1
   156      WORD $0x53027d4c // lsr    w12, w10, #2
   157      WORD $0x1e270144 // fmov    s4, w10
   158      WORD $0x53047d4d // lsr    w13, w10, #4
   159      WORD $0x4e0c1d84 // mov    v4.s[1], w12
   160      WORD $0x53067d4e // lsr    w14, w10, #6
   161      WORD $0x4e141da4 // mov    v4.s[2], w13
   162      WORD $0x4e1c1dc4 // mov    v4.s[3], w14
   163      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   164      WORD $0x3c9c0124 // stur    q4, [x9, #-64]
   165      WORD $0x4ddfc964 // ld1r    { v4.4s }, [x11], #4
   166      WORD $0x6ea14484 // ushl    v4.4s, v4.4s, v1.4s
   167      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   168      WORD $0x3c9d0124 // stur    q4, [x9, #-48]
   169      WORD $0x4d40c804 // ld1r    { v4.4s }, [x0]
   170      WORD $0x6ea24484 // ushl    v4.4s, v4.4s, v2.4s
   171      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   172      WORD $0x3c9e0124 // stur    q4, [x9, #-32]
   173      WORD $0xb840840a // ldr    w10, [x0], #8
   174      WORD $0x4e040d44 // dup    v4.4s, w10
   175      WORD $0x6ea34484 // ushl    v4.4s, v4.4s, v3.4s
   176      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   177      WORD $0x3c9f0124 // stur    q4, [x9, #-16]
   178      WORD $0xb940016a // ldr    w10, [x11]
   179      WORD $0x53027d4c // lsr    w12, w10, #2
   180      WORD $0x1e270144 // fmov    s4, w10
   181      WORD $0x53047d4d // lsr    w13, w10, #4
   182      WORD $0x4e0c1d84 // mov    v4.s[1], w12
   183      WORD $0x53067d4e // lsr    w14, w10, #6
   184      WORD $0x4e141da4 // mov    v4.s[2], w13
   185      WORD $0x4e1c1dc4 // mov    v4.s[3], w14
   186      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   187      WORD $0x3d800124 // str    q4, [x9]
   188      WORD $0x4d40c964 // ld1r    { v4.4s }, [x11]
   189      WORD $0x6ea14484 // ushl    v4.4s, v4.4s, v1.4s
   190      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   191      WORD $0x3d800524 // str    q4, [x9, #16]
   192      WORD $0x4d40c964 // ld1r    { v4.4s }, [x11]
   193      WORD $0x6ea24484 // ushl    v4.4s, v4.4s, v2.4s
   194      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   195      WORD $0x3d800924 // str    q4, [x9, #32]
   196      WORD $0x4d40c964 // ld1r    { v4.4s }, [x11]
   197      WORD $0x6ea34484 // ushl    v4.4s, v4.4s, v3.4s
   198      WORD $0x4e201c84 // and    v4.16b, v4.16b, v0.16b
   199      WORD $0x3d800d24 // str    q4, [x9, #48]
   200      WORD $0x91020129 // add    x9, x9, #128
   201      BNE LBB0_9
   202      JMP LBB0_99
   203  LBB0_10:
   204      WORD $0x7100805f // cmp    w2, #32
   205      BLT LBB0_99
   206      VMOVQ $0x0000000f0000000c, $0x0000001500000012, V1 // LCPI0_21
   207      MOVD $0x0000001b00000018, R2 // LCPI0_23
   208      VMOVQ $0x0000000700000004, $0x0000000d0000000a, V3 // LCPI0_25
   209      VMOVQ $0x0000001300000010, $0x0000001900000016, V4 // LCPI0_27
   210      MOVD $0x0000000500000002, R5 // LCPI0_29
   211      VMOVQ $0x0000000b00000008, $0x000000110000000e, V6 // LCPI0_31
   212      VMOVQ $0x0000001700000014, $0x0000001d0000001a, V7 // LCPI0_119
   213      WORD $0x91010029 // add    x9, x1, #64 
   214      WORD $0x4f0004e0 // movi    v0.4s, #7
   215      WORD $0x6ea0b821 // neg    v1.4s, v1.4s
   216      WORD $0x2ea0b842 // neg    v2.2s, v2.2s
   217      WORD $0x6ea0b863 // neg    v3.4s, v3.4s
   218      WORD $0x6ea0b884 // neg    v4.4s, v4.4s
   219      WORD $0x2ea0b8a5 // neg    v5.2s, v5.2s
   220      WORD $0x6ea0b8c6 // neg    v6.4s, v6.4s
   221      WORD $0x6ea0b8e7 // neg    v7.4s, v7.4s
   222  LBB0_12:
   223      WORD $0xb940000a // ldr    w10, [x0]
   224      WORD $0xf1000508 // subs    x8, x8, #1
   225      WORD $0x53037d4b // lsr    w11, w10, #3
   226      WORD $0x1e270150 // fmov    s16, w10
   227      WORD $0x53067d4c // lsr    w12, w10, #6
   228      WORD $0x4e0c1d70 // mov    v16.s[1], w11
   229      WORD $0x53097d4d // lsr    w13, w10, #9
   230      WORD $0x4e141d90 // mov    v16.s[2], w12
   231      WORD $0x4e1c1db0 // mov    v16.s[3], w13
   232      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   233      WORD $0xaa0003ea // mov    x10, x0
   234      WORD $0x3c9c0130 // stur    q16, [x9, #-64]
   235      WORD $0x4ddfc950 // ld1r    { v16.4s }, [x10], #4
   236      WORD $0x6ea14610 // ushl    v16.4s, v16.4s, v1.4s
   237      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   238      WORD $0x3c9d0130 // stur    q16, [x9, #-48]
   239      WORD $0xb940000b // ldr    w11, [x0]
   240      WORD $0xb940014c // ldr    w12, [x10]
   241      WORD $0x0e040d70 // dup    v16.2s, w11
   242      WORD $0x138b798b // extr    w11, w12, w11, #30
   243      WORD $0x2ea24610 // ushl    v16.2s, v16.2s, v2.2s
   244      WORD $0x53017d8c // lsr    w12, w12, #1
   245      WORD $0x4e141d70 // mov    v16.s[2], w11
   246      WORD $0x4e1c1d90 // mov    v16.s[3], w12
   247      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   248      WORD $0x3c9e0130 // stur    q16, [x9, #-32]
   249      WORD $0x4d40c950 // ld1r    { v16.4s }, [x10]
   250      WORD $0x9100200c // add    x12, x0, #8 
   251      WORD $0x6ea34610 // ushl    v16.4s, v16.4s, v3.4s
   252      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   253      WORD $0x3c9f0130 // stur    q16, [x9, #-16]
   254      WORD $0x4d40c950 // ld1r    { v16.4s }, [x10]
   255      WORD $0x6ea44610 // ushl    v16.4s, v16.4s, v4.4s
   256      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   257      WORD $0x3d800130 // str    q16, [x9]
   258      WORD $0xb940014a // ldr    w10, [x10]
   259      WORD $0xb940080b // ldr    w11, [x0, #8]
   260      WORD $0x91003000 // add    x0, x0, #12 
   261      WORD $0x531c7d4d // lsr    w13, w10, #28
   262      WORD $0x138a7d6a // extr    w10, w11, w10, #31
   263      WORD $0x0e040d70 // dup    v16.2s, w11
   264      WORD $0x1e2701b1 // fmov    s17, w13
   265      WORD $0x2ea54610 // ushl    v16.2s, v16.2s, v5.2s
   266      WORD $0x4e0c1d51 // mov    v17.s[1], w10
   267      WORD $0x6e180611 // mov    v17.d[1], v16.d[0]
   268      WORD $0x4e201e30 // and    v16.16b, v17.16b, v0.16b
   269      WORD $0x3d800530 // str    q16, [x9, #16]
   270      WORD $0x4d40c990 // ld1r    { v16.4s }, [x12]
   271      WORD $0x6ea64610 // ushl    v16.4s, v16.4s, v6.4s
   272      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   273      WORD $0x3d800930 // str    q16, [x9, #32]
   274      WORD $0x4d40c990 // ld1r    { v16.4s }, [x12]
   275      WORD $0x6ea74610 // ushl    v16.4s, v16.4s, v7.4s
   276      WORD $0x4e201e10 // and    v16.16b, v16.16b, v0.16b
   277      WORD $0x3d800d30 // str    q16, [x9, #48]
   278      WORD $0x91020129 // add    x9, x9, #128
   279      BNE LBB0_12
   280      JMP LBB0_99
   281  LBB0_13:
   282      WORD $0x7100805f // cmp    w2, #32
   283      BLT LBB0_99
   284      VMOVQ $0x0000001400000010, $0x0000001c00000018, V1 // LCPI0_118
   285      WORD $0x91010029 // add    x9, x1, #64
   286      WORD $0x4f0005e0 // movi    v0.4s, #15
   287      WORD $0x6ea0b821 // neg    v1.4s, v1.4s
   288  LBB0_15:
   289      WORD $0xb940000a // ldr    w10, [x0]
   290      WORD $0xaa0003eb // mov    x11, x0
   291      WORD $0xf1000508 // subs    x8, x8, #1
   292      WORD $0x53047d4c // lsr    w12, w10, #4
   293      WORD $0x1e270142 // fmov    s2, w10
   294      WORD $0x53087d4d // lsr    w13, w10, #8
   295      WORD $0x4e0c1d82 // mov    v2.s[1], w12
   296      WORD $0x530c7d4e // lsr    w14, w10, #12
   297      WORD $0x4e141da2 // mov    v2.s[2], w13
   298      WORD $0x4e1c1dc2 // mov    v2.s[3], w14
   299      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   300      WORD $0x3c9c0122 // stur    q2, [x9, #-64]
   301      WORD $0x4ddfc962 // ld1r    { v2.4s }, [x11], #4
   302      WORD $0x6ea14442 // ushl    v2.4s, v2.4s, v1.4s
   303      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   304      WORD $0x3c9d0122 // stur    q2, [x9, #-48]
   305      WORD $0xb940016a // ldr    w10, [x11]
   306      WORD $0x53047d4c // lsr    w12, w10, #4
   307      WORD $0x1e270142 // fmov    s2, w10
   308      WORD $0x53087d4d // lsr    w13, w10, #8
   309      WORD $0x4e0c1d82 // mov    v2.s[1], w12
   310      WORD $0x530c7d4e // lsr    w14, w10, #12
   311      WORD $0x4e141da2 // mov    v2.s[2], w13
   312      WORD $0x4e1c1dc2 // mov    v2.s[3], w14
   313      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   314      WORD $0x3c9e0122 // stur    q2, [x9, #-32]
   315      WORD $0x4d40c962 // ld1r    { v2.4s }, [x11]
   316      WORD $0x9100200b // add    x11, x0, #8 
   317      WORD $0x6ea14442 // ushl    v2.4s, v2.4s, v1.4s
   318      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   319      WORD $0x3c9f0122 // stur    q2, [x9, #-16]
   320      WORD $0xb940080a // ldr    w10, [x0, #8]
   321      WORD $0x53047d4c // lsr    w12, w10, #4
   322      WORD $0x1e270142 // fmov    s2, w10
   323      WORD $0x53087d4d // lsr    w13, w10, #8
   324      WORD $0x4e0c1d82 // mov    v2.s[1], w12
   325      WORD $0x530c7d4e // lsr    w14, w10, #12
   326      WORD $0x4e141da2 // mov    v2.s[2], w13
   327      WORD $0x4e1c1dc2 // mov    v2.s[3], w14
   328      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   329      WORD $0x3c9f0122 // stur    q2, [x9, #-16]
   330      WORD $0xb940080a // ldr    w10, [x0, #8]
   331      WORD $0x53047d4c // lsr    w12, w10, #4
   332      WORD $0x1e270142 // fmov    s2, w10
   333      WORD $0x53087d4d // lsr    w13, w10, #8
   334      WORD $0x4e0c1d82 // mov    v2.s[1], w12
   335      WORD $0x530c7d4e // lsr    w14, w10, #12
   336      WORD $0x4e141da2 // mov    v2.s[2], w13
   337      WORD $0x4e1c1dc2 // mov    v2.s[3], w14
   338      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   339      WORD $0x3d800122 // str    q2, [x9]
   340      WORD $0x4d40c962 // ld1r    { v2.4s }, [x11]
   341      WORD $0x9100300b // add    x11, x0, #12
   342      WORD $0x6ea14442 // ushl    v2.4s, v2.4s, v1.4s
   343      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   344      WORD $0x3d800522 // str    q2, [x9, #16]
   345      WORD $0xb9400c0a // ldr    w10, [x0, #12]
   346      WORD $0x91004000 // add    x0, x0, #16             
   347      WORD $0x53047d4c // lsr    w12, w10, #4
   348      WORD $0x1e270142 // fmov    s2, w10
   349      WORD $0x53087d4d // lsr    w13, w10, #8
   350      WORD $0x4e0c1d82 // mov    v2.s[1], w12
   351      WORD $0x530c7d4e // lsr    w14, w10, #12
   352      WORD $0x4e141da2 // mov    v2.s[2], w13
   353      WORD $0x4e1c1dc2 // mov    v2.s[3], w14
   354      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   355      WORD $0x3d800922 // str    q2, [x9, #32]
   356      WORD $0x4d40c962 // ld1r    { v2.4s }, [x11]
   357      WORD $0x6ea14442 // ushl    v2.4s, v2.4s, v1.4s
   358      WORD $0x4e201c42 // and    v2.16b, v2.16b, v0.16b
   359      WORD $0x3d800d22 // str    q2, [x9, #48]
   360      WORD $0x91020129 //add    x9, x9, #128
   361      BNE LBB0_15
   362      JMP LBB0_99
   363  LBB0_16:
   364      WORD $0x7100805f // cmp    w2, #32
   365      BLT LBB0_99
   366      MOVD $0x0000001900000014, R1 // LCPI0_37
   367      VMOVQ $0x0000000d00000008, $0x0000001700000012, V2 // LCPI0_39
   368      MOVD $0x0000000600000001, R3 // LCPI0_41
   369      MOVD $0x0000001500000010, R4 // LCPI0_43
   370      VMOVQ $0x0000000900000004, $0x000000130000000e, V5 // LCPI0_45
   371      MOVD $0x0000000700000002, R6 // LCPI0_47
   372      VMOVQ $0x000000110000000c, $0x0000001b00000016, V7 // LCPI0_117
   373      WORD $0x91010029 // add    x9, x1, #64
   374      WORD $0x4f0007e0 // movi    v0.4s, #31
   375      WORD $0x2ea0b821 //neg    v1.2s, v1.2s
   376      WORD $0x6ea0b842 //neg    v2.4s, v2.4s
   377      WORD $0x2ea0b863 //neg    v3.2s, v3.2s
   378      WORD $0x2ea0b884 //neg    v4.2s, v4.2s
   379      WORD $0x6ea0b8a5 //neg    v5.4s, v5.4s
   380      WORD $0x2ea0b8c6 //neg    v6.2s, v6.2s
   381      WORD $0x6ea0b8e7 //neg    v7.4s, v7.4s
   382  LBB0_18:
   383      WORD $0xb940000a //ldr    w10, [x0]
   384      WORD $0xf1000508 //subs    x8, x8, #1
   385      WORD $0x53057d4b //lsr    w11, w10, #5
   386      WORD $0x1e270150 //fmov    s16, w10
   387      WORD $0x530a7d4c //lsr    w12, w10, #10
   388      WORD $0x4e0c1d70 //mov    v16.s[1], w11
   389      WORD $0x530f7d4d //lsr    w13, w10, #15
   390      WORD $0x4e141d90 //mov    v16.s[2], w12
   391      WORD $0x4e1c1db0 //mov    v16.s[3], w13
   392      WORD $0x4e201e10 //and    v16.16b, v16.16b, v0.16b
   393      WORD $0x3c9c0130 //    stur    q16, [x9, #-64]
   394      WORD $0x29402c0a //ldp    w10, w11, [x0]
   395      WORD $0x9100100c //add    x12, x0, #4
   396      WORD $0x0e040d50 //    dup    v16.2s, w10
   397      WORD $0x138a796a //    extr    w10, w11, w10, #30
   398      WORD $0x2ea14610 //    ushl    v16.2s, v16.2s, v1.2s
   399      WORD $0x53037d6b //    lsr    w11, w11, #3
   400      WORD $0x4e141d50 //    mov    v16.s[2], w10
   401      WORD $0x4e1c1d70 //    mov    v16.s[3], w11
   402      WORD $0x4e201e10 //    and    v16.16b, v16.16b, v0.16b
   403      WORD $0x3c9d0130 //    stur    q16, [x9, #-48]
   404      WORD $0x4d40c990 //    ld1r    { v16.4s }, [x12]
   405      WORD $0x9100300c //    add    x12, x0, #12
   406      WORD $0x6ea24610 //    ushl    v16.4s, v16.4s, v2.4s
   407      WORD $0x4e201e10 //    and    v16.16b, v16.16b, v0.16b
   408      WORD $0x3c9e0130 //    stur    q16, [x9, #-32]
   409      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
   410      WORD $0x138a716a //    extr    w10, w11, w10, #28
   411      WORD $0x0e040d70 //    dup    v16.2s, w11
   412      WORD $0x2ea34610 //    ushl    v16.2s, v16.2s, v3.2s
   413      WORD $0x1e270151 //    fmov    s17, w10
   414      WORD $0x6e0c0611 //    mov    v17.s[1], v16.s[0]
   415      WORD $0x530b7d6b //    lsr    w11, w11, #11
   416      WORD $0x6e142611 //    mov    v17.s[2], v16.s[1]
   417      WORD $0x4e1c1d71 //    mov    v17.s[3], w11
   418      WORD $0x4e201e30 //    and    v16.16b, v17.16b, v0.16b
   419      WORD $0x3c9f0130 //    stur    q16, [x9, #-16]
   420      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
   421      WORD $0x0e040d50 //    dup    v16.2s, w10
   422      WORD $0x531a7d4d //    lsr    w13, w10, #26
   423      WORD $0x2ea44610 //    ushl    v16.2s, v16.2s, v4.2s
   424      WORD $0x138a7d6a //    extr    w10, w11, w10, #31
   425      WORD $0x4e141db0 //    mov    v16.s[2], w13
   426      WORD $0x4e1c1d50 //    mov    v16.s[3], w10
   427      WORD $0x4e201e10 //    and    v16.16b, v16.16b, v0.16b
   428      WORD $0x3d800130 //    str    q16, [x9]
   429      WORD $0x4d40c990 //    ld1r    { v16.4s }, [x12]
   430      WORD $0x9100400c //    add    x12, x0, #16
   431      WORD $0x6ea54610 //    ushl    v16.4s, v16.4s, v5.4s
   432      WORD $0x4e201e10 //    and    v16.16b, v16.16b, v0.16b
   433      WORD $0x3d800530 //    str    q16, [x9, #16]
   434      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
   435      WORD $0x91005000 //    add    x0, x0, #20
   436      WORD $0x53187d4d //    lsr    w13, w10, #24
   437      WORD $0x138a756a //    extr    w10, w11, w10, #29
   438      WORD $0x0e040d70 //    dup    v16.2s, w11
   439      WORD $0x1e2701b1 //    fmov    s17, w13
   440      WORD $0x2ea64610 //    ushl    v16.2s, v16.2s, v6.2s
   441      WORD $0x4e0c1d51 //    mov    v17.s[1], w10
   442      WORD $0x6e180611 //    mov    v17.d[1], v16.d[0]
   443      WORD $0x4e201e30 //    and    v16.16b, v17.16b, v0.16b
   444      WORD $0x3d800930 //    str    q16, [x9, #32]
   445      WORD $0x4d40c990 //    ld1r    { v16.4s }, [x12]
   446      WORD $0x6ea74610 //    ushl    v16.4s, v16.4s, v7.4s
   447      WORD $0x4e201e10 //    and    v16.16b, v16.16b, v0.16b
   448      WORD $0x3d800d30 //    str    q16, [x9, #48]
   449      WORD $0x91020129 //    add    x9, x9, #128
   450      BNE LBB0_18
   451      JMP LBB0_99
   452  LBB0_19:
   453      WORD $0x7100805f // cmp    w2, #32
   454      BLT LBB0_99
   455      MOVD $0x0000000a00000004, R1 // LCPI0_51
   456      MOVD $0x0000001600000010, R2 // LCPI0_53
   457      VMOVQ $0x0000000e00000008, $0x0000001a00000014, V3 // LCPI0_116
   458      WORD $0x91010029 // add    x9, x1, #64
   459      WORD $0x4f0107e0 // movi    v0.4s, #63
   460      WORD $0x2ea0b821 // neg    v1.2s, v1.2s
   461      WORD $0x2ea0b842 // neg    v2.2s, v2.2s
   462      WORD $0x6ea0b863 // neg    v3.4s, v3.4s
   463  LBB0_21:
   464      WORD $0xb940000a //    ldr    w10, [x0]
   465      WORD $0xf1000508 //    subs    x8, x8, #1
   466      WORD $0x53067d4b //    lsr    w11, w10, #6
   467      WORD $0x1e270144 //    fmov    s4, w10
   468      WORD $0x530c7d4c //    lsr    w12, w10, #12
   469      WORD $0x4e0c1d64 //    mov    v4.s[1], w11
   470      WORD $0x53127d4d //    lsr    w13, w10, #18
   471      WORD $0x4e141d84 //    mov    v4.s[2], w12
   472      WORD $0x4e1c1da4 //    mov    v4.s[3], w13
   473      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   474      WORD $0x3c9c0124 //    stur    q4, [x9, #-64]
   475      WORD $0x29402c0a //    ldp    w10, w11, [x0]
   476      WORD $0x53187d4c //    lsr    w12, w10, #24
   477      WORD $0x138a796a //    extr    w10, w11, w10, #30
   478      WORD $0x0e040d64 //    dup    v4.2s, w11
   479      WORD $0x1e270185 //    fmov    s5, w12
   480      WORD $0x2ea14484 //    ushl    v4.2s, v4.2s, v1.2s
   481      WORD $0x4e0c1d45 //    mov    v5.s[1], w10
   482      WORD $0x6e180485 //    mov    v5.d[1], v4.d[0]
   483      WORD $0x4e201ca4 //    and    v4.16b, v5.16b, v0.16b
   484      WORD $0x3c9d0124 //    stur    q4, [x9, #-48]
   485      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
   486      WORD $0x9100200c //    add    x12, x0, #8
   487      WORD $0x0e040d44 //    dup    v4.2s, w10
   488      WORD $0x138a716a //    extr    w10, w11, w10, #28
   489      WORD $0x2ea24484 //    ushl    v4.2s, v4.2s, v2.2s
   490      WORD $0x53027d6b //    lsr    w11, w11, #2
   491      WORD $0x4e141d44 //    mov    v4.s[2], w10
   492      WORD $0x4e1c1d64 //    mov    v4.s[3], w11
   493      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   494      WORD $0x3c9e0124 //    stur    q4, [x9, #-32]
   495      WORD $0x4d40c984 //    ld1r    { v4.4s }, [x12]
   496      WORD $0x6ea34484 //    ushl    v4.4s, v4.4s, v3.4s
   497      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   498      WORD $0x3c9f0124 //    stur    q4, [x9, #-16]
   499      WORD $0xb9400c0a //    ldr    w10, [x0, #12]
   500      WORD $0x53067d4b //    lsr    w11, w10, #6
   501      WORD $0x1e270144 //    fmov    s4, w10
   502      WORD $0x530c7d4c //    lsr    w12, w10, #12
   503      WORD $0x4e0c1d64 //    mov    v4.s[1], w11
   504      WORD $0x53127d4d //    lsr    w13, w10, #18
   505      WORD $0x4e141d84 //    mov    v4.s[2], w12
   506      WORD $0x4e1c1da4 //    mov    v4.s[3], w13
   507      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   508      WORD $0x3d800124 //    str    q4, [x9]
   509      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
   510      WORD $0x53187d4c //    lsr    w12, w10, #24
   511      WORD $0x138a796a //    extr    w10, w11, w10, #30
   512      WORD $0x0e040d64 //    dup    v4.2s, w11
   513      WORD $0x1e270185 //    fmov    s5, w12
   514      WORD $0x2ea14484 //    ushl    v4.2s, v4.2s, v1.2s
   515      WORD $0x4e0c1d45 //    mov    v5.s[1], w10
   516      WORD $0x6e180485 //    mov    v5.d[1], v4.d[0]
   517      WORD $0x4e201ca4 //    and    v4.16b, v5.16b, v0.16b
   518      WORD $0x3d800524 //    str    q4, [x9, #16]
   519      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
   520      WORD $0x9100500c //    add    x12, x0, #20
   521      WORD $0x91006000 //    add    x0, x0, #24
   522      WORD $0x0e040d44 //    dup    v4.2s, w10
   523      WORD $0x138a716a //    extr    w10, w11, w10, #28
   524      WORD $0x2ea24484 //    ushl    v4.2s, v4.2s, v2.2s
   525      WORD $0x53027d6b //    lsr    w11, w11, #2
   526      WORD $0x4e141d44 //    mov    v4.s[2], w10
   527      WORD $0x4e1c1d64 //    mov    v4.s[3], w11
   528      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   529      WORD $0x3d800924 //    str    q4, [x9, #32]
   530      WORD $0x4d40c984 //    ld1r    { v4.4s }, [x12]
   531      WORD $0x6ea34484 //    ushl    v4.4s, v4.4s, v3.4s
   532      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   533      WORD $0x3d800d24 //    str    q4, [x9, #48]
   534      WORD $0x91020129 //    add    x9, x9, #128
   535      BNE LBB0_21
   536      JMP LBB0_99
   537  LBB0_22:
   538      WORD $0x7100805f // cmp    w2, #32
   539      BLT LBB0_99
   540      MOVD $0x0000000d00000006, R1 // LCPI0_59
   541      MOVD $0x0000000900000002, R2 // LCPI0_61
   542      MOVD $0x0000001700000010, R3 // LCPI0_63
   543      MOVD $0x000000130000000c, R4 // LCPI0_65
   544      MOVD $0x0000000f00000008, R5 // LCPI0_67
   545      VMOVQ $0x0000000b00000004, $0x0000001900000012, V6 // LCPI0_115
   546      WORD $0x91010029 //    add    x9, x1, #64
   547      WORD $0x4f0307e0 //    movi    v0.4s, #127
   548      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
   549      WORD $0x2ea0b842 //    neg    v2.2s, v2.2s
   550      WORD $0x2ea0b863 //    neg    v3.2s, v3.2s
   551      WORD $0x2ea0b884 //    neg    v4.2s, v4.2s
   552      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
   553      WORD $0x6ea0b8c6 //    neg    v6.4s, v6.4s
   554  LBB0_24:
   555      WORD $0xb940000a //     ldr    w10, [x0]
   556      WORD $0xf1000508 //     subs    x8, x8, #1
   557      WORD $0x53077d4b //     lsr    w11, w10, #7
   558      WORD $0x1e270147 //     fmov    s7, w10
   559      WORD $0x530e7d4c //     lsr    w12, w10, #14
   560      WORD $0x4e0c1d67 //     mov    v7.s[1], w11
   561      WORD $0x53157d4d //     lsr    w13, w10, #21
   562      WORD $0x4e141d87 //     mov    v7.s[2], w12
   563      WORD $0x4e1c1da7 //     mov    v7.s[3], w13
   564      WORD $0x4e201cf0 //     and    v16.16b, v7.16b, v0.16b
   565      WORD $0x3c9c0130 //     stur    q16, [x9, #-64]
   566      WORD $0xb940040a //     ldr    w10, [x0, #4]
   567      WORD $0x53037d4b //     lsr    w11, w10, #3
   568      WORD $0x530a7d4c //     lsr    w12, w10, #10
   569      WORD $0x4e0c1d67 //     mov    v7.s[1], w11
   570      WORD $0x53117d4a //     lsr    w10, w10, #17
   571      WORD $0x4e141d87 //     mov    v7.s[2], w12
   572      WORD $0x4e1c1d47 //     mov    v7.s[3], w10
   573      WORD $0x4e201ce7 //     and    v7.16b, v7.16b, v0.16b
   574      WORD $0x3c9d0127 //     stur    q7, [x9, #-48]
   575      WORD $0x2940ac0a //     ldp    w10, w11, [x0, #4]
   576      WORD $0x53187d4c //     lsr    w12, w10, #24
   577      WORD $0x138a7d6a //     extr    w10, w11, w10, #31
   578      WORD $0x0e040d67 //     dup    v7.2s, w11
   579      WORD $0x1e270190 //     fmov    s16, w12
   580      WORD $0x2ea144e7 //     ushl    v7.2s, v7.2s, v1.2s
   581      WORD $0x4e0c1d50 //     mov    v16.s[1], w10
   582      WORD $0x6e1804f0 //     mov    v16.d[1], v7.d[0]
   583      WORD $0x4e201e07 //     and    v7.16b, v16.16b, v0.16b
   584      WORD $0x3c9e0127 //     stur    q7, [x9, #-32]
   585      WORD $0x29412c0a //     ldp    w10, w11, [x0, #8]
   586      WORD $0x53147d4c //     lsr    w12, w10, #20
   587      WORD $0x138a6d6a //     extr    w10, w11, w10, #27
   588      WORD $0x0e040d67 //     dup    v7.2s, w11
   589      WORD $0x1e270190 //     fmov    s16, w12
   590      WORD $0x2ea244e7 //     ushl    v7.2s, v7.2s, v2.2s
   591      WORD $0x4e0c1d50 //     mov    v16.s[1], w10
   592      WORD $0x6e1804f0 //     mov    v16.d[1], v7.d[0]
   593      WORD $0x4e201e07 //     and    v7.16b, v16.16b, v0.16b
   594      WORD $0x3c9f0127 //     stur    q7, [x9, #-16]
   595      WORD $0x2941ac0a //     ldp    w10, w11, [x0, #12]
   596      WORD $0x9100600c //     add    x12, x0, #24
   597      WORD $0x0e040d47 //     dup    v7.2s, w10
   598      WORD $0x138a796a //     extr    w10, w11, w10, #30
   599      WORD $0x2ea344e7 //     ushl    v7.2s, v7.2s, v3.2s
   600      WORD $0x53057d6b //     lsr    w11, w11, #5
   601      WORD $0x4e141d47 //     mov    v7.s[2], w10
   602      WORD $0x4e1c1d67 //     mov    v7.s[3], w11
   603      WORD $0x4e201ce7 //     and    v7.16b, v7.16b, v0.16b
   604      WORD $0x3d800127 //     str    q7, [x9]
   605      WORD $0x29422c0a //     ldp    w10, w11, [x0, #16]
   606      WORD $0x0e040d47 //     dup    v7.2s, w10
   607      WORD $0x138a696a //     extr    w10, w11, w10, #26
   608      WORD $0x2ea444e7 //     ushl    v7.2s, v7.2s, v4.2s
   609      WORD $0x53017d6b //     lsr    w11, w11, #1
   610      WORD $0x4e141d47 //     mov    v7.s[2], w10
   611      WORD $0x4e1c1d67 //     mov    v7.s[3], w11
   612      WORD $0x4e201ce7 //     and    v7.16b, v7.16b, v0.16b
   613      WORD $0x3d800527 //     str    q7, [x9, #16]
   614      WORD $0x2942ac0a //     ldp    w10, w11, [x0, #20]
   615      WORD $0x91007000 //     add    x0, x0, #28
   616      WORD $0x0e040d47 //     dup    v7.2s, w10
   617      WORD $0x53167d4d //     lsr    w13, w10, #22
   618      WORD $0x2ea544e7 //     ushl    v7.2s, v7.2s, v5.2s
   619      WORD $0x138a756a //     extr    w10, w11, w10, #29
   620      WORD $0x4e141da7 //     mov    v7.s[2], w13
   621      WORD $0x4e1c1d47 //     mov    v7.s[3], w10
   622      WORD $0x4e201ce7 //     and    v7.16b, v7.16b, v0.16b
   623      WORD $0x3d800927 //     str    q7, [x9, #32]
   624      WORD $0x4d40c987 //     ld1r    { v7.4s }, [x12]
   625      WORD $0x6ea644e7 //     ushl    v7.4s, v7.4s, v6.4s
   626      WORD $0x4e201ce7 //     and    v7.16b, v7.16b, v0.16b
   627      WORD $0x3d800d27 //     str    q7, [x9, #48]
   628      WORD $0x91020129 //     add    x9, x9, #128
   629      BNE LBB0_24
   630      JMP LBB0_99
   631  LBB0_25:
   632      WORD $0x7100805f // cmp    w2, #32
   633      BLT LBB0_99
   634      WORD $0x91010029 // add    x9, x1, #64
   635      WORD $0x6f00e620 // movi    v0.2d, #0x0000ff000000ff
   636  LBB0_27:
   637      WORD $0xb940000a //    ldr    w10, [x0]
   638      WORD $0xf1000508 //    subs    x8, x8, #1
   639      WORD $0x53087d4b //    lsr    w11, w10, #8
   640      WORD $0x1e270141 //    fmov    s1, w10
   641      WORD $0x53107d4c //    lsr    w12, w10, #16
   642      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   643      WORD $0x53187d4d //    lsr    w13, w10, #24
   644      WORD $0x4e141d81 //    mov    v1.s[2], w12
   645      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   646      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   647      WORD $0x3c9c0121 //    stur    q1, [x9, #-64]
   648      WORD $0xb940040a //    ldr    w10, [x0, #4]
   649      WORD $0x53087d4b //    lsr    w11, w10, #8
   650      WORD $0x1e270141 //    fmov    s1, w10
   651      WORD $0x53107d4c //    lsr    w12, w10, #16
   652      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   653      WORD $0x53187d4d //    lsr    w13, w10, #24
   654      WORD $0x4e141d81 //    mov    v1.s[2], w12
   655      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   656      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   657      WORD $0x3c9d0121 //    stur    q1, [x9, #-48]
   658      WORD $0xb940080a //    ldr    w10, [x0, #8]
   659      WORD $0x53087d4b //    lsr    w11, w10, #8
   660      WORD $0x1e270141 //    fmov    s1, w10
   661      WORD $0x53107d4c //    lsr    w12, w10, #16
   662      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   663      WORD $0x53187d4d //    lsr    w13, w10, #24
   664      WORD $0x4e141d81 //    mov    v1.s[2], w12
   665      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   666      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   667      WORD $0x3c9e0121 //    stur    q1, [x9, #-32]
   668      WORD $0xb9400c0a //    ldr    w10, [x0, #12]
   669      WORD $0x53087d4b //    lsr    w11, w10, #8
   670      WORD $0x1e270141 //    fmov    s1, w10
   671      WORD $0x53107d4c //    lsr    w12, w10, #16
   672      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   673      WORD $0x53187d4d //    lsr    w13, w10, #24
   674      WORD $0x4e141d81 //    mov    v1.s[2], w12
   675      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   676      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   677      WORD $0x3c9f0121 //    stur    q1, [x9, #-16]
   678      WORD $0xb940100a //    ldr    w10, [x0, #16]
   679      WORD $0x53087d4b //    lsr    w11, w10, #8
   680      WORD $0x1e270141 //    fmov    s1, w10
   681      WORD $0x53107d4c //    lsr    w12, w10, #16
   682      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   683      WORD $0x53187d4d //    lsr    w13, w10, #24
   684      WORD $0x4e141d81 //    mov    v1.s[2], w12
   685      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   686      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   687      WORD $0x3d800121 //    str    q1, [x9]
   688      WORD $0xb940140a //    ldr    w10, [x0, #20]
   689      WORD $0x53087d4b //    lsr    w11, w10, #8
   690      WORD $0x1e270141 //    fmov    s1, w10
   691      WORD $0x53107d4c //    lsr    w12, w10, #16
   692      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   693      WORD $0x53187d4d //    lsr    w13, w10, #24
   694      WORD $0x4e141d81 //    mov    v1.s[2], w12
   695      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   696      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   697      WORD $0x3d800521 //    str    q1, [x9, #16]
   698      WORD $0xb940180a //    ldr    w10, [x0, #24]
   699      WORD $0x53087d4b //    lsr    w11, w10, #8
   700      WORD $0x1e270141 //    fmov    s1, w10
   701      WORD $0x53107d4c //    lsr    w12, w10, #16
   702      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   703      WORD $0x53187d4d //    lsr    w13, w10, #24
   704      WORD $0x4e141d81 //    mov    v1.s[2], w12
   705      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   706      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   707      WORD $0x3d800921 //    str    q1, [x9, #32]
   708      WORD $0xb9401c0a //    ldr    w10, [x0, #28]
   709      WORD $0x91008000 //    add    x0, x0, #32
   710      WORD $0x53087d4b //    lsr    w11, w10, #8
   711      WORD $0x1e270141 //    fmov    s1, w10
   712      WORD $0x53107d4c //    lsr    w12, w10, #16
   713      WORD $0x4e0c1d61 //    mov    v1.s[1], w11
   714      WORD $0x53187d4d //    lsr    w13, w10, #24
   715      WORD $0x4e141d81 //    mov    v1.s[2], w12
   716      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
   717      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
   718      WORD $0x3d800d21 //    str    q1, [x9, #48]
   719      WORD $0x91020129 //    add    x9, x9, #128
   720      BNE LBB0_27
   721      JMP LBB0_99
   722  LBB0_28:
   723      WORD $0x7100805f // cmp    w2, #32
   724      BLT LBB0_99
   725      MOVD $0x0000001100000008, R1 // LCPI0_73
   726      MOVD $0x000000150000000C, R2 // LCPI0_75
   727      MOVD $0x0000000b00000002, R3 // LCPI0_77
   728      MOVD $0x0000000f00000006, R4 // LCPI0_79
   729      MOVD $0x0000000a00000001, R5 // LCPI0_81
   730      MOVD $0x0000000e00000005, R6 // LCPI0_83
   731      WORD $0x91010029 //    add    x9, x1, #64
   732      WORD $0x4f00c420 //    movi    v0.4s, #1, msl #8
   733      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
   734      WORD $0x2ea0b842 //    neg    v2.2s, v2.2s
   735      WORD $0x2ea0b863 //    neg    v3.2s, v3.2s
   736      WORD $0x2ea0b884 //    neg    v4.2s, v4.2s
   737      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
   738      WORD $0x2ea0b8c6 //    neg    v6.2s, v6.2s
   739  LBB0_30:
   740      WORD $0x29402c0a //    ldp    w10, w11, [x0]
   741      WORD $0xf1000508 //    subs    x8, x8, #1
   742      WORD $0x53097d4c //    lsr    w12, w10, #9
   743      WORD $0x1e270147 //    fmov    s7, w10
   744      WORD $0x53127d4d //    lsr    w13, w10, #18
   745      WORD $0x4e0c1d87 //    mov    v7.s[1], w12
   746      WORD $0x138a6d6b //    extr    w11, w11, w10, #27
   747      WORD $0x4e141da7 //    mov    v7.s[2], w13
   748      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
   749      WORD $0x4e201cf0 //    and    v16.16b, v7.16b, v0.16b
   750      WORD $0x3c9c0130 //    stur    q16, [x9, #-64]
   751      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
   752      WORD $0x530d7d4c //    lsr    w12, w10, #13
   753      WORD $0x53167d4d //    lsr    w13, w10, #22
   754      WORD $0x4e0c1d87 //    mov    v7.s[1], w12
   755      WORD $0x138a7d6a //    extr    w10, w11, w10, #31
   756      WORD $0x4e141da7 //    mov    v7.s[2], w13
   757      WORD $0x4e1c1d47 //    mov    v7.s[3], w10
   758      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
   759      WORD $0x3c9d0127 //    stur    q7, [x9, #-48]
   760      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
   761      WORD $0x0e040d47 //    dup    v7.2s, w10
   762      WORD $0x138a696a //    extr    w10, w11, w10, #26
   763      WORD $0x2ea144e7 //    ushl    v7.2s, v7.2s, v1.2s
   764      WORD $0x53037d6b //    lsr    w11, w11, #3
   765      WORD $0x4e141d47 //    mov    v7.s[2], w10
   766      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
   767      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
   768      WORD $0x3c9e0127 //    stur    q7, [x9, #-32]
   769      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
   770      WORD $0x0e040d47 //    dup    v7.2s, w10
   771      WORD $0x138a796a //    extr    w10, w11, w10, #30
   772      WORD $0x2ea244e7 //    ushl    v7.2s, v7.2s, v2.2s
   773      WORD $0x53077d6b //    lsr    w11, w11, #7
   774      WORD $0x4e141d47 //    mov    v7.s[2], w10
   775      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
   776      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
   777      WORD $0x3c9f0127 //    stur    q7, [x9, #-16]
   778      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
   779      WORD $0x53107d4c //    lsr    w12, w10, #16
   780      WORD $0x138a656a //    extr    w10, w11, w10, #25
   781      WORD $0x0e040d67 //    dup    v7.2s, w11
   782      WORD $0x1e270190 //    fmov    s16, w12
   783      WORD $0x2ea344e7 //    ushl    v7.2s, v7.2s, v3.2s
   784      WORD $0x4e0c1d50 //    mov    v16.s[1], w10
   785      WORD $0x6e1804f0 //    mov    v16.d[1], v7.d[0]
   786      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
   787      WORD $0x3d800127 //    str    q7, [x9]
   788      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
   789      WORD $0x53147d4c //    lsr    w12, w10, #20
   790      WORD $0x138a756a //    extr    w10, w11, w10, #29
   791      WORD $0x0e040d67 //    dup    v7.2s, w11
   792      WORD $0x1e270190 //    fmov    s16, w12
   793      WORD $0x2ea444e7 //    ushl    v7.2s, v7.2s, v4.2s
   794      WORD $0x4e0c1d50 //    mov    v16.s[1], w10
   795      WORD $0x6e1804f0 //    mov    v16.d[1], v7.d[0]
   796      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
   797      WORD $0x3d800527 //    str    q7, [x9, #16]
   798      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
   799      WORD $0x138a616a //    extr    w10, w11, w10, #24
   800      WORD $0x0e040d67 //    dup    v7.2s, w11
   801      WORD $0x2ea544e7 //    ushl    v7.2s, v7.2s, v5.2s
   802      WORD $0x1e270150 //    fmov    s16, w10
   803      WORD $0x6e0c04f0 //    mov    v16.s[1], v7.s[0]
   804      WORD $0x53137d6b //    lsr    w11, w11, #19
   805      WORD $0x6e1424f0 //    mov    v16.s[2], v7.s[1]
   806      WORD $0x4e1c1d70 //    mov    v16.s[3], w11
   807      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
   808      WORD $0x3d800927 //    str    q7, [x9, #32]
   809      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
   810      WORD $0x91009000 //    add    x0, x0, #36
   811      WORD $0x138a716a //    extr    w10, w11, w10, #28
   812      WORD $0x0e040d67 //    dup    v7.2s, w11
   813      WORD $0x2ea644e7 //    ushl    v7.2s, v7.2s, v6.2s
   814      WORD $0x1e270150 //    fmov    s16, w10
   815      WORD $0x6e0c04f0 //    mov    v16.s[1], v7.s[0]
   816      WORD $0x53177d6b //    lsr    w11, w11, #23
   817      WORD $0x6e1424f0 //    mov    v16.s[2], v7.s[1]
   818      WORD $0x4e1c1d70 //    mov    v16.s[3], w11
   819      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
   820      WORD $0x3d800d27 //    str    q7, [x9, #48]
   821      WORD $0x91020129 //    add    x9, x9, #128
   822      BNE LBB0_30
   823      JMP LBB0_99
   824  LBB0_31:
   825      WORD $0x7100805f //cmp    w2, #32
   826      BLT LBB0_99
   827      MOVD $0x0000000e00000004, R1 // LCPI0_87
   828      MOVD $0x0000000c00000002, R2 // LCPI0_89
   829      MOVD $0x0000001200000008, R3 // LCPI0_85
   830      WORD $0x91010029 //    add    x9, x1, #64
   831      WORD $0x4f00c460 //    movi    v0.4s, #3, msl #8
   832      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
   833      WORD $0x2ea0b842 //    neg    v2.2s, v2.2s
   834      WORD $0x2ea0b863 //    neg    v3.2s, v3.2s
   835  LBB0_33:
   836      WORD $0x29402c0a //    ldp    w10, w11, [x0]
   837      WORD $0xf1000508 //    subs    x8, x8, #1
   838      WORD $0x530a7d4c //    lsr    w12, w10, #10
   839      WORD $0x1e270144 //    fmov    s4, w10
   840      WORD $0x53147d4d //    lsr    w13, w10, #20
   841      WORD $0x4e0c1d84 //    mov    v4.s[1], w12
   842      WORD $0x138a796b //    extr    w11, w11, w10, #30
   843      WORD $0x4e141da4 //    mov    v4.s[2], w13
   844      WORD $0x4e1c1d64 //    mov    v4.s[3], w11
   845      WORD $0x4e201c85 //    and    v5.16b, v4.16b, v0.16b
   846      WORD $0x3c9c0125 //    stur    q5, [x9, #-64]
   847      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
   848      WORD $0x53127d4c //    lsr    w12, w10, #18
   849      WORD $0x138a716a //    extr    w10, w11, w10, #28
   850      WORD $0x4e0c1d84 //    mov    v4.s[1], w12
   851      WORD $0x53067d6b //    lsr    w11, w11, #6
   852      WORD $0x4e141d44 //    mov    v4.s[2], w10
   853      WORD $0x4e1c1d64 //    mov    v4.s[3], w11
   854      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   855      WORD $0x3c9d0124 //    stur    q4, [x9, #-48]
   856      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
   857      WORD $0x53107d4c //    lsr    w12, w10, #16
   858      WORD $0x138a696a //    extr    w10, w11, w10, #26
   859      WORD $0x0e040d64 //    dup    v4.2s, w11
   860      WORD $0x1e270185 //    fmov    s5, w12
   861      WORD $0x2ea14484 //    ushl    v4.2s, v4.2s, v1.2s
   862      WORD $0x4e0c1d45 //    mov    v5.s[1], w10
   863      WORD $0x6e180485 //    mov    v5.d[1], v4.d[0]
   864      WORD $0x4e201ca4 //    and    v4.16b, v5.16b, v0.16b
   865      WORD $0x3c9e0124 //    stur    q4, [x9, #-32]
   866      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
   867      WORD $0x138a616a //    extr    w10, w11, w10, #24
   868      WORD $0x0e040d64 //    dup    v4.2s, w11
   869      WORD $0x2ea24484 //    ushl    v4.2s, v4.2s, v2.2s
   870      WORD $0x1e270145 //    fmov    s5, w10
   871      WORD $0x6e0c0485 //    mov    v5.s[1], v4.s[0]
   872      WORD $0x53167d6b //    lsr    w11, w11, #22
   873      WORD $0x6e142485 //    mov    v5.s[2], v4.s[1]
   874      WORD $0x4e1c1d65 //    mov    v5.s[3], w11
   875      WORD $0x4e201ca4 //    and    v4.16b, v5.16b, v0.16b
   876      WORD $0x3c9f0124 //    stur    q4, [x9, #-16]
   877      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
   878      WORD $0x530a7d4c //    lsr    w12, w10, #10
   879      WORD $0x1e270144 //    fmov    s4, w10
   880      WORD $0x53147d4d //    lsr    w13, w10, #20
   881      WORD $0x4e0c1d84 //    mov    v4.s[1], w12
   882      WORD $0x138a796b //    extr    w11, w11, w10, #30
   883      WORD $0x4e141da4 //    mov    v4.s[2], w13
   884      WORD $0x4e1c1d64 //    mov    v4.s[3], w11
   885      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   886      WORD $0x3d800124 //    str    q4, [x9]
   887      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
   888      WORD $0x0e040d44 //    dup    v4.2s, w10
   889      WORD $0x138a716a //    extr    w10, w11, w10, #28
   890      WORD $0x2ea34484 //    ushl    v4.2s, v4.2s, v3.2s
   891      WORD $0x53067d6b //    lsr    w11, w11, #6
   892      WORD $0x4e141d44 //    mov    v4.s[2], w10
   893      WORD $0x4e1c1d64 //    mov    v4.s[3], w11
   894      WORD $0x4e201c84 //    and    v4.16b, v4.16b, v0.16b
   895      WORD $0x3d800524 //    str    q4, [x9, #16]
   896      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
   897      WORD $0x53107d4c //    lsr    w12, w10, #16
   898      WORD $0x138a696a //    extr    w10, w11, w10, #26
   899      WORD $0x0e040d64 //    dup    v4.2s, w11
   900      WORD $0x1e270185 //    fmov    s5, w12
   901      WORD $0x2ea14484 //    ushl    v4.2s, v4.2s, v1.2s
   902      WORD $0x4e0c1d45 //    mov    v5.s[1], w10
   903      WORD $0x6e180485 //    mov    v5.d[1], v4.d[0]
   904      WORD $0x4e201ca4 //    and    v4.16b, v5.16b, v0.16b
   905      WORD $0x3d800924 //    str    q4, [x9, #32]
   906      WORD $0x29442c0a //    ldp    w10, w11, [x0, #32]
   907      WORD $0x9100a000 //    add    x0, x0, #40
   908      WORD $0x138a616a //    extr    w10, w11, w10, #24
   909      WORD $0x0e040d64 //    dup    v4.2s, w11
   910      WORD $0x2ea24484 //    ushl    v4.2s, v4.2s, v2.2s
   911      WORD $0x1e270145 //    fmov    s5, w10
   912      WORD $0x6e0c0485 //    mov    v5.s[1], v4.s[0]
   913      WORD $0x53167d6b //    lsr    w11, w11, #22
   914      WORD $0x6e142485 //    mov    v5.s[2], v4.s[1]
   915      WORD $0x4e1c1d65 //    mov    v5.s[3], w11
   916      WORD $0x4e201ca4 //    and    v4.16b, v5.16b, v0.16b
   917      WORD $0x3d800d24 //    str    q4, [x9, #48]
   918      WORD $0x91020129 //    add    x9, x9, #128
   919      BNE LBB0_33
   920      JMP LBB0_99
   921  LBB0_34:
   922      WORD $0x7100805f //    cmp    w2, #32
   923      BL LBB0_99
   924      MOVD $0x0000000e00000003, R1 // LCPI0_93
   925      MOVD $0x0000000f00000004, R2 // LCPI0_95
   926      MOVD $0x0000001100000006, R3 // LCPI0_97
   927      MOVD $0x0000001200000007, R4 // LCPI0_99
   928      MOVD $0x0000001300000008, R5 // LCPI0_101
   929      MOVD $0x000000150000000a, R6 // LCPI0_114
   930      WORD $0x91010029 //    add    x9, x1, #64
   931      WORD $0x4f00c4e0 //    movi    v0.4s, #7, msl #8
   932      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
   933      WORD $0x2ea0b842 //    neg    v2.2s, v2.2s
   934      WORD $0x2ea0b863 //    neg    v3.2s, v3.2s
   935      WORD $0x2ea0b884 //    neg    v4.2s, v4.2s
   936      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
   937      WORD $0x2ea0b8c6 //    neg    v6.2s, v6.2s
   938  LBB0_36:
   939      WORD $0x29402c0a //    ldp    w10, w11, [x0]
   940      WORD $0xf1000508 //    subs    x8, x8, #1
   941      WORD $0x530b7d4c //    lsr    w12, w10, #11
   942      WORD $0x1e270147 //    fmov    s7, w10
   943      WORD $0x138a596d //    extr    w13, w11, w10, #22
   944      WORD $0x4e0c1d87 //    mov    v7.s[1], w12
   945      WORD $0x53017d6b //    lsr    w11, w11, #1
   946      WORD $0x4e141da7 //    mov    v7.s[2], w13
   947      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
   948      WORD $0x4e201cf0 //    and    v16.16b, v7.16b, v0.16b
   949      WORD $0x3c9c0130 //    stur    q16, [x9, #-64]
   950      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
   951      WORD $0x138a5d6a //    extr    w10, w11, w10, #23
   952      WORD $0x53027d6c //    lsr    w12, w11, #2
   953      WORD $0x4e0c1d47 //    mov    v7.s[1], w10
   954      WORD $0x530d7d6b //    lsr    w11, w11, #13
   955      WORD $0x4e141d87 //    mov    v7.s[2], w12
   956      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
   957      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
   958      WORD $0x3c9d0127 //    stur    q7, [x9, #-48]
   959      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
   960      WORD $0xb940100c //    ldr    w12, [x0, #16]
   961      WORD $0x138a616a //    extr    w10, w11, w10, #24
   962      WORD $0x0e040d67 //    dup    v7.2s, w11
   963      WORD $0x2ea144e7 //    ushl    v7.2s, v7.2s, v1.2s
   964      WORD $0x1e270150 //    fmov    s16, w10
   965      WORD $0x6e0c04f0 //    mov    v16.s[1], v7.s[0]
   966      WORD $0x138b658b //    extr    w11, w12, w11, #25
   967      WORD $0x6e1424f0 //    mov    v16.s[2], v7.s[1]
   968      WORD $0x4e1c1d70 //    mov    v16.s[3], w11
   969      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
   970      WORD $0x3c9e0127 //    stur    q7, [x9, #-32]
   971      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
   972      WORD $0x0e040d47 //    dup    v7.2s, w10
   973      WORD $0x138a696a //    extr    w10, w11, w10, #26
   974      WORD $0x2ea244e7 //    ushl    v7.2s, v7.2s, v2.2s
   975      WORD $0x53057d6b //    lsr    w11, w11, #5
   976      WORD $0x4e141d47 //    mov    v7.s[2], w10
   977      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
   978      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
   979      WORD $0x3c9f0127 //    stur    q7, [x9, #-16]
   980      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
   981      WORD $0x53107d4c //    lsr    w12, w10, #16
   982      WORD $0x138a6d6a //    extr    w10, w11, w10, #27
   983      WORD $0x0e040d67 //    dup    v7.2s, w11
   984      WORD $0x1e270190 //    fmov    s16, w12
   985      WORD $0x2ea344e7 //    ushl    v7.2s, v7.2s, v3.2s
   986      WORD $0x4e0c1d50 //    mov    v16.s[1], w10
   987      WORD $0x6e1804f0 //    mov    v16.d[1], v7.d[0]
   988      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
   989      WORD $0x3d800127 //    str    q7, [x9]
   990      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
   991      WORD $0xb940200c //    ldr    w12, [x0, #32]
   992      WORD $0x138a716a //    extr    w10, w11, w10, #28
   993      WORD $0x0e040d67 //    dup    v7.2s, w11
   994      WORD $0x2ea444e7 //    ushl    v7.2s, v7.2s, v4.2s
   995      WORD $0x1e270150 //    fmov    s16, w10
   996      WORD $0x6e0c04f0 //    mov    v16.s[1], v7.s[0]
   997      WORD $0x138b758b //    extr    w11, w12, w11, #29
   998      WORD $0x6e1424f0 //    mov    v16.s[2], v7.s[1]
   999      WORD $0x4e1c1d70 //    mov    v16.s[3], w11
  1000      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
  1001      WORD $0x3d800527 //    str    q7, [x9, #16]
  1002      WORD $0x29442c0a //    ldp    w10, w11, [x0, #32]
  1003      WORD $0x0e040d47 //    dup    v7.2s, w10
  1004      WORD $0x138a796a //    extr    w10, w11, w10, #30
  1005      WORD $0x2ea544e7 //    ushl    v7.2s, v7.2s, v5.2s
  1006      WORD $0x53097d6b //    lsr    w11, w11, #9
  1007      WORD $0x4e141d47 //    mov    v7.s[2], w10
  1008      WORD $0x4e1c1d67 //    mov    v7.s[3], w11
  1009      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
  1010      WORD $0x3d800927 //    str    q7, [x9, #32]
  1011      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  1012      WORD $0x9100b000 //    add    x0, x0, #44
  1013      WORD $0x53147d4c //    lsr    w12, w10, #20
  1014      WORD $0x138a7d6a //    extr    w10, w11, w10, #31
  1015      WORD $0x0e040d67 //    dup    v7.2s, w11
  1016      WORD $0x1e270190 //    fmov    s16, w12
  1017      WORD $0x2ea644e7 //    ushl    v7.2s, v7.2s, v6.2s
  1018      WORD $0x4e0c1d50 //    mov    v16.s[1], w10
  1019      WORD $0x6e1804f0 //    mov    v16.d[1], v7.d[0]
  1020      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
  1021      WORD $0x3d800d27 //    str    q7, [x9, #48]
  1022      WORD $0x91020129 //    add    x9, x9, #128
  1023      BNE LBB0_36
  1024      JMP LBB0_99
  1025  LBB0_37:
  1026      WORD $0x7100805f //    cmp    w2, #32
  1027      BLT LBB0_99
  1028      MOVD $0x0000000e00000003, R1 // LCPI0_113
  1029      WORD $0x91010029 //    add    x9, x1, #64
  1030      WORD $0x4f00c5e0 //    movi    v0.4s, #15, msl #8
  1031      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
  1032  LBB0_39:
  1033      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1034      WORD $0xf1000508 //    subs    x8, x8, #1
  1035      WORD $0x530c7d4c //    lsr    w12, w10, #12
  1036      WORD $0x1e270142 //    fmov    s2, w10
  1037      WORD $0x138a616d //    extr    w13, w11, w10, #24
  1038      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1039      WORD $0x53047d6b //    lsr    w11, w11, #4
  1040      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1041      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1042      WORD $0x4e201c43 //    and    v3.16b, v2.16b, v0.16b
  1043      WORD $0x3c9c0123 //    stur    q3, [x9, #-64]
  1044      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
  1045      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1046      WORD $0x53087d6c //    lsr    w12, w11, #8
  1047      WORD $0x4e0c1d42 //    mov    v2.s[1], w10
  1048      WORD $0x53147d6b //    lsr    w11, w11, #20
  1049      WORD $0x4e141d82 //    mov    v2.s[2], w12
  1050      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1051      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1052      WORD $0x3c9d0122 //    stur    q2, [x9, #-48]
  1053      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
  1054      WORD $0x530c7d4c //    lsr    w12, w10, #12
  1055      WORD $0x1e270142 //    fmov    s2, w10
  1056      WORD $0x138a616d //    extr    w13, w11, w10, #24
  1057      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1058      WORD $0x53047d6b //    lsr    w11, w11, #4
  1059      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1060      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1061      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1062      WORD $0x3c9e0122 //    stur    q2, [x9, #-32]
  1063      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
  1064      WORD $0x53107d4c //    lsr    w12, w10, #16
  1065      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1066      WORD $0x0e040d62 //    dup    v2.2s, w11
  1067      WORD $0x1e270183 //    fmov    s3, w12
  1068      WORD $0x2ea14442 //    ushl    v2.2s, v2.2s, v1.2s
  1069      WORD $0x4e0c1d43 //    mov    v3.s[1], w10
  1070      WORD $0x6e180443 //    mov    v3.d[1], v2.d[0]
  1071      WORD $0x4e201c62 //    and    v2.16b, v3.16b, v0.16b
  1072      WORD $0x3c9f0122 //    stur    q2, [x9, #-16]
  1073      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  1074      WORD $0x530c7d4c //    lsr    w12, w10, #12
  1075      WORD $0x1e270142 //    fmov    s2, w10
  1076      WORD $0x138a616d //    extr    w13, w11, w10, #24
  1077      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1078      WORD $0x53047d6b //    lsr    w11, w11, #4
  1079      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1080      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1081      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1082      WORD $0x3d800122 //    str    q2, [x9]
  1083      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
  1084      WORD $0x53107d4c //    lsr    w12, w10, #16
  1085      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1086      WORD $0x0e040d62 //    dup    v2.2s, w11
  1087      WORD $0x1e270183 //    fmov    s3, w12
  1088      WORD $0x2ea14442 //    ushl    v2.2s, v2.2s, v1.2s
  1089      WORD $0x4e0c1d43 //    mov    v3.s[1], w10
  1090      WORD $0x6e180443 //    mov    v3.d[1], v2.d[0]
  1091      WORD $0x4e201c62 //    and    v2.16b, v3.16b, v0.16b
  1092      WORD $0x3d800522 //    str    q2, [x9, #16]
  1093      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  1094      WORD $0x530c7d4c //    lsr    w12, w10, #12
  1095      WORD $0x1e270142 //    fmov    s2, w10
  1096      WORD $0x138a616d //    extr    w13, w11, w10, #24
  1097      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1098      WORD $0x53047d6b //    lsr    w11, w11, #4
  1099      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1100      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1101      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1102      WORD $0x3d800922 //    str    q2, [x9, #32]
  1103      WORD $0x29452c0a //    ldp    w10, w11, [x0, #40]
  1104      WORD $0x9100c000 //    add    x0, x0, #48
  1105      WORD $0x53107d4c //    lsr    w12, w10, #16
  1106      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1107      WORD $0x0e040d62 //    dup    v2.2s, w11
  1108      WORD $0x1e270183 //    fmov    s3, w12
  1109      WORD $0x2ea14442 //    ushl    v2.2s, v2.2s, v1.2s
  1110      WORD $0x4e0c1d43 //    mov    v3.s[1], w10
  1111      WORD $0x6e180443 //    mov    v3.d[1], v2.d[0]
  1112      WORD $0x4e201c62 //    and    v2.16b, v3.16b, v0.16b
  1113      WORD $0x3d800d22 //    str    q2, [x9, #48]
  1114      WORD $0x91020129 //    add    x9, x9, #128
  1115      BNE LBB0_39
  1116      JMP LBB0_99
  1117  LBB0_40:
  1118      WORD $0x7100805f //    cmp    w2, #32
  1119      BLT LBB0_99
  1120      MOVD $0x0000000f00000002, R1 // LCPI0_105
  1121      MOVD $0x0000001100000004, R2 // LCPI0_107
  1122      MOVD $0x0000001200000005, R3 // LCPI0_109
  1123      MOVD $0x0000001300000006, R4 // LCPI0_112
  1124      WORD $0x91010029 //    add    x9, x1, #64
  1125      WORD $0x4f00c7e0 //    movi    v0.4s, #31, msl #8
  1126      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
  1127      WORD $0x2ea0b842 //    neg    v2.2s, v2.2s
  1128      WORD $0x2ea0b863 //    neg    v3.2s, v3.2s
  1129      WORD $0x2ea0b884 //    neg    v4.2s, v4.2s
  1130  LBB0_42:
  1131      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1132      WORD $0xf1000508 //    subs    x8, x8, #1
  1133      WORD $0x530d7d4c //    lsr    w12, w10, #13
  1134      WORD $0x1e270145 //    fmov    s5, w10
  1135      WORD $0x138a696d //    extr    w13, w11, w10, #26
  1136      WORD $0x4e0c1d85 //    mov    v5.s[1], w12
  1137      WORD $0x53077d6b //    lsr    w11, w11, #7
  1138      WORD $0x4e141da5 //    mov    v5.s[2], w13
  1139      WORD $0x4e1c1d65 //    mov    v5.s[3], w11
  1140      WORD $0x4e201ca6 //    and    v6.16b, v5.16b, v0.16b
  1141      WORD $0x3c9c0126 //    stur    q6, [x9, #-64]
  1142      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
  1143      WORD $0x53017d4c //    lsr    w12, w10, #1
  1144      WORD $0x530e7d4d //    lsr    w13, w10, #14
  1145      WORD $0x4e0c1d85 //    mov    v5.s[1], w12
  1146      WORD $0x138a6d6a //    extr    w10, w11, w10, #27
  1147      WORD $0x4e141da5 //    mov    v5.s[2], w13
  1148      WORD $0x4e1c1d45 //    mov    v5.s[3], w10
  1149      WORD $0x4e201ca5 //    and    v5.16b, v5.16b, v0.16b
  1150      WORD $0x3c9d0125 //    stur    q5, [x9, #-48]
  1151      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
  1152      WORD $0x53087d4c //    lsr    w12, w10, #8
  1153      WORD $0x138a556a //    extr    w10, w11, w10, #21
  1154      WORD $0x0e040d65 //    dup    v5.2s, w11
  1155      WORD $0x1e270186 //    fmov    s6, w12
  1156      WORD $0x2ea144a5 //    ushl    v5.2s, v5.2s, v1.2s
  1157      WORD $0x4e0c1d46 //    mov    v6.s[1], w10
  1158      WORD $0x6e1804a6 //    mov    v6.d[1], v5.d[0]
  1159      WORD $0x4e201cc5 //    and    v5.16b, v6.16b, v0.16b
  1160      WORD $0x3c9e0125 //    stur    q5, [x9, #-32]
  1161      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
  1162      WORD $0xb940180c //    ldr    w12, [x0, #24]
  1163      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1164      WORD $0x53097d6d //    lsr    w13, w11, #9
  1165      WORD $0x1e270145 //    fmov    s5, w10
  1166      WORD $0x138b598b //    extr    w11, w12, w11, #22
  1167      WORD $0x4e0c1da5 //    mov    v5.s[1], w13
  1168      WORD $0x53037d8c //    lsr    w12, w12, #3
  1169      WORD $0x4e141d65 //    mov    v5.s[2], w11
  1170      WORD $0x4e1c1d85 //    mov    v5.s[3], w12
  1171      WORD $0x4e201ca5 //    and    v5.16b, v5.16b, v0.16b
  1172      WORD $0x3c9f0125 //    stur    q5, [x9, #-16]
  1173      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  1174      WORD $0xb940200c //    ldr    w12, [x0, #32]
  1175      WORD $0x53107d4d //    lsr    w13, w10, #16
  1176      WORD $0x138a756a //    extr    w10, w11, w10, #29
  1177      WORD $0x1e2701a5 //    fmov    s5, w13
  1178      WORD $0x530a7d6e //    lsr    w14, w11, #10
  1179      WORD $0x4e0c1d45 //    mov    v5.s[1], w10
  1180      WORD $0x138b5d8b //    extr    w11, w12, w11, #23
  1181      WORD $0x4e141dc5 //    mov    v5.s[2], w14
  1182      WORD $0x4e1c1d65 //    mov    v5.s[3], w11
  1183      WORD $0x4e201ca5 //    and    v5.16b, v5.16b, v0.16b
  1184      WORD $0x3d800125 //    str    q5, [x9]
  1185      WORD $0x29442c0a //    ldp    w10, w11, [x0, #32]
  1186      WORD $0x0e040d45 //    dup    v5.2s, w10
  1187      WORD $0x138a796a //    extr    w10, w11, w10, #30
  1188      WORD $0x2ea244a5 //    ushl    v5.2s, v5.2s, v2.2s
  1189      WORD $0x530b7d6b //    lsr    w11, w11, #11
  1190      WORD $0x4e141d45 //    mov    v5.s[2], w10
  1191      WORD $0x4e1c1d65 //    mov    v5.s[3], w11
  1192      WORD $0x4e201ca5 //    and    v5.16b, v5.16b, v0.16b
  1193      WORD $0x3d800525 //    str    q5, [x9, #16]
  1194      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  1195      WORD $0xb9402c0c //    ldr    w12, [x0, #44]
  1196      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1197      WORD $0x0e040d65 //    dup    v5.2s, w11
  1198      WORD $0x2ea344a5 //    ushl    v5.2s, v5.2s, v3.2s
  1199      WORD $0x1e270146 //    fmov    s6, w10
  1200      WORD $0x6e0c04a6 //    mov    v6.s[1], v5.s[0]
  1201      WORD $0x138b7d8b //    extr    w11, w12, w11, #31
  1202      WORD $0x6e1424a6 //    mov    v6.s[2], v5.s[1]
  1203      WORD $0x4e1c1d66 //    mov    v6.s[3], w11
  1204      WORD $0x4e201cc5 //    and    v5.16b, v6.16b, v0.16b
  1205      WORD $0x3d800925 //    str    q5, [x9, #32]
  1206      WORD $0x2945ac0a //    ldp    w10, w11, [x0, #44]
  1207      WORD $0x9100d000 //    add    x0, x0, #52
  1208      WORD $0x530c7d4c //    lsr    w12, w10, #12
  1209      WORD $0x138a656a //    extr    w10, w11, w10, #25
  1210      WORD $0x0e040d65 //    dup    v5.2s, w11
  1211      WORD $0x1e270186 //    fmov    s6, w12
  1212      WORD $0x2ea444a5 //    ushl    v5.2s, v5.2s, v4.2s
  1213      WORD $0x4e0c1d46 //    mov    v6.s[1], w10
  1214      WORD $0x6e1804a6 //    mov    v6.d[1], v5.d[0]
  1215      WORD $0x4e201cc5 //    and    v5.16b, v6.16b, v0.16b
  1216      WORD $0x3d800d25 //    str    q5, [x9, #48]
  1217      WORD $0x91020129 //    add    x9, x9, #128
  1218      BNE LBB0_42
  1219      JMP LBB0_99
  1220  LBB0_43:
  1221      WORD $0x7100805f //    cmp    w2, #32
  1222      BLT LBB0_99
  1223      MOVD $0x0000001200000004, R1 // LCPI0_111
  1224      WORD $0x91010029 //    add    x9, x1, #64
  1225      WORD $0x4f01c7e0 //    movi    v0.4s, #63, msl #8
  1226      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
  1227  LBB0_45:
  1228      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1229      WORD $0xf1000508 //    subs    x8, x8, #1
  1230      WORD $0x530e7d4c //    lsr    w12, w10, #14
  1231      WORD $0x1e270142 //    fmov    s2, w10
  1232      WORD $0x138a716d //    extr    w13, w11, w10, #28
  1233      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1234      WORD $0x530a7d6b //    lsr    w11, w11, #10
  1235      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1236      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1237      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1238      WORD $0x3c9c0122 //    stur    q2, [x9, #-64]
  1239      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
  1240      WORD $0xb9400c0c //    ldr    w12, [x0, #12]
  1241      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1242      WORD $0x53067d6d //    lsr    w13, w11, #6
  1243      WORD $0x1e270142 //    fmov    s2, w10
  1244      WORD $0x138b518b //    extr    w11, w12, w11, #20
  1245      WORD $0x4e0c1da2 //    mov    v2.s[1], w13
  1246      WORD $0x53027d8c //    lsr    w12, w12, #2
  1247      WORD $0x4e141d62 //    mov    v2.s[2], w11
  1248      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  1249      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1250      WORD $0x3c9d0122 //    stur    q2, [x9, #-48]
  1251      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
  1252      WORD $0xb940140c //    ldr    w12, [x0, #20]
  1253      WORD $0x53107d4d //    lsr    w13, w10, #16
  1254      WORD $0x138a796a //    extr    w10, w11, w10, #30
  1255      WORD $0x1e2701a2 //    fmov    s2, w13
  1256      WORD $0x530c7d6e //    lsr    w14, w11, #12
  1257      WORD $0x4e0c1d42 //    mov    v2.s[1], w10
  1258      WORD $0x138b698b //    extr    w11, w12, w11, #26
  1259      WORD $0x4e141dc2 //    mov    v2.s[2], w14
  1260      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1261      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1262      WORD $0x3c9e0122 //    stur    q2, [x9, #-32]
  1263      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
  1264      WORD $0x53087d4c //    lsr    w12, w10, #8
  1265      WORD $0x138a596a //    extr    w10, w11, w10, #22
  1266      WORD $0x0e040d62 //    dup    v2.2s, w11
  1267      WORD $0x1e270183 //    fmov    s3, w12
  1268      WORD $0x2ea14442 //    ushl    v2.2s, v2.2s, v1.2s
  1269      WORD $0x4e0c1d43 //    mov    v3.s[1], w10
  1270      WORD $0x6e180443 //    mov    v3.d[1], v2.d[0]
  1271      WORD $0x4e201c62 //    and    v2.16b, v3.16b, v0.16b
  1272      WORD $0x3c9f0122 //    stur    q2, [x9, #-16]
  1273      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
  1274      WORD $0x530e7d4c //    lsr    w12, w10, #14
  1275      WORD $0x1e270142 //    fmov    s2, w10
  1276      WORD $0x138a716d //    extr    w13, w11, w10, #28
  1277      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1278      WORD $0x530a7d6b //    lsr    w11, w11, #10
  1279      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1280      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1281      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1282      WORD $0x3d800122 //    str    q2, [x9]
  1283      WORD $0x29442c0a //    ldp    w10, w11, [x0, #32]
  1284      WORD $0xb940280c //    ldr    w12, [x0, #40]
  1285      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1286      WORD $0x53067d6d //    lsr    w13, w11, #6
  1287      WORD $0x1e270142 //    fmov    s2, w10
  1288      WORD $0x138b518b //    extr    w11, w12, w11, #20
  1289      WORD $0x4e0c1da2 //    mov    v2.s[1], w13
  1290      WORD $0x53027d8c //    lsr    w12, w12, #2
  1291      WORD $0x4e141d62 //    mov    v2.s[2], w11
  1292      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  1293      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1294      WORD $0x3d800522 //    str    q2, [x9, #16]
  1295      WORD $0x29452c0a //    ldp    w10, w11, [x0, #40]
  1296      WORD $0xb940300c //    ldr    w12, [x0, #48]
  1297      WORD $0x53107d4d //    lsr    w13, w10, #16
  1298      WORD $0x138a796a //    extr    w10, w11, w10, #30
  1299      WORD $0x1e2701a2 //    fmov    s2, w13
  1300      WORD $0x530c7d6e //    lsr    w14, w11, #12
  1301      WORD $0x4e0c1d42 //    mov    v2.s[1], w10
  1302      WORD $0x138b698b //    extr    w11, w12, w11, #26
  1303      WORD $0x4e141dc2 //    mov    v2.s[2], w14
  1304      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1305      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1306      WORD $0x3d800922 //    str    q2, [x9, #32]
  1307      WORD $0x29462c0a //    ldp    w10, w11, [x0, #48]
  1308      WORD $0x9100e000 //    add    x0, x0, #56
  1309      WORD $0x53087d4c //    lsr    w12, w10, #8
  1310      WORD $0x138a596a //    extr    w10, w11, w10, #22
  1311      WORD $0x0e040d62 //    dup    v2.2s, w11
  1312      WORD $0x1e270183 //    fmov    s3, w12
  1313      WORD $0x2ea14442 //    ushl    v2.2s, v2.2s, v1.2s
  1314      WORD $0x4e0c1d43 //    mov    v3.s[1], w10
  1315      WORD $0x6e180443 //    mov    v3.d[1], v2.d[0]
  1316      WORD $0x4e201c62 //    and    v2.16b, v3.16b, v0.16b
  1317      WORD $0x3d800d22 //    str    q2, [x9, #48]
  1318      WORD $0x91020129 //    add    x9, x9, #128
  1319      BNE LBB0_45
  1320      JMP LBB0_99
  1321  LBB0_46:
  1322      WORD $0x7100805f //    cmp    w2, #32
  1323      BLT LBB0_99
  1324      MOVD $0x0000001100000002, R1 // LCPI0_110
  1325      WORD $0x91010029 //    add    x9, x1, #64
  1326      WORD $0x4f03c7e0 //    movi    v0.4s, #127, msl #8
  1327      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
  1328  LBB0_48:
  1329      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1330      WORD $0xf1000508 //    subs    x8, x8, #1
  1331      WORD $0x530f7d4c //    lsr    w12, w10, #15
  1332      WORD $0x1e270142 //    fmov    s2, w10
  1333      WORD $0x138a796d //    extr    w13, w11, w10, #30
  1334      WORD $0x4e0c1d82 //    mov    v2.s[1], w12
  1335      WORD $0x530d7d6b //    lsr    w11, w11, #13
  1336      WORD $0x4e141da2 //    mov    v2.s[2], w13
  1337      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1338      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1339      WORD $0x3c9c0122 //    stur    q2, [x9, #-64]
  1340      WORD $0x2940ac0a //    ldp    w10, w11, [x0, #4]
  1341      WORD $0xb9400c0c //    ldr    w12, [x0, #12]
  1342      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1343      WORD $0x530b7d6d //    lsr    w13, w11, #11
  1344      WORD $0x1e270142 //    fmov    s2, w10
  1345      WORD $0x138b698b //    extr    w11, w12, w11, #26
  1346      WORD $0x4e0c1da2 //    mov    v2.s[1], w13
  1347      WORD $0x53097d8c //    lsr    w12, w12, #9
  1348      WORD $0x4e141d62 //    mov    v2.s[2], w11
  1349      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  1350      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1351      WORD $0x3c9d0122 //    stur    q2, [x9, #-48]
  1352      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
  1353      WORD $0xb940140c //    ldr    w12, [x0, #20]
  1354      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1355      WORD $0x53077d6d //    lsr    w13, w11, #7
  1356      WORD $0x1e270142 //    fmov    s2, w10
  1357      WORD $0x138b598b //    extr    w11, w12, w11, #22
  1358      WORD $0x4e0c1da2 //    mov    v2.s[1], w13
  1359      WORD $0x53057d8c //    lsr    w12, w12, #5
  1360      WORD $0x4e141d62 //    mov    v2.s[2], w11
  1361      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  1362      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1363      WORD $0x3c9e0122 //    stur    q2, [x9, #-32]
  1364      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
  1365      WORD $0xb9401c0c //    ldr    w12, [x0, #28]
  1366      WORD $0x138a516a //    extr    w10, w11, w10, #20
  1367      WORD $0x53037d6d //    lsr    w13, w11, #3
  1368      WORD $0x1e270142 //    fmov    s2, w10
  1369      WORD $0x138b498b //    extr    w11, w12, w11, #18
  1370      WORD $0x4e0c1da2 //    mov    v2.s[1], w13
  1371      WORD $0x53017d8c //    lsr    w12, w12, #1
  1372      WORD $0x4e141d62 //    mov    v2.s[2], w11
  1373      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  1374      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1375      WORD $0x3c9f0122 //    stur    q2, [x9, #-16]
  1376      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
  1377      WORD $0xb940240c //    ldr    w12, [x0, #36]
  1378      WORD $0x53107d4d //    lsr    w13, w10, #16
  1379      WORD $0x138a7d6a //    extr    w10, w11, w10, #31
  1380      WORD $0x1e2701a2 //    fmov    s2, w13
  1381      WORD $0x530e7d6e //    lsr    w14, w11, #14
  1382      WORD $0x4e0c1d42 //    mov    v2.s[1], w10
  1383      WORD $0x138b758b //    extr    w11, w12, w11, #29
  1384      WORD $0x4e141dc2 //    mov    v2.s[2], w14
  1385      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1386      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1387      WORD $0x3d800122 //    str    q2, [x9]
  1388      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  1389      WORD $0xb9402c0c //    ldr    w12, [x0, #44]
  1390      WORD $0x530c7d4d //    lsr    w13, w10, #12
  1391      WORD $0x138a6d6a //    extr    w10, w11, w10, #27
  1392      WORD $0x1e2701a2 //    fmov    s2, w13
  1393      WORD $0x530a7d6e //    lsr    w14, w11, #10
  1394      WORD $0x4e0c1d42 //    mov    v2.s[1], w10
  1395      WORD $0x138b658b //    extr    w11, w12, w11, #25
  1396      WORD $0x4e141dc2 //    mov    v2.s[2], w14
  1397      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1398      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1399      WORD $0x3d800522 //    str    q2, [x9, #16]
  1400      WORD $0x2945ac0a //    ldp    w10, w11, [x0, #44]
  1401      WORD $0xb940340c //    ldr    w12, [x0, #52]
  1402      WORD $0x53087d4d //    lsr    w13, w10, #8
  1403      WORD $0x138a5d6a //    extr    w10, w11, w10, #23
  1404      WORD $0x1e2701a2 //    fmov    s2, w13
  1405      WORD $0x53067d6e //    lsr    w14, w11, #6
  1406      WORD $0x4e0c1d42 //    mov    v2.s[1], w10
  1407      WORD $0x138b558b //    extr    w11, w12, w11, #21
  1408      WORD $0x4e141dc2 //    mov    v2.s[2], w14
  1409      WORD $0x4e1c1d62 //    mov    v2.s[3], w11
  1410      WORD $0x4e201c42 //    and    v2.16b, v2.16b, v0.16b
  1411      WORD $0x3d800922 //    str    q2, [x9, #32]
  1412      WORD $0x2946ac0a //    ldp    w10, w11, [x0, #52]
  1413      WORD $0x9100f000 //    add    x0, x0, #60
  1414      WORD $0x53047d4c //    lsr    w12, w10, #4
  1415      WORD $0x138a4d6a //    extr    w10, w11, w10, #19
  1416      WORD $0x0e040d62 //    dup    v2.2s, w11
  1417      WORD $0x1e270183 //    fmov    s3, w12
  1418      WORD $0x2ea14442 //    ushl    v2.2s, v2.2s, v1.2s
  1419      WORD $0x4e0c1d43 //    mov    v3.s[1], w10
  1420      WORD $0x6e180443 //    mov    v3.d[1], v2.d[0]
  1421      WORD $0x4e201c62 //    and    v2.16b, v3.16b, v0.16b
  1422      WORD $0x3d800d22 //    str    q2, [x9, #48]
  1423      WORD $0x91020129 //    add    x9, x9, #128
  1424      BNE LBB0_48
  1425      JMP LBB0_99
  1426  LBB0_49:
  1427      WORD $0x7100805f //    cmp    w2, #32 
  1428      BLT LBB0_99
  1429      WORD $0x91010029 //    add    x9, x1, #64
  1430      WORD $0x6f01e660 //    movi    v0.2d, #0x00ffff0000ffff
  1431  LBB0_51:
  1432      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1433      WORD $0xf1000508 //    subs    x8, x8, #1
  1434      WORD $0x53107d4c //    lsr    w12, w10, #16
  1435      WORD $0x1e270141 //    fmov    s1, w10
  1436      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1437      WORD $0x53107d6d //    lsr    w13, w11, #16
  1438      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1439      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1440      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1441      WORD $0x3c9c0121 //    stur    q1, [x9, #-64]
  1442      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
  1443      WORD $0x53107d4c //    lsr    w12, w10, #16
  1444      WORD $0x1e270141 //    fmov    s1, w10
  1445      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1446      WORD $0x53107d6d //    lsr    w13, w11, #16
  1447      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1448      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1449      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1450      WORD $0x3c9d0121 //    stur    q1, [x9, #-48]
  1451      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
  1452      WORD $0x53107d4c //    lsr    w12, w10, #16
  1453      WORD $0x1e270141 //    fmov    s1, w10
  1454      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1455      WORD $0x53107d6d //    lsr    w13, w11, #16
  1456      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1457      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1458      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1459      WORD $0x3c9e0121 //    stur    q1, [x9, #-32]
  1460      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  1461      WORD $0x53107d4c //    lsr    w12, w10, #16
  1462      WORD $0x1e270141 //    fmov    s1, w10
  1463      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1464      WORD $0x53107d6d //    lsr    w13, w11, #16
  1465      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1466      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1467      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1468      WORD $0x3c9f0121 //    stur    q1, [x9, #-16]
  1469      WORD $0x29442c0a //    ldp    w10, w11, [x0, #32]
  1470      WORD $0x53107d4c //    lsr    w12, w10, #16
  1471      WORD $0x1e270141 //    fmov    s1, w10
  1472      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1473      WORD $0x53107d6d //    lsr    w13, w11, #16
  1474      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1475      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1476      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1477      WORD $0x3d800121 //    str    q1, [x9]
  1478      WORD $0x29452c0a //    ldp    w10, w11, [x0, #40]
  1479      WORD $0x53107d4c //    lsr    w12, w10, #16
  1480      WORD $0x1e270141 //    fmov    s1, w10
  1481      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1482      WORD $0x53107d6d //    lsr    w13, w11, #16
  1483      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1484      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1485      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1486      WORD $0x3d800521 //    str    q1, [x9, #16]
  1487      WORD $0x29462c0a //    ldp    w10, w11, [x0, #48]
  1488      WORD $0x53107d4c //    lsr    w12, w10, #16
  1489      WORD $0x1e270141 //    fmov    s1, w10
  1490      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1491      WORD $0x53107d6d //    lsr    w13, w11, #16
  1492      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1493      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1494      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1495      WORD $0x3d800921 //    str    q1, [x9, #32]
  1496      WORD $0x29472c0a //    ldp    w10, w11, [x0, #56]
  1497      WORD $0x91010000 //    add    x0, x0, #64
  1498      WORD $0x53107d4c //    lsr    w12, w10, #16
  1499      WORD $0x1e270141 //    fmov    s1, w10
  1500      WORD $0x4e0c1d81 //    mov    v1.s[1], w12
  1501      WORD $0x53107d6d //    lsr    w13, w11, #16
  1502      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1503      WORD $0x4e1c1da1 //    mov    v1.s[3], w13
  1504      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1505      WORD $0x3d800d21 //    str    q1, [x9, #48]
  1506      WORD $0x91020129 //    add    x9, x9, #128
  1507      BNE LBB0_51
  1508      JMP LBB0_99
  1509  LBB0_52:
  1510      WORD $0x7100805f //    cmp    w2, #32
  1511      BLT LBB0_99
  1512      WORD $0x91010029 //    add    x9, x1, #64
  1513      WORD $0x4f00d420 //    movi    v0.4s, #1, msl #16
  1514  LBB0_54:
  1515      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1516      WORD $0xb940080c //    ldr    w12, [x0, #8]
  1517      WORD $0xf1000508 //    subs    x8, x8, #1
  1518      WORD $0x138a456d //    extr    w13, w11, w10, #17
  1519      WORD $0x1e270141 //    fmov    s1, w10
  1520      WORD $0x53027d6e //    lsr    w14, w11, #2
  1521      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1522      WORD $0x138b4d8b //    extr    w11, w12, w11, #19
  1523      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1524      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1525      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1526      WORD $0x3c9c0121 //    stur    q1, [x9, #-64]
  1527      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
  1528      WORD $0xb940100c //    ldr    w12, [x0, #16]
  1529      WORD $0x53047d4d //    lsr    w13, w10, #4
  1530      WORD $0x138a556a //    extr    w10, w11, w10, #21
  1531      WORD $0x1e2701a1 //    fmov    s1, w13
  1532      WORD $0x53067d6e //    lsr    w14, w11, #6
  1533      WORD $0x4e0c1d41 //    mov    v1.s[1], w10
  1534      WORD $0x138b5d8b //    extr    w11, w12, w11, #23
  1535      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1536      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1537      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1538      WORD $0x3c9d0121 //    stur    q1, [x9, #-48]
  1539      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
  1540      WORD $0xb940180c //    ldr    w12, [x0, #24]
  1541      WORD $0x53087d4d //    lsr    w13, w10, #8
  1542      WORD $0x138a656a //    extr    w10, w11, w10, #25
  1543      WORD $0x1e2701a1 //    fmov    s1, w13
  1544      WORD $0x530a7d6e //    lsr    w14, w11, #10
  1545      WORD $0x4e0c1d41 //    mov    v1.s[1], w10
  1546      WORD $0x138b6d8b //    extr    w11, w12, w11, #27
  1547      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1548      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1549      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1550      WORD $0x3c9e0121 //    stur    q1, [x9, #-32]
  1551      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  1552      WORD $0xb940200c //    ldr    w12, [x0, #32]
  1553      WORD $0x530c7d4d //    lsr    w13, w10, #12
  1554      WORD $0x138a756a //    extr    w10, w11, w10, #29
  1555      WORD $0x1e2701a1 //    fmov    s1, w13
  1556      WORD $0x530e7d6e //    lsr    w14, w11, #14
  1557      WORD $0x4e0c1d41 //    mov    v1.s[1], w10
  1558      WORD $0x138b7d8b //    extr    w11, w12, w11, #31
  1559      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1560      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1561      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1562      WORD $0x3c9f0121 //    stur    q1, [x9, #-16]
  1563      WORD $0x29442c0a //    ldp    w10, w11, [x0, #32]
  1564      WORD $0xb940280c //    ldr    w12, [x0, #40]
  1565      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1566      WORD $0x53017d6d //    lsr    w13, w11, #1
  1567      WORD $0x1e270141 //    fmov    s1, w10
  1568      WORD $0x138b498b //    extr    w11, w12, w11, #18
  1569      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1570      WORD $0x53037d8c //    lsr    w12, w12, #3
  1571      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1572      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1573      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1574      WORD $0x3d800121 //    str    q1, [x9]
  1575      WORD $0x29452c0a //    ldp    w10, w11, [x0, #40]
  1576      WORD $0xb940300c //    ldr    w12, [x0, #48]
  1577      WORD $0x138a516a //    extr    w10, w11, w10, #20
  1578      WORD $0x53057d6d //    lsr    w13, w11, #5
  1579      WORD $0x1e270141 //    fmov    s1, w10
  1580      WORD $0x138b598b //    extr    w11, w12, w11, #22
  1581      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1582      WORD $0x53077d8c //    lsr    w12, w12, #7
  1583      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1584      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1585      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1586      WORD $0x3d800521 //    str    q1, [x9, #16]
  1587      WORD $0x29462c0a //    ldp    w10, w11, [x0, #48]
  1588      WORD $0xb940380c //    ldr    w12, [x0, #56]
  1589      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1590      WORD $0x53097d6d //    lsr    w13, w11, #9
  1591      WORD $0x1e270141 //    fmov    s1, w10
  1592      WORD $0x138b698b //    extr    w11, w12, w11, #26
  1593      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1594      WORD $0x530b7d8c //    lsr    w12, w12, #11
  1595      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1596      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1597      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1598      WORD $0x3d800921 //    str    q1, [x9, #32]
  1599      WORD $0x29472c0a //    ldp    w10, w11, [x0, #56]
  1600      WORD $0xb940400c //    ldr    w12, [x0, #64]
  1601      WORD $0x91011000 //    add    x0, x0, #68
  1602      WORD $0x138a716a //    extr    w10, w11, w10, #28
  1603      WORD $0x530d7d6d //    lsr    w13, w11, #13
  1604      WORD $0x1e270141 //    fmov    s1, w10
  1605      WORD $0x138b798b //    extr    w11, w12, w11, #30
  1606      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1607      WORD $0x530f7d8c //    lsr    w12, w12, #15
  1608      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1609      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1610      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1611      WORD $0x3d800d21 //    str    q1, [x9, #48]
  1612      WORD $0x91020129 //    add    x9, x9, #128
  1613      BNE LBB0_54
  1614      JMP LBB0_99
  1615  LBB0_55:
  1616      WORD $0x7100805f //    cmp    w2, #32
  1617      BLT LBB0_99
  1618      WORD $0x91010029 //    add    x9, x1, #64
  1619      WORD $0x4f00d460 //    movi    v0.4s, #3, msl #16
  1620  LBB0_57:        
  1621      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1622      WORD $0xb940080c //    ldr    w12, [x0, #8]
  1623      WORD $0xf1000508 //    subs    x8, x8, #1
  1624      WORD $0x138a496d //    extr    w13, w11, w10, #18
  1625      WORD $0x1e270141 //    fmov    s1, w10
  1626      WORD $0x53047d6e //    lsr    w14, w11, #4
  1627      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1628      WORD $0x138b598b //    extr    w11, w12, w11, #22
  1629      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1630      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1631      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1632      WORD $0x3c9c0121 //    stur    q1, [x9, #-64]
  1633      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
  1634      WORD $0xb940100c //    ldr    w12, [x0, #16]
  1635      WORD $0x53087d4d //    lsr    w13, w10, #8
  1636      WORD $0x138a696a //    extr    w10, w11, w10, #26
  1637      WORD $0x1e2701a1 //    fmov    s1, w13
  1638      WORD $0x530c7d6e //    lsr    w14, w11, #12
  1639      WORD $0x4e0c1d41 //    mov    v1.s[1], w10
  1640      WORD $0x138b798b //    extr    w11, w12, w11, #30
  1641      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1642      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1643      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1644      WORD $0x3c9d0121 //    stur    q1, [x9, #-48]
  1645      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
  1646      WORD $0xb940180c //    ldr    w12, [x0, #24]
  1647      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1648      WORD $0x53027d6d //    lsr    w13, w11, #2
  1649      WORD $0x1e270141 //    fmov    s1, w10
  1650      WORD $0x138b518b //    extr    w11, w12, w11, #20
  1651      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1652      WORD $0x53067d8c //    lsr    w12, w12, #6
  1653      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1654      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1655      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1656      WORD $0x3c9e0121 //    stur    q1, [x9, #-32]
  1657      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  1658      WORD $0xb940200c //    ldr    w12, [x0, #32]
  1659      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1660      WORD $0x530a7d6d //    lsr    w13, w11, #10
  1661      WORD $0x1e270141 //    fmov    s1, w10
  1662      WORD $0x138b718b //    extr    w11, w12, w11, #28
  1663      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1664      WORD $0x530e7d8c //    lsr    w12, w12, #14
  1665      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1666      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1667      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1668      WORD $0x3c9f0121 //    stur    q1, [x9, #-16]
  1669      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  1670      WORD $0xb9402c0c //    ldr    w12, [x0, #44]
  1671      WORD $0x138a496d //    extr    w13, w11, w10, #18
  1672      WORD $0x1e270141 //    fmov    s1, w10
  1673      WORD $0x53047d6e //    lsr    w14, w11, #4
  1674      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1675      WORD $0x138b598b //    extr    w11, w12, w11, #22
  1676      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1677      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1678      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1679      WORD $0x3d800121 //    str    q1, [x9]
  1680      WORD $0x2945ac0a //    ldp    w10, w11, [x0, #44]
  1681      WORD $0xb940340c //    ldr    w12, [x0, #52]
  1682      WORD $0x53087d4d //    lsr    w13, w10, #8
  1683      WORD $0x138a696a //    extr    w10, w11, w10, #26
  1684      WORD $0x1e2701a1 //    fmov    s1, w13
  1685      WORD $0x530c7d6e //    lsr    w14, w11, #12
  1686      WORD $0x4e0c1d41 //    mov    v1.s[1], w10
  1687      WORD $0x138b798b //    extr    w11, w12, w11, #30
  1688      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1689      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1690      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1691      WORD $0x3d800521 //    str    q1, [x9, #16]
  1692      WORD $0x2946ac0a //    ldp    w10, w11, [x0, #52]
  1693      WORD $0xb9403c0c //    ldr    w12, [x0, #60]
  1694      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1695      WORD $0x53027d6d //    lsr    w13, w11, #2
  1696      WORD $0x1e270141 //    fmov    s1, w10
  1697      WORD $0x138b518b //    extr    w11, w12, w11, #20
  1698      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1699      WORD $0x53067d8c //    lsr    w12, w12, #6
  1700      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1701      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1702      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1703      WORD $0x3d800921 //    str    q1, [x9, #32]
  1704      WORD $0x2947ac0a //    ldp    w10, w11, [x0, #60]
  1705      WORD $0xb940440c //    ldr    w12, [x0, #68]
  1706      WORD $0x91012000 //    add    x0, x0, #72
  1707      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1708      WORD $0x530a7d6d //    lsr    w13, w11, #10
  1709      WORD $0x1e270141 //    fmov    s1, w10
  1710      WORD $0x138b718b //    extr    w11, w12, w11, #28
  1711      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1712      WORD $0x530e7d8c //    lsr    w12, w12, #14
  1713      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1714      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1715      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1716      WORD $0x3d800d21 //    str    q1, [x9, #48]
  1717      WORD $0x91020129 //    add    x9, x9, #128
  1718      BNE LBB0_57
  1719      JMP LBB0_99
  1720  LBB0_58:
  1721      WORD $0x7100805f //    cmp    w2, #32
  1722      BLT LBB0_99
  1723      MOVD $0x000000120000001f, R5 // LCPI0_102
  1724      MOVD $0x0000000e00000001, R1 // LCPI0_103
  1725      MOVD $0x000000110000001e, R6 // LCPI0_104
  1726      MOVD $0x0000000f00000002, R2 // LCPI0_105
  1727      MOVD $0x0000000f0000001c, R7 // LCPI0_106
  1728      MOVD $0x0000001100000004, R3 // LCPI0_107
  1729      MOVD $0x0000000e0000001b, R16 // LCPI0_108
  1730      MOVD $0x0000001200000005, R4 // LCPI0_109
  1731      WORD $0x91010029 //    add    x9, x1, #64
  1732      WORD $0x4f00d4e0 //    movi    v0.4s, #7, msl #16
  1733      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
  1734      WORD $0x2ea0b8c6 //    neg    v6.2s, v6.2s
  1735      WORD $0x2ea0b8e7 //    neg    v7.2s, v7.2s
  1736      WORD $0x2ea0ba10 //    neg    v16.2s, v16.2s
  1737  LBB0_60:
  1738      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1739      WORD $0xb940080c //    ldr    w12, [x0, #8]
  1740      WORD $0xf1000508 //    subs    x8, x8, #1
  1741      WORD $0x138a4d6d //    extr    w13, w11, w10, #19
  1742      WORD $0x1e270151 //    fmov    s17, w10
  1743      WORD $0x53067d6e //    lsr    w14, w11, #6
  1744      WORD $0x4e0c1db1 //    mov    v17.s[1], w13
  1745      WORD $0x138b658b //    extr    w11, w12, w11, #25
  1746      WORD $0x4e141dd1 //    mov    v17.s[2], w14
  1747      WORD $0x4e1c1d71 //    mov    v17.s[3], w11
  1748      WORD $0x4e201e31 //    and    v17.16b, v17.16b, v0.16b
  1749      WORD $0x3c9c0131 //    stur    q17, [x9, #-64]
  1750      WORD $0xb940080a //    ldr    w10, [x0, #8]
  1751      WORD $0xfc40c011 //    ldur    d17, [x0, #12]
  1752      WORD $0x1e270152 //    fmov    s18, w10
  1753      WORD $0x0e913a52 //    zip1    v25.2s, v25.2s, v17.2s
  1754      WORD $0x530c7d4b //    lsr    w11, w10, #12
  1755      WORD $0x2ea14633 //    ushl    v19.2s, v17.2s, v1.2s
  1756      WORD $0x0e0c3e2a //    mov    w10, v17.s[1]
  1757      WORD $0x2ea54651 //    ushl    v17.2s, v25.2s, v5.2s
  1758      WORD $0x1e270172 //    fmov    s18, w11
  1759      WORD $0x0eb11e71 //    orr    v17.8b, v19.8b, v17.8b
  1760      WORD $0x6e0c0632 //    mov    v25.s[1], v17.s[0]
  1761      WORD $0x53057d4a //    lsr    w10, w10, #5
  1762      WORD $0x6e142632 //    mov    v25.s[2], v17.s[1]
  1763      WORD $0x4e1c1d52 //    mov    v25.s[3], w10
  1764      WORD $0x4e201e51 //    and    v17.16b, v25.16b, v0.16b
  1765      WORD $0x3c9d0131 //    stur    q17, [x9, #-48]
  1766      WORD $0x29422c0a //    ldp    w10, w11, [x0, #16]
  1767      WORD $0xfd400c11 //    ldr    d17, [x0, #24]
  1768      WORD $0x1e270172 //    fmov    s18, w11
  1769      WORD $0x138a616a //    extr    w10, w11, w10, #24
  1770      WORD $0x0e913a52 //    zip1    v25.2s, v25.2s, v17.2s
  1771      WORD $0x530b7d6c //    lsr    w12, w11, #11
  1772      WORD $0x2ea24631 //    ushl    v17.2s, v17.2s, v2.2s
  1773      WORD $0x1e270153 //    fmov    s19, w10
  1774      WORD $0x2ea64652 //    ushl    v25.2s, v25.2s, v6.2s
  1775      WORD $0x4e0c1d93 //    mov    v19.s[1], w12
  1776      WORD $0x0eb21e31 //    orr    v17.8b, v17.8b, v25.8b
  1777      WORD $0x6e180633 //    mov    v19.d[1], v17.d[0]
  1778      WORD $0x4e201e71 //    and    v17.16b, v19.16b, v0.16b
  1779      WORD $0x3c9e0131 //    stur    q17, [x9, #-32]
  1780      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
  1781      WORD $0xb940240c //    ldr    w12, [x0, #36]
  1782      WORD $0x53047d4d //    lsr    w13, w10, #4
  1783      WORD $0x138a5d6a //    extr    w10, w11, w10, #23
  1784      WORD $0x1e2701b1 //    fmov    s17, w13
  1785      WORD $0x530a7d6e //    lsr    w14, w11, #10
  1786      WORD $0x4e0c1d51 //    mov    v17.s[1], w10
  1787      WORD $0x138b758b //    extr    w11, w12, w11, #29
  1788      WORD $0x4e141dd1 //    mov    v17.s[2], w14
  1789      WORD $0x4e1c1d71 //    mov    v17.s[3], w11
  1790      WORD $0x4e201e31 //    and    v17.16b, v17.16b, v0.16b
  1791      WORD $0x3c9f0131 //    stur    q17, [x9, #-16]
  1792      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  1793      WORD $0xb9402c0c //    ldr    w12, [x0, #44]
  1794      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1795      WORD $0x53037d6d //    lsr    w13, w11, #3
  1796      WORD $0x1e270151 //    fmov    s17, w10
  1797      WORD $0x138b598b //    extr    w11, w12, w11, #22
  1798      WORD $0x4e0c1db1 //    mov    v17.s[1], w13
  1799      WORD $0x53097d8c //    lsr    w12, w12, #9
  1800      WORD $0x4e141d71 //    mov    v17.s[2], w11
  1801      WORD $0x4e1c1d91 //    mov    v17.s[3], w12
  1802      WORD $0x4e201e31 //    and    v17.16b, v17.16b, v0.16b
  1803      WORD $0x3d800131 //    str    q17, [x9]
  1804      WORD $0xfd401811 //    ldr    d17, [x0, #48]
  1805      WORD $0xbd402c12 //    ldr    s18, [x0, #44]
  1806      WORD $0xb940380a //    ldr    w10, [x0, #56]
  1807      WORD $0x2ea34633 //    ushl    v19.2s, v17.2s, v3.2s
  1808      WORD $0x0e913a52 //    zip1    v25.2s, v25.2s, v17.2s
  1809      WORD $0x0e0c3e2b //    mov    w11, v17.s[1]
  1810      WORD $0x2ea74651 //    ushl    v17.2s, v25.2s, v7.2s
  1811      WORD $0x53027d6c //    lsr    w12, w11, #2
  1812      WORD $0x0eb11e71 //    orr    v17.8b, v19.8b, v17.8b
  1813      WORD $0x138b554a //    extr    w10, w10, w11, #21
  1814      WORD $0x4e141d91 //    mov    v17.s[2], w12
  1815      WORD $0x4e1c1d51 //    mov    v17.s[3], w10
  1816      WORD $0x4e201e31 //    and    v17.16b, v17.16b, v0.16b
  1817      WORD $0x3d800531 //    str    q17, [x9, #16]
  1818      WORD $0xb940380a //    ldr    w10, [x0, #56]
  1819      WORD $0xfc43c011 //    ldur    d17, [x0, #60]
  1820      WORD $0x1e270152 //    fmov    s18, w10
  1821      WORD $0x0e913a52 //    zip1    v25.2s, v25.2s, v17.2s
  1822      WORD $0x53087d4b //    lsr    w11, w10, #8
  1823      WORD $0x2ea44633 //    ushl    v19.2s, v17.2s, v4.2s
  1824      WORD $0x0e0c3e2a //    mov    w10, v17.s[1]
  1825      WORD $0x2eb04651 //    ushl    v17.2s, v25.2s, v16.2s
  1826      WORD $0x1e270172 //    fmov    s18, w11
  1827      WORD $0x0eb11e71 //    orr    v17.8b, v19.8b, v17.8b
  1828      WORD $0x6e0c0632 //    mov    v25.s[1], v17.s[0]
  1829      WORD $0x53017d4a //    lsr    w10, w10, #1
  1830      WORD $0x6e142632 //    mov    v25.s[2], v17.s[1]
  1831      WORD $0x4e1c1d52 //    mov    v25.s[3], w10
  1832      WORD $0x4e201e51 //    and    v17.16b, v25.16b, v0.16b
  1833      WORD $0x3d800931 //    str    q17, [x9, #32]
  1834      WORD $0x29482c0a //    ldp    w10, w11, [x0, #64]
  1835      WORD $0xb940480c //    ldr    w12, [x0, #72]
  1836      WORD $0x91013000 //    add    x0, x0, #76
  1837      WORD $0x138a516a //    extr    w10, w11, w10, #20
  1838      WORD $0x53077d6d //    lsr    w13, w11, #7
  1839      WORD $0x1e270151 //    fmov    s17, w10
  1840      WORD $0x138b698b //    extr    w11, w12, w11, #26
  1841      WORD $0x4e0c1db1 //    mov    v17.s[1], w13
  1842      WORD $0x530d7d8c //    lsr    w12, w12, #13
  1843      WORD $0x4e141d71 //    mov    v17.s[2], w11
  1844      WORD $0x4e1c1d91 //    mov    v17.s[3], w12
  1845      WORD $0x4e201e31 //    and    v17.16b, v17.16b, v0.16b
  1846      WORD $0x3d800d31 //    str    q17, [x9, #48]
  1847      WORD $0x91020129 //    add    x9, x9, #128
  1848      BNE LBB0_60
  1849      JMP LBB0_99
  1850  LBB0_61:
  1851      WORD $0x7100805f //    cmp    w2, #32
  1852      BLT LBB0_99
  1853      WORD $0x91010029 //    add    x9, x1, #64
  1854      WORD $0x4f00d5e0 //    movi    v0.4s, #15, msl #16
  1855  LBB0_63:
  1856      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1857      WORD $0xb940080c //    ldr    w12, [x0, #8]
  1858      WORD $0xf1000508 //    subs    x8, x8, #1
  1859      WORD $0x138a516d //    extr    w13, w11, w10, #20
  1860      WORD $0x1e270141 //    fmov    s1, w10
  1861      WORD $0x53087d6e //    lsr    w14, w11, #8
  1862      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1863      WORD $0x138b718b //    extr    w11, w12, w11, #28
  1864      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1865      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1866      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1867      WORD $0x3c9c0121 //    stur    q1, [x9, #-64]
  1868      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
  1869      WORD $0xb940100c //    ldr    w12, [x0, #16]
  1870      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1871      WORD $0x53047d6d //    lsr    w13, w11, #4
  1872      WORD $0x1e270141 //    fmov    s1, w10
  1873      WORD $0x138b618b //    extr    w11, w12, w11, #24
  1874      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1875      WORD $0x530c7d8c //    lsr    w12, w12, #12
  1876      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1877      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1878      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1879      WORD $0x3c9d0121 //    stur    q1, [x9, #-48]
  1880      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
  1881      WORD $0xb9401c0c //    ldr    w12, [x0, #28]
  1882      WORD $0x138a516d //    extr    w13, w11, w10, #20
  1883      WORD $0x1e270141 //    fmov    s1, w10
  1884      WORD $0x53087d6e //    lsr    w14, w11, #8
  1885      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1886      WORD $0x138b718b //    extr    w11, w12, w11, #28
  1887      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1888      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1889      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1890      WORD $0x3c9e0121 //    stur    q1, [x9, #-32]
  1891      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
  1892      WORD $0xb940240c //    ldr    w12, [x0, #36]
  1893      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1894      WORD $0x53047d6d //    lsr    w13, w11, #4
  1895      WORD $0x1e270141 //    fmov    s1, w10
  1896      WORD $0x138b618b //    extr    w11, w12, w11, #24
  1897      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1898      WORD $0x530c7d8c //    lsr    w12, w12, #12
  1899      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1900      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1901      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1902      WORD $0x3c9f0121 //    stur    q1, [x9, #-16]
  1903      WORD $0x29452c0a //    ldp    w10, w11, [x0, #40]
  1904      WORD $0xb940300c //    ldr    w12, [x0, #48]
  1905      WORD $0x138a516d //    extr    w13, w11, w10, #20
  1906      WORD $0x1e270141 //    fmov    s1, w10
  1907      WORD $0x53087d6e //    lsr    w14, w11, #8
  1908      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1909      WORD $0x138b718b //    extr    w11, w12, w11, #28
  1910      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1911      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1912      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1913      WORD $0x3d800121 //    str    q1, [x9]
  1914      WORD $0x29462c0a //    ldp    w10, w11, [x0, #48]
  1915      WORD $0xb940380c //    ldr    w12, [x0, #56]
  1916      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1917      WORD $0x53047d6d //    lsr    w13, w11, #4
  1918      WORD $0x1e270141 //    fmov    s1, w10
  1919      WORD $0x138b618b //    extr    w11, w12, w11, #24
  1920      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1921      WORD $0x530c7d8c //    lsr    w12, w12, #12
  1922      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1923      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1924      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1925      WORD $0x3d800521 //    str    q1, [x9, #16]
  1926      WORD $0x2947ac0a //    ldp    w10, w11, [x0, #60]
  1927      WORD $0xb940440c //    ldr    w12, [x0, #68]
  1928      WORD $0x138a516d //    extr    w13, w11, w10, #20
  1929      WORD $0x1e270141 //    fmov    s1, w10
  1930      WORD $0x53087d6e //    lsr    w14, w11, #8
  1931      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1932      WORD $0x138b718b //    extr    w11, w12, w11, #28
  1933      WORD $0x4e141dc1 //    mov    v1.s[2], w14
  1934      WORD $0x4e1c1d61 //    mov    v1.s[3], w11
  1935      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1936      WORD $0x3d800921 //    str    q1, [x9, #32]
  1937      WORD $0x2948ac0a //    ldp    w10, w11, [x0, #68]
  1938      WORD $0xb9404c0c //    ldr    w12, [x0, #76]
  1939      WORD $0x91014000 //    add    x0, x0, #80
  1940      WORD $0x138a416a //    extr    w10, w11, w10, #16
  1941      WORD $0x53047d6d //    lsr    w13, w11, #4
  1942      WORD $0x1e270141 //    fmov    s1, w10
  1943      WORD $0x138b618b //    extr    w11, w12, w11, #24
  1944      WORD $0x4e0c1da1 //    mov    v1.s[1], w13
  1945      WORD $0x530c7d8c //    lsr    w12, w12, #12
  1946      WORD $0x4e141d61 //    mov    v1.s[2], w11
  1947      WORD $0x4e1c1d81 //    mov    v1.s[3], w12
  1948      WORD $0x4e201c21 //    and    v1.16b, v1.16b, v0.16b
  1949      WORD $0x3d800d21 //    str    q1, [x9, #48]
  1950      WORD $0x91020129 //    add    x9, x9, #128
  1951      BNE LBB0_63
  1952      JMP LBB0_99
  1953  LBB0_64:
  1954      WORD $0x7100805f //    cmp    w2, #32
  1955      BLT LBB0_99
  1956      MOVD $0x000000130000001e, R7 // LCPI0_90
  1957      MOVD $0x0000000d00000002, R1 // LCPI0_91
  1958      MOVD $0x000000120000001d, R16 // LCPI0_92
  1959      MOVD $0x0000000e00000003, R2 // LCPI0_93
  1960      MOVD $0x000000110000001c, R17 // LCPI0_94
  1961      MOVD $0x0000000f00000004, R3 // LCPI0_95
  1962      MOVD $0x0000000f0000001a, R25 // LCPI0_96
  1963      MOVD $0x0000001100000006, R4 // LCPI0_97
  1964      MOVD $0x0000000e00000019, R19 // LCPI0_98
  1965      MOVD $0x0000001200000007, R5 // LCPI0_99
  1966      MOVD $0x0000000d00000018, R20 // LCPI0_100
  1967      MOVD $0x0000001300000008, R6 // LCPI0_101
  1968      WORD $0x91010029 //    add    x9, x1, #64
  1969      WORD $0x4f00d7e0 //    movi    v0.4s, #31, msl #16
  1970      WORD $0x2ea0b8e7 //    neg    v7.2s, v7.2s
  1971      WORD $0x2ea0ba10 //    neg    v16.2s, v16.2s
  1972      WORD $0x2ea0ba31 //    neg    v17.2s, v17.2s
  1973      WORD $0x2ea0bb39 // neg    v25.2s, v25.2s
  1974      WORD $0x2ea0ba73 //    neg    v19.2s, v19.2s
  1975      WORD $0x2ea0ba94 //    neg    v20.2s, v20.2s
  1976  LBB0_66:
  1977      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  1978      WORD $0xb940080c //    ldr    w12, [x0, #8]
  1979      WORD $0xf1000508 //    subs    x8, x8, #1
  1980      WORD $0x138a556d //    extr    w13, w11, w10, #21
  1981      WORD $0x1e270155 //    fmov    s21, w10
  1982      WORD $0x530a7d6e //    lsr    w14, w11, #10
  1983      WORD $0x4e0c1db5 //    mov    v21.s[1], w13
  1984      WORD $0x138b7d8b //    extr    w11, w12, w11, #31
  1985      WORD $0x4e141dd5 //    mov    v21.s[2], w14
  1986      WORD $0x4e1c1d75 //    mov    v21.s[3], w11
  1987      WORD $0x4e201eb5 //    and    v21.16b, v21.16b, v0.16b
  1988      WORD $0x3c9c0135 //    stur    q21, [x9, #-64]
  1989      WORD $0x29412c0a //    ldp    w10, w11, [x0, #8]
  1990      WORD $0xfd400815 //    ldr    d21, [x0, #16]
  1991      WORD $0x1e270176 //    fmov    s22, w11
  1992      WORD $0x138a516a //    extr    w10, w11, w10, #20
  1993      WORD $0x0e953ad6 //    zip1    v22.2s, v22.2s, v21.2s
  1994      WORD $0x53097d6c //    lsr    w12, w11, #9
  1995      WORD $0x2ea146b5 //    ushl    v21.2s, v21.2s, v1.2s
  1996      WORD $0x1e270157 //    fmov    s23, w10
  1997      WORD $0x2ea746d6 //    ushl    v22.2s, v22.2s, v7.2s
  1998      WORD $0x4e0c1d97 //    mov    v23.s[1], w12
  1999      WORD $0x0eb61eb5 //    orr    v21.8b, v21.8b, v22.8b
  2000      WORD $0x6e1806b7 //    mov    v23.d[1], v21.d[0]
  2001      WORD $0x4e201ef5 //    and    v21.16b, v23.16b, v0.16b
  2002      WORD $0x3c9d0135 //    stur    q21, [x9, #-48]
  2003      WORD $0xb940140a //    ldr    w10, [x0, #20]
  2004      WORD $0xfd400c15 //    ldr    d21, [x0, #24]
  2005      WORD $0x1e270156 //    fmov    s22, w10
  2006      WORD $0x0e953ad6 //    zip1    v22.2s, v22.2s, v21.2s
  2007      WORD $0x53087d4b //    lsr    w11, w10, #8
  2008      WORD $0x2ea246b7 //    ushl    v23.2s, v21.2s, v2.2s
  2009      WORD $0x0e0c3eaa //    mov    w10, v21.s[1]
  2010      WORD $0x2eb046d5 //    ushl    v21.2s, v22.2s, v16.2s
  2011      WORD $0x1e270176 //    fmov    s22, w11
  2012      WORD $0x0eb51ef5 //    orr    v21.8b, v23.8b, v21.8b
  2013      WORD $0x6e0c06b6 //    mov    v22.s[1], v21.s[0]
  2014      WORD $0x53077d4a //    lsr    w10, w10, #7
  2015      WORD $0x6e1426b6 //    mov    v22.s[2], v21.s[1]
  2016      WORD $0x4e1c1d56 //    mov    v22.s[3], w10
  2017      WORD $0x4e201ed5 //    and    v21.16b, v22.16b, v0.16b
  2018      WORD $0x3c9e0135 //    stur    q21, [x9, #-32]
  2019      WORD $0xfd401015 //    ldr    d21, [x0, #32]
  2020      WORD $0xbd401c16 //    ldr    s22, [x0, #28]
  2021      WORD $0xb940280a //    ldr    w10, [x0, #40]
  2022      WORD $0x2ea346b7 //    ushl    v23.2s, v21.2s, v3.2s
  2023      WORD $0x0e953ad6 //    zip1    v22.2s, v22.2s, v21.2s
  2024      WORD $0x0e0c3eab //    mov    w11, v21.s[1]
  2025      WORD $0x2eb146d5 //    ushl    v21.2s, v22.2s, v17.2s
  2026      WORD $0x53067d6c //    lsr    w12, w11, #6
  2027      WORD $0x0eb51ef5 //    orr    v21.8b, v23.8b, v21.8b
  2028      WORD $0x138b6d4a //    extr    w10, w10, w11, #27
  2029      WORD $0x4e141d95 //    mov    v21.s[2], w12
  2030      WORD $0x4e1c1d55 //    mov    v21.s[3], w10
  2031      WORD $0x4e201eb5 //    and    v21.16b, v21.16b, v0.16b
  2032      WORD $0x3c9f0135 //    stur    q21, [x9, #-16]
  2033      WORD $0x29452c0a //    ldp    w10, w11, [x0, #40]
  2034      WORD $0xfd401815 //    ldr    d21, [x0, #48]
  2035      WORD $0x1e270176 //    fmov    s22, w11
  2036      WORD $0x138a416a //    extr    w10, w11, w10, #16
  2037      WORD $0x0e953ad6 //    zip1    v22.2s, v22.2s, v21.2s
  2038      WORD $0x53057d6c //    lsr    w12, w11, #5
  2039      WORD $0x2ea446b5 //    ushl    v21.2s, v21.2s, v4.2s
  2040      WORD $0x1e270157 //    fmov    s23, w10
  2041      WORD $0x2eb246d6 //    ushl    v22.2s, v22.2s, v25.2s
  2042      WORD $0x4e0c1d97 //    mov    v23.s[1], w12
  2043      WORD $0x0eb61eb5 //    orr    v21.8b, v21.8b, v22.8b
  2044      WORD $0x6e1806b7 //    mov    v23.d[1], v21.d[0]
  2045      WORD $0x4e201ef5 //    and    v21.16b, v23.16b, v0.16b
  2046      WORD $0x3d800135 //    str    q21, [x9]
  2047      WORD $0xb940340a //    ldr    w10, [x0, #52]
  2048      WORD $0xfd401c15 //    ldr    d21, [x0, #56]
  2049      WORD $0x1e270156 //    fmov    s22, w10
  2050      WORD $0x0e953ad6 //    zip1    v22.2s, v22.2s, v21.2s
  2051      WORD $0x53047d4b //    lsr    w11, w10, #4
  2052      WORD $0x2ea546b7 //    ushl    v23.2s, v21.2s, v5.2s
  2053      WORD $0x0e0c3eaa //    mov    w10, v21.s[1]
  2054      WORD $0x2eb346d5 //    ushl    v21.2s, v22.2s, v19.2s
  2055      WORD $0x1e270176 //    fmov    s22, w11
  2056      WORD $0x0eb51ef5 //    orr    v21.8b, v23.8b, v21.8b
  2057      WORD $0x6e0c06b6 //    mov    v22.s[1], v21.s[0]
  2058      WORD $0x53037d4a //    lsr    w10, w10, #3
  2059      WORD $0x6e1426b6 //    mov    v22.s[2], v21.s[1]
  2060      WORD $0x4e1c1d56 //    mov    v22.s[3], w10
  2061      WORD $0x4e201ed5 //    and    v21.16b, v22.16b, v0.16b
  2062      WORD $0x3d800535 //    str    q21, [x9, #16]
  2063      WORD $0xfd402015 //    ldr    d21, [x0, #64]
  2064      WORD $0xbd403c16 //    ldr    s22, [x0, #60]
  2065      WORD $0xb940480a //    ldr    w10, [x0, #72]
  2066      WORD $0x2ea646b7 //    ushl    v23.2s, v21.2s, v6.2s
  2067      WORD $0x0e953ad6 //    zip1    v22.2s, v22.2s, v21.2s
  2068      WORD $0x0e0c3eab //    mov    w11, v21.s[1]
  2069      WORD $0x2eb446d5 //    ushl    v21.2s, v22.2s, v20.2s
  2070      WORD $0x53027d6c //    lsr    w12, w11, #2
  2071      WORD $0x0eb51ef5 //    orr    v21.8b, v23.8b, v21.8b
  2072      WORD $0x138b5d4a //    extr    w10, w10, w11, #23
  2073      WORD $0x4e141d95 //    mov    v21.s[2], w12
  2074      WORD $0x4e1c1d55 //    mov    v21.s[3], w10
  2075      WORD $0x4e201eb5 //    and    v21.16b, v21.16b, v0.16b
  2076      WORD $0x3d800935 //    str    q21, [x9, #32]
  2077      WORD $0x29492c0a //    ldp    w10, w11, [x0, #72]
  2078      WORD $0xb940500c //    ldr    w12, [x0, #80]
  2079      WORD $0x91015000 //    add    x0, x0, #84
  2080      WORD $0x138a316a //    extr    w10, w11, w10, #12
  2081      WORD $0x53017d6d //    lsr    w13, w11, #1
  2082      WORD $0x1e270155 //    fmov    s21, w10
  2083      WORD $0x138b598b //    extr    w11, w12, w11, #22
  2084      WORD $0x4e0c1db5 //    mov    v21.s[1], w13
  2085      WORD $0x530b7d8c //    lsr    w12, w12, #11
  2086      WORD $0x4e141d75 //    mov    v21.s[2], w11
  2087      WORD $0x4e1c1d95 //    mov    v21.s[3], w12
  2088      WORD $0x4e201eb5 //    and    v21.16b, v21.16b, v0.16b
  2089      WORD $0x3d800d35 //    str    q21, [x9, #48]
  2090      WORD $0x91020129 //    add    x9, x9, #128
  2091  
  2092      BNE LBB0_66
  2093      JMP LBB0_99
  2094  LBB0_67:
  2095      WORD $0x7100805f //    cmp    w2, #32
  2096      BLT LBB0_99
  2097      MOVD $0x0000000e00000018, R4 // LCPI0_84
  2098      MOVD $0x0000001200000008, R1 // LCPI0_85
  2099      MOVD $0x000000120000001c, R5 // LCPI0_86
  2100      MOVD $0x0000000e00000004, R2 // LCPI0_87
  2101      MOVD $0x000000140000001e, R6 // LCPI0_88
  2102      MOVD $0x0000000c00000002, R3 // LCPI0_89
  2103  
  2104      WORD $0x91010029 //    add    x9, x1, #64
  2105      WORD $0x4f01d7e0 //    movi    v0.4s, #63, msl #16
  2106      WORD $0x2ea0b884 //    neg    v4.2s, v4.2s
  2107      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
  2108      WORD $0x2ea0b8c6 //    neg    v6.2s, v6.2s
  2109  LBB0_69:
  2110      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2111      WORD $0xb940080c //    ldr    w12, [x0, #8]
  2112      WORD $0xf1000508 //    subs    x8, x8, #1
  2113      WORD $0x138a596d //    extr    w13, w11, w10, #22
  2114      WORD $0x1e270147 //    fmov    s7, w10
  2115      WORD $0x138b318b //    extr    w11, w12, w11, #12
  2116      WORD $0x4e0c1da7 //    mov    v7.s[1], w13
  2117      WORD $0x53027d8c //    lsr    w12, w12, #2
  2118      WORD $0x4e141d67 //    mov    v7.s[2], w11
  2119      WORD $0x4e1c1d87 //    mov    v7.s[3], w12
  2120      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
  2121      WORD $0x3c9c0127 //    stur    q7, [x9, #-64]
  2122      WORD $0xfc40c007 //    ldur    d7, [x0, #12]
  2123      WORD $0xbd400810 //    ldr    s16, [x0, #8]
  2124      WORD $0xb940140a //    ldr    w10, [x0, #20]
  2125      WORD $0x2ea144f1 //    ushl    v17.2s, v7.2s, v1.2s
  2126      WORD $0x0e873a10 //    zip1    v16.2s, v16.2s, v7.2s
  2127      WORD $0x0e0c3ceb //    mov    w11, v7.s[1]
  2128      WORD $0x2ea44607 //    ushl    v7.2s, v16.2s, v4.2s
  2129      WORD $0x53047d6c //    lsr    w12, w11, #4
  2130      WORD $0x0ea71e27 //    orr    v7.8b, v17.8b, v7.8b
  2131      WORD $0x138b694a //    extr    w10, w10, w11, #26
  2132      WORD $0x4e141d87 //    mov    v7.s[2], w12
  2133      WORD $0x4e1c1d47 //    mov    v7.s[3], w10
  2134      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
  2135      WORD $0x3c9d0127 //    stur    q7, [x9, #-48]
  2136      WORD $0x2942ac0a //    ldp    w10, w11, [x0, #20]
  2137      WORD $0xfc41c007 //    ldur    d7, [x0, #28]
  2138      WORD $0x1e270170 //    fmov    s16, w11
  2139      WORD $0x138a416a //    extr    w10, w11, w10, #16
  2140      WORD $0x0e873a10 //    zip1    v16.2s, v16.2s, v7.2s
  2141      WORD $0x53067d6c //    lsr    w12, w11, #6
  2142      WORD $0x2ea244e7 //    ushl    v7.2s, v7.2s, v2.2s
  2143      WORD $0x1e270151 //    fmov    s17, w10
  2144      WORD $0x2ea54610 //    ushl    v16.2s, v16.2s, v5.2s
  2145      WORD $0x4e0c1d91 //    mov    v17.s[1], w12
  2146      WORD $0x0eb01ce7 //    orr    v7.8b, v7.8b, v16.8b
  2147      WORD $0x6e1804f1 //    mov    v17.d[1], v7.d[0]
  2148      WORD $0x4e201e27 //    and    v7.16b, v17.16b, v0.16b
  2149      WORD $0x3c9e0127 //    stur    q7, [x9, #-32]
  2150      WORD $0xb940200a //    ldr    w10, [x0, #32]
  2151      WORD $0xfc424007 //    ldur    d7, [x0, #36]
  2152      WORD $0x1e270150 //    fmov    s16, w10
  2153      WORD $0x0e873a10 //    zip1    v16.2s, v16.2s, v7.2s
  2154      WORD $0x53087d4b //    lsr    w11, w10, #8
  2155      WORD $0x2ea344f1 //    ushl    v17.2s, v7.2s, v3.2s
  2156      WORD $0x0e0c3cea //    mov    w10, v7.s[1]
  2157      WORD $0x2ea64607 //    ushl    v7.2s, v16.2s, v6.2s
  2158      WORD $0x1e270170 //    fmov    s16, w11
  2159      WORD $0x0ea71e27 //    orr    v7.8b, v17.8b, v7.8b
  2160      WORD $0x6e0c04f0 //    mov    v16.s[1], v7.s[0]
  2161      WORD $0x530a7d4a //    lsr    w10, w10, #10
  2162      WORD $0x6e1424f0 //    mov    v16.s[2], v7.s[1]
  2163      WORD $0x4e1c1d50 //    mov    v16.s[3], w10
  2164      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
  2165      WORD $0x3c9f0127 //    stur    q7, [x9, #-16]
  2166      WORD $0x2945ac0a //    ldp    w10, w11, [x0, #44]
  2167      WORD $0x138a596d //    extr    w13, w11, w10, #22
  2168      WORD $0x1e270147 //    fmov    s7, w10
  2169      WORD $0x138b318b //    extr    w11, w12, w11, #12
  2170      WORD $0x4e0c1da7 //    mov    v7.s[1], w13
  2171      WORD $0x53027d8c //    lsr    w12, w12, #2
  2172      WORD $0x4e141d67 //    mov    v7.s[2], w11
  2173      WORD $0x4e1c1d87 //    mov    v7.s[3], w12
  2174      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
  2175      WORD $0x3d800127 //    str    q7, [x9]
  2176      WORD $0xfd401c07 //    ldr    d7, [x0, #56]
  2177      WORD $0xbd403410 //    ldr    s16, [x0, #52]
  2178      WORD $0xb940400a //    ldr    w10, [x0, #64]
  2179      WORD $0x2ea144f1 //    ushl    v17.2s, v7.2s, v1.2s
  2180      WORD $0x0e873a10 //    zip1    v16.2s, v16.2s, v7.2s
  2181      WORD $0x0e0c3ceb //    mov    w11, v7.s[1]
  2182      WORD $0x2ea44607 //    ushl    v7.2s, v16.2s, v4.2s
  2183      WORD $0x53047d6c //    lsr    w12, w11, #4
  2184      WORD $0x0ea71e27 //    orr    v7.8b, v17.8b, v7.8b
  2185      WORD $0x138b694a //    extr    w10, w10, w11, #26
  2186      WORD $0x4e141d87 //    mov    v7.s[2], w12
  2187      WORD $0x4e1c1d47 //    mov    v7.s[3], w10
  2188      WORD $0x4e201ce7 //    and    v7.16b, v7.16b, v0.16b
  2189      WORD $0x3d800527 //    str    q7, [x9, #16]
  2190      WORD $0x29482c0a //    ldp    w10, w11, [x0, #64]
  2191      WORD $0xfd402407 //    ldr    d7, [x0, #72]
  2192      WORD $0x1e270170 //    fmov    s16, w11
  2193      WORD $0x138a416a //    extr    w10, w11, w10, #16
  2194      WORD $0x0e873a10 //    zip1    v16.2s, v16.2s, v7.2s
  2195      WORD $0x53067d6c //    lsr    w12, w11, #6
  2196      WORD $0x2ea244e7 //    ushl    v7.2s, v7.2s, v2.2s
  2197      WORD $0x1e270151 //    fmov    s17, w10
  2198      WORD $0x2ea54610 //    ushl    v16.2s, v16.2s, v5.2s
  2199      WORD $0x4e0c1d91 //    mov    v17.s[1], w12
  2200      WORD $0x0eb01ce7 //    orr    v7.8b, v7.8b, v16.8b
  2201      WORD $0x6e1804f1 //    mov    v17.d[1], v7.d[0]
  2202      WORD $0x4e201e27 //    and    v7.16b, v17.16b, v0.16b
  2203      WORD $0x3d800927 //    str    q7, [x9, #32]
  2204      WORD $0xb9404c0a //    ldr    w10, [x0, #76]
  2205      WORD $0xfd402807 //    ldr    d7, [x0, #80]
  2206      WORD $0x91016000 //    add    x0, x0, #88
  2207      WORD $0x1e270150 //    fmov    s16, w10
  2208      WORD $0x0e873a10 //    zip1    v16.2s, v16.2s, v7.2s
  2209      WORD $0x53087d4b //    lsr    w11, w10, #8
  2210      WORD $0x2ea344f1 //    ushl    v17.2s, v7.2s, v3.2s
  2211      WORD $0x0e0c3cea //    mov    w10, v7.s[1]
  2212      WORD $0x2ea64607 //    ushl    v7.2s, v16.2s, v6.2s
  2213      WORD $0x1e270170 //    fmov    s16, w11
  2214      WORD $0x0ea71e27 //    orr    v7.8b, v17.8b, v7.8b
  2215      WORD $0x6e0c04f0 //    mov    v16.s[1], v7.s[0]
  2216      WORD $0x530a7d4a //    lsr    w10, w10, #10
  2217      WORD $0x6e1424f0 //    mov    v16.s[2], v7.s[1]
  2218      WORD $0x4e1c1d50 //    mov    v16.s[3], w10
  2219      WORD $0x4e201e07 //    and    v7.16b, v16.16b, v0.16b
  2220      WORD $0x3d800d27 //    str    q7, [x9, #48]
  2221      WORD $0x91020129 //    add    x9, x9, #128
  2222  
  2223      BNE LBB0_69
  2224      JMP LBB0_99
  2225  LBB0_70:
  2226      WORD $0x7100805f //    cmp    w2, #32
  2227      BLT LBB0_99
  2228      MOVD $0x000000130000001c, R16 // LCPI0_70
  2229      MOVD $0x0000000d00000004, R1 // LCPI0_71
  2230      MOVD $0x0000000f00000018, R17 // LCPI0_72
  2231      MOVD $0x0000001100000008, R2 // LCPI0_73
  2232      MOVD $0x0000000b00000014, R25 // LCPI0_74
  2233      MOVD $0x000000150000000c, R3 // LCPI0_75
  2234      MOVD $0x000000150000001e, R19 // LCPI0_76
  2235      MOVD $0x0000000b00000002, R4 // LCPI0_77
  2236      MOVD $0x000000110000001a, R20 // LCPI0_78
  2237      MOVD $0x0000000f00000006, R5 // LCPI0_79
  2238      MOVD $0x000000160000001f, R21 // LCPI0_80
  2239      MOVD $0x0000000a00000001, R6 // LCPI0_81
  2240      MOVD $0x000000120000001b, R22 // LCPI0_82
  2241      MOVD $0x0000000e00000005, R7 // LCPI0_83
  2242  
  2243      WORD $0x91010029 //    add    x9, x1, #64
  2244      WORD $0x4f03d7e0 //    movi    v0.4s, #127, msl #16
  2245      WORD $0x2ea0ba10 //    neg    v16.2s, v16.2s
  2246      WORD $0x2ea0ba31 //    neg    v17.2s, v17.2s
  2247      WORD $0x2ea0bb39 //neg    v25.2s, v25.2s
  2248      WORD $0x2ea0ba73 //    neg    v19.2s, v19.2s
  2249      WORD $0x2ea0ba94 //    neg    v20.2s, v20.2s
  2250      WORD $0x2ea0bab5 //    neg    v21.2s, v21.2s
  2251      WORD $0x2ea0bad6 //    neg    v22.2s, v22.2s
  2252  LBB0_72: 
  2253      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2254      WORD $0xb940080c //    ldr    w12, [x0, #8]
  2255      WORD $0xf1000508 //    subs    x8, x8, #1
  2256      WORD $0x138a5d6d //    extr    w13, w11, w10, #23
  2257      WORD $0x1e270157 //    fmov    s23, w10
  2258      WORD $0x138b398b //    extr    w11, w12, w11, #14
  2259      WORD $0x4e0c1db7 //    mov    v23.s[1], w13
  2260      WORD $0x53057d8c //    lsr    w12, w12, #5
  2261      WORD $0x4e141d77 //    mov    v23.s[2], w11
  2262      WORD $0x4e1c1d97 //    mov    v23.s[3], w12
  2263      WORD $0x4e201ef7 //    and    v23.16b, v23.16b, v0.16b
  2264      WORD $0x3c9c0137 //    stur    q23, [x9, #-64]
  2265      WORD $0xfc40c017 //    ldur    d23, [x0, #12]
  2266      WORD $0xbd400818 //    ldr    s24, [x0, #8]
  2267      WORD $0xb940140a //    ldr    w10, [x0, #20]
  2268      WORD $0x2ea146f9 //    ushl    v25.2s, v23.2s, v1.2s
  2269      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2270      WORD $0x0e0c3eeb //    mov    w11, v23.s[1]
  2271      WORD $0x2eb04717 //    ushl    v23.2s, v24.2s, v16.2s
  2272      WORD $0x53017d4c //    lsr    w12, w10, #1
  2273      WORD $0x138b294a //    extr    w10, w10, w11, #10
  2274      WORD $0x0eb71f37 //    orr    v23.8b, v25.8b, v23.8b
  2275      WORD $0x4e141d57 //    mov    v23.s[2], w10
  2276      WORD $0x4e1c1d97 //    mov    v23.s[3], w12
  2277      WORD $0x4e201ef7 //    and    v23.16b, v23.16b, v0.16b
  2278      WORD $0x3c9d0137 //    stur    q23, [x9, #-48]
  2279      WORD $0xfd400c17 //    ldr    d23, [x0, #24]
  2280      WORD $0xbd401418 //    ldr    s24, [x0, #20]
  2281      WORD $0xb940200a //    ldr    w10, [x0, #32]
  2282      WORD $0x2ea246f9 //    ushl    v25.2s, v23.2s, v2.2s
  2283      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2284      WORD $0x0e0c3eeb //    mov    w11, v23.s[1]
  2285      WORD $0x2eb14717 //    ushl    v23.2s, v24.2s, v17.2s
  2286      WORD $0x53067d6c //    lsr    w12, w11, #6
  2287      WORD $0x0eb71f37 //    orr    v23.8b, v25.8b, v23.8b
  2288      WORD $0x138b754a //    extr    w10, w10, w11, #29
  2289      WORD $0x4e141d97 //    mov    v23.s[2], w12
  2290      WORD $0x4e1c1d57 //    mov    v23.s[3], w10
  2291      WORD $0x4e201ef7 //    and    v23.16b, v23.16b, v0.16b
  2292      WORD $0x3c9e0137 //    stur    q23, [x9, #-32]
  2293      WORD $0xfc424017 //    ldur    d23, [x0, #36]
  2294      WORD $0xbd402018 //    ldr    s24, [x0, #32]
  2295      WORD $0xb9402c0a //    ldr    w10, [x0, #44]
  2296      WORD $0x2ea346f9 //    ushl    v25.2s, v23.2s, v3.2s
  2297      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2298      WORD $0x0e0c3eeb //    mov    w11, v23.s[1]
  2299      WORD $0x2eb24717 //    ushl    v23.2s, v24.2s, v25.2s
  2300      WORD $0x53027d6c //    lsr    w12, w11, #2
  2301      WORD $0x0eb71f37 //    orr    v23.8b, v25.8b, v23.8b
  2302      WORD $0x138b654a //    extr    w10, w10, w11, #25
  2303      WORD $0x4e141d97 //    mov    v23.s[2], w12
  2304      WORD $0x4e1c1d57 //    mov    v23.s[3], w10
  2305      WORD $0x4e201ef7 //    and    v23.16b, v23.16b, v0.16b
  2306      WORD $0x3c9f0137 //    stur    q23, [x9, #-16]
  2307      WORD $0x2945ac0a //    ldp    w10, w11, [x0, #44]
  2308      WORD $0xfc434017 //    ldur    d23, [x0, #52]
  2309      WORD $0x1e270178 //    fmov    s24, w11
  2310      WORD $0x138a416a //    extr    w10, w11, w10, #16
  2311      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2312      WORD $0x53077d6c //    lsr    w12, w11, #7
  2313      WORD $0x2ea446f7 //    ushl    v23.2s, v23.2s, v4.2s
  2314      WORD $0x1e270159 //    fmov    s25, w10
  2315      WORD $0x2eb34718 //    ushl    v24.2s, v24.2s, v19.2s
  2316      WORD $0x4e0c1d99 //    mov    v25.s[1], w12
  2317      WORD $0x0eb81ef7 //    orr    v23.8b, v23.8b, v24.8b
  2318      WORD $0x6e1806f9 //    mov    v25.d[1], v23.d[0]
  2319      WORD $0x4e201f37 //    and    v23.16b, v25.16b, v0.16b
  2320      WORD $0x3d800137 //    str    q23, [x9]
  2321      WORD $0x29472c0a //    ldp    w10, w11, [x0, #56]
  2322      WORD $0xfd402017 //    ldr    d23, [x0, #64]
  2323      WORD $0x1e270178 //    fmov    s24, w11
  2324      WORD $0x138a316a //    extr    w10, w11, w10, #12
  2325      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2326      WORD $0x53037d6c //    lsr    w12, w11, #3
  2327      WORD $0x2ea546f7 //    ushl    v23.2s, v23.2s, v5.2s
  2328      WORD $0x1e270159 //    fmov    s25, w10
  2329      WORD $0x2eb44718 //    ushl    v24.2s, v24.2s, v20.2s
  2330      WORD $0x4e0c1d99 //    mov    v25.s[1], w12
  2331      WORD $0x0eb81ef7 //    orr    v23.8b, v23.8b, v24.8b
  2332      WORD $0x6e1806f9 //    mov    v25.d[1], v23.d[0]
  2333      WORD $0x4e201f37 //    and    v23.16b, v25.16b, v0.16b
  2334      WORD $0x3d800537 //    str    q23, [x9, #16]
  2335      WORD $0xb940440a //    ldr    w10, [x0, #68]
  2336      WORD $0xfd402417 //    ldr    d23, [x0, #72]
  2337      WORD $0xb940500b //    ldr    w11, [x0, #80]
  2338      WORD $0x1e270158 //    fmov    s24, w10
  2339      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2340      WORD $0x53087d4c //    lsr    w12, w10, #8
  2341      WORD $0x2ea646f9 //    ushl    v25.2s, v23.2s, v6.2s
  2342      WORD $0x0e0c3eea //    mov    w10, v23.s[1]
  2343      WORD $0x2eb54717 //    ushl    v23.2s, v24.2s, v21.2s
  2344      WORD $0x1e270198 //    fmov    s24, w12
  2345      WORD $0x0eb71f37 //    orr    v23.8b, v25.8b, v23.8b
  2346      WORD $0x6e0c06f8 //    mov    v24.s[1], v23.s[0]
  2347      WORD $0x138a356a //    extr    w10, w11, w10, #13
  2348      WORD $0x6e1426f8 //    mov    v24.s[2], v23.s[1]
  2349      WORD $0x4e1c1d58 //    mov    v24.s[3], w10
  2350      WORD $0x4e201f17 //    and    v23.16b, v24.16b, v0.16b
  2351      WORD $0x3d800937 //    str    q23, [x9, #32]
  2352      WORD $0xb940500a //    ldr    w10, [x0, #80]
  2353      WORD $0xfc454017 //    ldur    d23, [x0, #84]
  2354      WORD $0x91017000 //    add    x0, x0, #92
  2355      WORD $0x1e270158 //    fmov    s24, w10
  2356      WORD $0x0e973b18 //    zip1    v24.2s, v24.2s, v23.2s
  2357      WORD $0x53047d4b //    lsr    w11, w10, #4
  2358      WORD $0x2ea746f9 //    ushl    v25.2s, v23.2s, v7.2s
  2359      WORD $0x0e0c3eea //    mov    w10, v23.s[1]
  2360      WORD $0x2eb64717 //    ushl    v23.2s, v24.2s, v22.2s
  2361      WORD $0x1e270178 //    fmov    s24, w11
  2362      WORD $0x0eb71f37 //    orr    v23.8b, v25.8b, v23.8b
  2363      WORD $0x6e0c06f8 //    mov    v24.s[1], v23.s[0]
  2364      WORD $0x53097d4a //    lsr    w10, w10, #9
  2365      WORD $0x6e1426f8 //    mov    v24.s[2], v23.s[1]
  2366      WORD $0x4e1c1d58 //    mov    v24.s[3], w10
  2367      WORD $0x4e201f17 //    and    v23.16b, v24.16b, v0.16b
  2368      WORD $0x3d800d37 //    str    q23, [x9, #48]
  2369      WORD $0x91020129 //    add    x9, x9, #128
  2370  
  2371      BNE LBB0_72
  2372      JMP LBB0_99
  2373  LBB0_73:
  2374      WORD $0x7100805f //    cmp    w2, #32
  2375      BLT LBB0_99
  2376      WORD $0x91001829 //    add    x9, x1, #6
  2377  LBB0_75:
  2378      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2379      WORD $0xb940080c //    ldr    w12, [x0, #8]
  2380      WORD $0xf1000508 //    subs    x8, x8, #1
  2381      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2382      WORD $0x1e270140 //    fmov    s0, w10
  2383      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2384      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2385      WORD $0x53087d8c //    lsr    w12, w12, #8
  2386      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2387      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2388      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2389      WORD $0x3c9c0120 //    stur    q0, [x9, #-64]
  2390      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
  2391      WORD $0xb940140c //    ldr    w12, [x0, #20]
  2392      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2393      WORD $0x1e270140 //    fmov    s0, w10
  2394      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2395      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2396      WORD $0x53087d8c //    lsr    w12, w12, #8
  2397      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2398      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2399      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2400      WORD $0x3c9d0120 //    stur    q0, [x9, #-48]
  2401      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  2402      WORD $0xb940200c //    ldr    w12, [x0, #32]
  2403      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2404      WORD $0x1e270140 //    fmov    s0, w10
  2405      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2406      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2407      WORD $0x53087d8c //    lsr    w12, w12, #8
  2408      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2409      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2410      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2411      WORD $0x3c9e0120 //    stur    q0, [x9, #-32]
  2412      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  2413      WORD $0xb9402c0c //    ldr    w12, [x0, #44]
  2414      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2415      WORD $0x1e270140 //    fmov    s0, w10
  2416      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2417      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2418      WORD $0x53087d8c //    lsr    w12, w12, #8
  2419      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2420      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2421      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2422      WORD $0x3c9f0120 //    stur    q0, [x9, #-16]
  2423      WORD $0x29462c0a //    ldp    w10, w11, [x0, #48]
  2424      WORD $0xb940380c //    ldr    w12, [x0, #56]
  2425      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2426      WORD $0x1e270140 //    fmov    s0, w10
  2427      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2428      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2429      WORD $0x53087d8c //    lsr    w12, w12, #8
  2430      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2431      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2432      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2433      WORD $0x3d800120 //    str    q0, [x9]
  2434      WORD $0x2947ac0a //    ldp    w10, w11, [x0, #60]
  2435      WORD $0xb940440c //    ldr    w12, [x0, #68]
  2436      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2437      WORD $0x1e270140 //    fmov    s0, w10
  2438      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2439      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2440      WORD $0x53087d8c //    lsr    w12, w12, #8
  2441      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2442      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2443      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2444      WORD $0x3d800520 //    str    q0, [x9, #16]
  2445      WORD $0x29492c0a //    ldp    w10, w11, [x0, #72]
  2446      WORD $0xb940500c //    ldr    w12, [x0, #80]
  2447      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2448      WORD $0x1e270140 //    fmov    s0, w10
  2449      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2450      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2451      WORD $0x53087d8c //    lsr    w12, w12, #8
  2452      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2453      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2454      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2455      WORD $0x3d800920 //    str    q0, [x9, #32]
  2456      WORD $0x294aac0a //    ldp    w10, w11, [x0, #84]
  2457      WORD $0xb9405c0c //    ldr    w12, [x0, #92]
  2458      WORD $0x91018000 //    add    x0, x0, #96
  2459      WORD $0x138a616d //    extr    w13, w11, w10, #24
  2460      WORD $0x1e270140 //    fmov    s0, w10
  2461      WORD $0x138b418b //    extr    w11, w12, w11, #16
  2462      WORD $0x4e0c1da0 //    mov    v0.s[1], w13
  2463      WORD $0x53087d8c //    lsr    w12, w12, #8
  2464      WORD $0x4e141d60 //    mov    v0.s[2], w11
  2465      WORD $0x4e1c1d80 //    mov    v0.s[3], w12
  2466      WORD $0x6f0777e0 //    bic    v0.4s, #255, lsl #24
  2467      WORD $0x3d800d20 //    str    q0, [x9, #48]
  2468      WORD $0x91020129 //    add    x9, x9, #128
  2469  
  2470      BNE LBB0_75
  2471      JMP LBB0_99
  2472  LBB0_76:
  2473      WORD $0x7100805f //    cmp    w2, #32
  2474      BLT LBB0_99
  2475      MOVD $0x000000160000001d, R7 // LCPI0_56
  2476      MOVD $0x0000000a00000003, R0 // LCPI0_57
  2477      MOVD $0x000000130000001a, R16 // LCPI0_58
  2478      MOVD $0x0000000d00000006, R1 // LCPI0_59
  2479      MOVD $0x000000170000001e, R17 // LCPI0_60
  2480      MOVD $0x0000000900000002, R2 // LCPI0_61
  2481      MOVD $0x0000000900000010, R25 // LCPI0_62
  2482      MOVD $0x0000001700000010, R3 // LCPI0_63
  2483      MOVD $0x0000000d00000014, R19 // LCPI0_64
  2484      MOVD $0x000000130000000c, R4 // LCPI0_65
  2485      MOVD $0x0000001100000018, R20 // LCPI0_66
  2486      MOVD $0x0000000f00000008, R5 // LCPI0_67
  2487      MOVD $0x000000150000001c, R21 // LCPI0_68
  2488      MOVD $0x0000000b00000004, R6 // LCPI0_69
  2489  
  2490      WORD $0x91010029 //    add    x9, x1, #64
  2491      WORD $0x2ea0b8e7 //    neg    v7.2s, v7.2s
  2492      WORD $0x2ea0ba10 //    neg    v16.2s, v16.2s
  2493      WORD $0x2ea0ba31 //    neg    v17.2s, v17.2s
  2494      WORD $0x2ea0bb39 //    neg    v25.2s, v25.2s
  2495      WORD $0x2ea0ba73 //    neg    v19.2s, v19.2s
  2496      WORD $0x2ea0ba94 //    neg    v20.2s, v20.2s
  2497      WORD $0x2ea0bab5 //    neg    v21.2s, v21.2s
  2498  LBB0_78:
  2499      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2500      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  2501      WORD $0xf1000508 //    subs    x8, x8, #1
  2502      WORD $0x138a656e //    extr    w14, w11, w10, #25
  2503      WORD $0x1e270156 //    fmov    s22, w10
  2504      WORD $0x138b498b //    extr    w11, w12, w11, #18
  2505      WORD $0x4e0c1dd6 //    mov    v22.s[1], w14
  2506      WORD $0x138c2dac //    extr    w12, w13, w12, #11
  2507      WORD $0x4e141d76 //    mov    v22.s[2], w11
  2508      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2509      WORD $0x6f0777d6 //    bic    v22.4s, #254, lsl #24
  2510      WORD $0x3c9c0136 //    stur    q22, [x9, #-64]
  2511      WORD $0xb9400c0a //    ldr    w10, [x0, #12]
  2512      WORD $0xfd400816 //    ldr    d22, [x0, #16]
  2513      WORD $0xb940180b //    ldr    w11, [x0, #24]
  2514      WORD $0x1e270157 //    fmov    s23, w10
  2515      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2516      WORD $0x53047d4c //    lsr    w12, w10, #4
  2517      WORD $0x2ea046d8 //    ushl    v24.2s, v22.2s, v0.2s
  2518      WORD $0x0e0c3eca //    mov    w10, v22.s[1]
  2519      WORD $0x2ea746f6 //    ushl    v22.2s, v23.2s, v7.2s
  2520      WORD $0x1e270197 //    fmov    s23, w12
  2521      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2522      WORD $0x6e0c06d7 //    mov    v23.s[1], v22.s[0]
  2523      WORD $0x138a3d6a //    extr    w10, w11, w10, #15
  2524      WORD $0x6e1426d7 //    mov    v23.s[2], v22.s[1]
  2525      WORD $0x4e1c1d57 //    mov    v23.s[3], w10
  2526      WORD $0x6f0777d7 //    bic    v23.4s, #254, lsl #24
  2527      WORD $0x3c9d0137 //    stur    q23, [x9, #-48]
  2528      WORD $0x29432c0a //    ldp    w10, w11, [x0, #24]
  2529      WORD $0xfd401016 //    ldr    d22, [x0, #32]
  2530      WORD $0x1e270177 //    fmov    s23, w11
  2531      WORD $0x138a216a //    extr    w10, w11, w10, #8
  2532      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2533      WORD $0x53017d6c //    lsr    w12, w11, #1
  2534      WORD $0x2ea146d6 //    ushl    v22.2s, v22.2s, v1.2s
  2535      WORD $0x1e270158 //    fmov    s24, w10
  2536      WORD $0x2eb046f7 //    ushl    v23.2s, v23.2s, v16.2s
  2537      WORD $0x4e0c1d98 //    mov    v24.s[1], w12
  2538      WORD $0x0eb71ed6 //    orr    v22.8b, v22.8b, v23.8b
  2539      WORD $0x6e1806d8 //    mov    v24.d[1], v22.d[0]
  2540      WORD $0x6f0777d8 //    bic    v24.4s, #254, lsl #24
  2541      WORD $0x3c9e0138 //    stur    q24, [x9, #-32]
  2542      WORD $0x2944ac0a //    ldp    w10, w11, [x0, #36]
  2543      WORD $0xfc42c016 //    ldur    d22, [x0, #44]
  2544      WORD $0x1e270177 //    fmov    s23, w11
  2545      WORD $0x138a316a //    extr    w10, w11, w10, #12
  2546      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2547      WORD $0x53057d6c //    lsr    w12, w11, #5
  2548      WORD $0x2ea246d6 //    ushl    v22.2s, v22.2s, v2.2s
  2549      WORD $0x1e270158 //    fmov    s24, w10
  2550      WORD $0x2eb146f7 //    ushl    v23.2s, v23.2s, v17.2s
  2551      WORD $0x4e0c1d98 //    mov    v24.s[1], w12
  2552      WORD $0x0eb71ed6 //    orr    v22.8b, v22.8b, v23.8b
  2553      WORD $0x6e1806d8 //    mov    v24.d[1], v22.d[0]
  2554      WORD $0x6f0777d8 //    bic    v24.4s, #254, lsl #24
  2555      WORD $0x3c9f0138 //    stur    q24, [x9, #-16]
  2556      WORD $0xfc434016 //    ldur    d22, [x0, #52]
  2557      WORD $0xbd403017 //    ldr    s23, [x0, #48]
  2558      WORD $0xb9403c0a //    ldr    w10, [x0, #60]
  2559      WORD $0x2ea346d8 //    ushl    v24.2s, v22.2s, v3.2s
  2560      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2561      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2562      WORD $0x2eb246f6 //    ushl    v22.2s, v23.2s, v25.2s
  2563      WORD $0x53027d6c //    lsr    w12, w11, #2
  2564      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2565      WORD $0x138b6d4a //    extr    w10, w10, w11, #27
  2566      WORD $0x4e141d96 //    mov    v22.s[2], w12
  2567      WORD $0x4e1c1d56 //    mov    v22.s[3], w10
  2568      WORD $0x6f0777d6 //    bic    v22.4s, #254, lsl #24
  2569      WORD $0x3d800136 //    str    q22, [x9]
  2570      WORD $0xfd402016 //    ldr    d22, [x0, #64]
  2571      WORD $0xbd403c17 //    ldr    s23, [x0, #60]
  2572      WORD $0xb940480a //    ldr    w10, [x0, #72]
  2573      WORD $0x2ea446d8 //    ushl    v24.2s, v22.2s, v4.2s
  2574      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2575      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2576      WORD $0x2eb346f6 //    ushl    v22.2s, v23.2s, v19.2s
  2577      WORD $0x53067d6c //    lsr    w12, w11, #6
  2578      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2579      WORD $0x138b7d4a //    extr    w10, w10, w11, #31
  2580      WORD $0x4e141d96 //    mov    v22.s[2], w12
  2581      WORD $0x4e1c1d56 //    mov    v22.s[3], w10
  2582      WORD $0x6f0777d6 //    bic    v22.4s, #254, lsl #24
  2583      WORD $0x3d800536 //    str    q22, [x9, #16]
  2584      WORD $0xfc44c016 //    ldur    d22, [x0, #76]
  2585      WORD $0xbd404817 //    ldr    s23, [x0, #72]
  2586      WORD $0xb940540a //    ldr    w10, [x0, #84]
  2587      WORD $0x2ea546d8 //    ushl    v24.2s, v22.2s, v5.2s
  2588      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2589      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2590      WORD $0x2eb446f6 //    ushl    v22.2s, v23.2s, v20.2s
  2591      WORD $0x53037d4c //    lsr    w12, w10, #3
  2592      WORD $0x138b294a //    extr    w10, w10, w11, #10
  2593      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2594      WORD $0x4e141d56 //    mov    v22.s[2], w10
  2595      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2596      WORD $0x6f0777d6 //    bic    v22.4s, #254, lsl #24
  2597      WORD $0x3d800936 //    str    q22, [x9, #32]
  2598      WORD $0xfd402c16 //    ldr    d22, [x0, #88]
  2599      WORD $0xbd405417 //    ldr    s23, [x0, #84]
  2600      WORD $0xb940600a //    ldr    w10, [x0, #96]
  2601      WORD $0x91019000 //    add    x0, x0, #100
  2602      WORD $0x2ea646d8 //    ushl    v24.2s, v22.2s, v6.2s
  2603      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2604      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2605      WORD $0x2eb546f6 //    ushl    v22.2s, v23.2s, v21.2s
  2606      WORD $0x138b394a //    extr    w10, w10, w11, #14
  2607      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2608      WORD $0x4e141d56 //    mov    v22.s[2], w10
  2609      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2610      WORD $0x6f0777d6 //    bic    v22.4s, #254, lsl #24
  2611      WORD $0x3d800d36 //    str    q22, [x9, #48]
  2612      WORD $0x91020129 //    add    x9, x9, #128
  2613  
  2614      BNE LBB0_78
  2615      JMP LBB0_99
  2616  LBB0_79:
  2617      WORD $0x7100805f //    cmp    w2, #32
  2618      BLT LBB0_99
  2619      MOVD $0x000000070000000c, R7 // LCPI0_36
  2620      MOVD $0x0000001900000014, R0 // LCPI0_37
  2621      VMOVQ $0x0000001300000018, $0x000000090000000e, V16 // LCPI0_38
  2622      VMOVQ $0x0000000d00000008, $0x0000001700000012, V1 // LCPI0_39
  2623      MOVD $0x0000001a0000001f, R17 // LCPI0_40
  2624      MOVD $0x0000000600000001, R2 // LCPI0_41
  2625      MOVD $0x0000000b00000010, R25 // LCPI0_42
  2626      MOVD $0x0000001500000010, R3 // LCPI0_43
  2627      VMOVQ $0x000000170000001c, $0x0000000d00000012, V19 // LCPI0_44
  2628      VMOVQ $0x0000000900000004, $0x000000130000000e, V4 // LCPI0_45
  2629      MOVD $0x000000190000001e, R20 // LCPI0_46
  2630      MOVD $0x0000000700000002, R5 // LCPI0_47
  2631      MOVD $0x0000000f00000014, R21 // LCPI0_48
  2632      MOVD $0x000000110000000c, R6 // LCPI0_49
  2633  
  2634      WORD $0x91010029 //    add    x9, x1, #64
  2635      WORD $0x2ea0b8e7 //    neg    v7.2s, v7.2s
  2636      WORD $0x6ea0ba10 //    neg    v16.4s, v16.4s
  2637      WORD $0x2ea0ba31 //    neg    v17.2s, v17.2s
  2638      WORD $0x2ea0bb39 //    neg    v25.2s, v25.2s
  2639      WORD $0x6ea0ba73 //    neg    v19.4s, v19.4s
  2640      WORD $0x2ea0ba94 //    neg    v20.2s, v20.2s
  2641      WORD $0x2ea0bab5 //    neg    v21.2s, v21.2s
  2642  LBB0_81: 
  2643      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2644      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  2645      WORD $0xf1000508 //    subs    x8, x8, #1
  2646      WORD $0x138a6d6e //    extr    w14, w11, w10, #27
  2647      WORD $0x1e270156 //    fmov    s22, w10
  2648      WORD $0x138b598b //    extr    w11, w12, w11, #22
  2649      WORD $0x4e0c1dd6 //    mov    v22.s[1], w14
  2650      WORD $0x138c45ac //    extr    w12, w13, w12, #17
  2651      WORD $0x4e141d76 //    mov    v22.s[2], w11
  2652      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2653      WORD $0x6f077716 //    bic    v22.4s, #248, lsl #24
  2654      WORD $0x3c9c0136 //    stur    q22, [x9, #-64]
  2655      WORD $0xfd400816 //    ldr    d22, [x0, #16]
  2656      WORD $0xbd400c17 //    ldr    s23, [x0, #12]
  2657      WORD $0xb940180a //    ldr    w10, [x0, #24]
  2658      WORD $0x2ea046d8 //    ushl    v24.2s, v22.2s, v0.2s
  2659      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2660      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2661      WORD $0x2ea746f6 //    ushl    v22.2s, v23.2s, v7.2s
  2662      WORD $0x53027d6c //    lsr    w12, w11, #2
  2663      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2664      WORD $0x138b754a //    extr    w10, w10, w11, #29
  2665      WORD $0x4e141d96 //    mov    v22.s[2], w12
  2666      WORD $0x4e1c1d56 //    mov    v22.s[3], w10
  2667      WORD $0x6f077716 //    bic    v22.4s, #248, lsl #24
  2668      WORD $0x3c9d0136 //    stur    q22, [x9, #-48]
  2669      WORD $0xbd401816 //    ldr    s22, [x0, #24]
  2670      WORD $0x3cc1c017 //    ldur    q23, [x0, #28]
  2671      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  2672      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  2673      WORD $0x6ea146f8 //    ushl    v24.4s, v23.4s, v1.4s
  2674      WORD $0x6eb046d6 //    ushl    v22.4s, v22.4s, v16.4s
  2675      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  2676      WORD $0x6f077716 //    bic    v22.4s, #248, lsl #24
  2677      WORD $0x3c9e0136 //    stur    q22, [x9, #-32]
  2678      WORD $0xb940280a //    ldr    w10, [x0, #40]
  2679      WORD $0xfc42c016 //    ldur    d22, [x0, #44]
  2680      WORD $0xb940340b //    ldr    w11, [x0, #52]
  2681      WORD $0x1e270157 //    fmov    s23, w10
  2682      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2683      WORD $0x53047d4c //    lsr    w12, w10, #4
  2684      WORD $0x2ea246d8 //    ushl    v24.2s, v22.2s, v2.2s
  2685      WORD $0x0e0c3eca //    mov    w10, v22.s[1]
  2686      WORD $0x2eb146f6 //    ushl    v22.2s, v23.2s, v17.2s
  2687      WORD $0x1e270197 //    fmov    s23, w12
  2688      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2689      WORD $0x6e0c06d7 //    mov    v23.s[1], v22.s[0]
  2690      WORD $0x138a556a //    extr    w10, w11, w10, #21
  2691      WORD $0x6e1426d7 //    mov    v23.s[2], v22.s[1]
  2692      WORD $0x4e1c1d57 //    mov    v23.s[3], w10
  2693      WORD $0x6f077717 //    bic    v23.4s, #248, lsl #24
  2694      WORD $0x3c9f0137 //    stur    q23, [x9, #-16]
  2695      WORD $0xfd401c16 //    ldr    d22, [x0, #56]
  2696      WORD $0xbd403417 //    ldr    s23, [x0, #52]
  2697      WORD $0xb940400a //    ldr    w10, [x0, #64]
  2698      WORD $0x2ea346d8 //    ushl    v24.2s, v22.2s, v3.2s
  2699      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2700      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2701      WORD $0x2eb246f6 //    ushl    v22.2s, v23.2s, v25.2s
  2702      WORD $0x53017d4c //    lsr    w12, w10, #1
  2703      WORD $0x138b194a //    extr    w10, w10, w11, #6
  2704      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2705      WORD $0x4e141d56 //    mov    v22.s[2], w10
  2706      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2707      WORD $0x6f077716 //    bic    v22.4s, #248, lsl #24
  2708      WORD $0x3d800136 //    str    q22, [x9]
  2709      WORD $0xbd404016 //    ldr    s22, [x0, #64]
  2710      WORD $0x3cc44017 //    ldur    q23, [x0, #68]
  2711      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  2712      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  2713      WORD $0x6ea446f8 //    ushl    v24.4s, v23.4s, v4.4s
  2714      WORD $0x6eb346d6 //    ushl    v22.4s, v22.4s, v19.4s
  2715      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  2716      WORD $0x6f077716 //    bic    v22.4s, #248, lsl #24
  2717      WORD $0x3d800536 //    str    q22, [x9, #16]
  2718      WORD $0x294a2c0a //    ldp    w10, w11, [x0, #80]
  2719      WORD $0xfd402c16 //    ldr    d22, [x0, #88]
  2720      WORD $0x1e270177 //    fmov    s23, w11
  2721      WORD $0x138a216a //    extr    w10, w11, w10, #8
  2722      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2723      WORD $0x53037d6c //    lsr    w12, w11, #3
  2724      WORD $0x2ea546d6 //    ushl    v22.2s, v22.2s, v5.2s
  2725      WORD $0x1e270158 //    fmov    s24, w10
  2726      WORD $0x2eb446f7 //    ushl    v23.2s, v23.2s, v20.2s
  2727      WORD $0x4e0c1d98 //    mov    v24.s[1], w12
  2728      WORD $0x0eb71ed6 //    orr    v22.8b, v22.8b, v23.8b
  2729      WORD $0x6e1806d8 //    mov    v24.d[1], v22.d[0]
  2730      WORD $0x6f077718 //    bic    v24.4s, #248, lsl #24
  2731      WORD $0x3d800938 //    str    q24, [x9, #32]
  2732      WORD $0xfd403016 //    ldr    d22, [x0, #96]
  2733      WORD $0xbd405c17 //    ldr    s23, [x0, #92]
  2734      WORD $0xb940680a //    ldr    w10, [x0, #104]
  2735      WORD $0x9101b000 //    add    x0, x0, #108
  2736      WORD $0x2ea646d8 //    ushl    v24.2s, v22.2s, v6.2s
  2737      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2738      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2739      WORD $0x2eb546f6 //    ushl    v22.2s, v23.2s, v21.2s
  2740      WORD $0x53057d4c //    lsr    w12, w10, #5
  2741      WORD $0x138b294a //    extr    w10, w10, w11, #10
  2742      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2743      WORD $0x4e141d56 //    mov    v22.s[2], w10
  2744      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2745      WORD $0x6f077716 //    bic    v22.4s, #248, lsl #24
  2746      WORD $0x3d800d36 //    str    q22, [x9, #48]
  2747      WORD $0x91020129 //    add    x9, x9, #128 
  2748  
  2749      BNE LBB0_81
  2750      JMP LBB0_99
  2751  LBB0_82:
  2752      WORD $0x7100805f //    cmp    w2, #32
  2753      BLT LBB0_99
  2754      MOVD $0x0000000c00000010, R1 // LCPI0_34
  2755      MOVD $0x0000001400000010, R0 // LCPI0_35
  2756  
  2757      WORD $0x91010029 //    add    x9, x1, #64
  2758      WORD $0x2ea0b821 //    neg    v1.2s, v1.2s
  2759  LBB0_84:
  2760      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2761      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  2762      WORD $0xf1000508 //    subs    x8, x8, #1
  2763      WORD $0x138a716e //    extr    w14, w11, w10, #28
  2764      WORD $0x1e270142 //    fmov    s2, w10
  2765      WORD $0x138b618b //    extr    w11, w12, w11, #24
  2766      WORD $0x4e0c1dc2 //    mov    v2.s[1], w14
  2767      WORD $0x138c51ac //    extr    w12, w13, w12, #20
  2768      WORD $0x4e141d62 //    mov    v2.s[2], w11
  2769      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2770      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2771      WORD $0x3c9c0122 //    stur    q2, [x9, #-64]
  2772      WORD $0xfd400802 //    ldr    d2, [x0, #16]
  2773      WORD $0xbd400c03 //    ldr    s3, [x0, #12]
  2774      WORD $0xb940180a //    ldr    w10, [x0, #24]
  2775      WORD $0x2ea04444 //    ushl    v4.2s, v2.2s, v0.2s
  2776      WORD $0x0e823863 //    zip1    v3.2s, v3.2s, v2.2s
  2777      WORD $0x0e0c3c4b //    mov    w11, v2.s[1]
  2778      WORD $0x2ea14462 //    ushl    v2.2s, v3.2s, v1.2s
  2779      WORD $0x53047d4c //    lsr    w12, w10, #4
  2780      WORD $0x138b214a //    extr    w10, w10, w11, #8
  2781      WORD $0x0ea21c82 //    orr    v2.8b, v4.8b, v2.8b
  2782      WORD $0x4e141d42 //    mov    v2.s[2], w10
  2783      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2784      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2785      WORD $0x3c9d0122 //    stur    q2, [x9, #-48]
  2786      WORD $0x2943ac0a //    ldp    w10, w11, [x0, #28]
  2787      WORD $0x2944b40c //    ldp    w12, w13, [x0, #36]
  2788      WORD $0x138a716e //    extr    w14, w11, w10, #28
  2789      WORD $0x1e270142 //    fmov    s2, w10
  2790      WORD $0x138b618b //    extr    w11, w12, w11, #24
  2791      WORD $0x4e0c1dc2 //    mov    v2.s[1], w14
  2792      WORD $0x138c51ac //    extr    w12, w13, w12, #20
  2793      WORD $0x4e141d62 //    mov    v2.s[2], w11
  2794      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2795      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2796      WORD $0x3c9e0122 //    stur    q2, [x9, #-32]
  2797      WORD $0xfc42c002 //    ldur    d2, [x0, #44]
  2798      WORD $0xbd402803 //    ldr    s3, [x0, #40]
  2799      WORD $0xb940340a //    ldr    w10, [x0, #52]
  2800      WORD $0x2ea04444 //    ushl    v4.2s, v2.2s, v0.2s
  2801      WORD $0x0e823863 //    zip1    v3.2s, v3.2s, v2.2s
  2802      WORD $0x0e0c3c4b //    mov    w11, v2.s[1]
  2803      WORD $0x2ea14462 //    ushl    v2.2s, v3.2s, v1.2s
  2804      WORD $0x53047d4c //    lsr    w12, w10, #4
  2805      WORD $0x138b214a //    extr    w10, w10, w11, #8
  2806      WORD $0x0ea21c82 //    orr    v2.8b, v4.8b, v2.8b
  2807      WORD $0x4e141d42 //    mov    v2.s[2], w10
  2808      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2809      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2810      WORD $0x3c9f0122 //    stur    q2, [x9, #-16]
  2811      WORD $0x29472c0a //    ldp    w10, w11, [x0, #56]
  2812      WORD $0x2948340c //    ldp    w12, w13, [x0, #64]
  2813      WORD $0x138a716e //    extr    w14, w11, w10, #28
  2814      WORD $0x1e270142 //    fmov    s2, w10
  2815      WORD $0x138b618b //    extr    w11, w12, w11, #24
  2816      WORD $0x4e0c1dc2 //    mov    v2.s[1], w14
  2817      WORD $0x138c51ac //    extr    w12, w13, w12, #20
  2818      WORD $0x4e141d62 //    mov    v2.s[2], w11
  2819      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2820      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2821      WORD $0x3d800122 //    str    q2, [x9]
  2822      WORD $0xfd402402 //    ldr    d2, [x0, #72]
  2823      WORD $0xbd404403 //    ldr    s3, [x0, #68]
  2824      WORD $0xb940500a //    ldr    w10, [x0, #80]
  2825      WORD $0x2ea04444 //    ushl    v4.2s, v2.2s, v0.2s
  2826      WORD $0x0e823863 //    zip1    v3.2s, v3.2s, v2.2s
  2827      WORD $0x0e0c3c4b //    mov    w11, v2.s[1]
  2828      WORD $0x2ea14462 //    ushl    v2.2s, v3.2s, v1.2s
  2829      WORD $0x53047d4c //    lsr    w12, w10, #4
  2830      WORD $0x138b214a //    extr    w10, w10, w11, #8
  2831      WORD $0x0ea21c82 //    orr    v2.8b, v4.8b, v2.8b
  2832      WORD $0x4e141d42 //    mov    v2.s[2], w10
  2833      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2834      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2835      WORD $0x3d800522 //    str    q2, [x9, #16]
  2836      WORD $0x294aac0a //    ldp    w10, w11, [x0, #84]
  2837      WORD $0x294bb40c //    ldp    w12, w13, [x0, #92]
  2838      WORD $0x138a716e //    extr    w14, w11, w10, #28
  2839      WORD $0x1e270142 //    fmov    s2, w10
  2840      WORD $0x138b618b //    extr    w11, w12, w11, #24
  2841      WORD $0x4e0c1dc2 //    mov    v2.s[1], w14
  2842      WORD $0x138c51ac //    extr    w12, w13, w12, #20
  2843      WORD $0x4e141d62 //    mov    v2.s[2], w11
  2844      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2845      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2846      WORD $0x3d800922 //    str    q2, [x9, #32]
  2847      WORD $0xfc464002 //    ldur    d2, [x0, #100]
  2848      WORD $0xbd406003 //    ldr    s3, [x0, #96]
  2849      WORD $0xb9406c0a //    ldr    w10, [x0, #108]
  2850      WORD $0x9101c000 //    add    x0, x0, #112
  2851      WORD $0x2ea04444 //    ushl    v4.2s, v2.2s, v0.2s
  2852      WORD $0x0e823863 //    zip1    v3.2s, v3.2s, v2.2s
  2853      WORD $0x0e0c3c4b //    mov    w11, v2.s[1]
  2854      WORD $0x2ea14462 //    ushl    v2.2s, v3.2s, v1.2s
  2855      WORD $0x53047d4c //    lsr    w12, w10, #4
  2856      WORD $0x138b214a //    extr    w10, w10, w11, #8
  2857      WORD $0x0ea21c82 //    orr    v2.8b, v4.8b, v2.8b
  2858      WORD $0x4e141d42 //    mov    v2.s[2], w10
  2859      WORD $0x4e1c1d82 //    mov    v2.s[3], w12
  2860      WORD $0x6f077602 //    bic    v2.4s, #240, lsl #24
  2861      WORD $0x3d800d22 //    str    q2, [x9, #48]
  2862      WORD $0x91020129 //    add    x9, x9, #128
  2863  
  2864      BNE LBB0_84
  2865      JMP LBB0_99
  2866  LBB0_85:
  2867      WORD $0x7100805f //    cmp    w2, #32
  2868      BLT LBB0_99
  2869      VMOVQ $0x0000001100000014, $0x0000000b0000000e, V7 // LCPI0_20
  2870      VMOVQ $0x0000000f0000000c, $0x0000001500000012, V0 // LCPI0_21
  2871      MOVD $0x0000000500000008, R16 // LCPI0_22
  2872      MOVD $0x0000001b00000018, R1 // LCPI0_23
  2873      VMOVQ $0x000000190000001c, $0x0000001300000016, V17 // LCPI0_24
  2874      VMOVQ $0x0000000700000004, $0x0000000d0000000a, V2 // LCPI0_25
  2875      VMOVQ $0x0000000d00000010, $0x000000070000000a, V25 // LCPI0_26
  2876      VMOVQ $0x0000001300000010, $0x0000001900000016, V3 // LCPI0_27
  2877      MOVD $0x0000001b0000001e, R19 // LCPI0_28
  2878      MOVD $0x0000000500000002, R4 // LCPI0_29
  2879      VMOVQ $0x0000001500000018, $0x0000000f00000012, V20 // LCPI0_30
  2880      VMOVQ $0x0000000b00000008, $0x000000110000000e, V5 // LCPI0_31
  2881      MOVD $0x000000090000000c, R21 // LCPI0_32
  2882      MOVD $0x0000001700000014, R6 // LCPI0_33
  2883  
  2884      WORD $0x91010029 //    add    x9, x1, #64
  2885      WORD $0x6ea0b8e7 //    neg    v7.4s, v7.4s
  2886      WORD $0x2ea0ba10 //    neg    v16.2s, v16.2s
  2887      WORD $0x6ea0ba31 //    neg    v17.4s, v17.4s
  2888      WORD $0x6ea0ba52 //    neg    v25.4s, v25.4s
  2889      WORD $0x2ea0ba73 //    neg    v19.2s, v19.2s
  2890      WORD $0x6ea0ba94 //    neg    v20.4s, v20.4s
  2891      WORD $0x2ea0bab5 //    neg    v21.2s, v21.2s
  2892  LBB0_87:   
  2893      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  2894      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  2895      WORD $0xf1000508 //    subs    x8, x8, #1
  2896      WORD $0x138a756e //    extr    w14, w11, w10, #29
  2897      WORD $0x1e270156 //    fmov    s22, w10
  2898      WORD $0x138b698b //    extr    w11, w12, w11, #26
  2899      WORD $0x4e0c1dd6 //    mov    v22.s[1], w14
  2900      WORD $0x138c5dac //    extr    w12, w13, w12, #23
  2901      WORD $0x4e141d76 //    mov    v22.s[2], w11
  2902      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2903      WORD $0x6f077416 //    bic    v22.4s, #224, lsl #24
  2904      WORD $0x3c9c0136 //    stur    q22, [x9, #-64]
  2905      WORD $0xbd400c16 //    ldr    s22, [x0, #12]
  2906      WORD $0x3dc00417 //    ldr    q23, [x0, #16]
  2907      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  2908      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  2909      WORD $0x6ea046f8 //    ushl    v24.4s, v23.4s, v0.4s
  2910      WORD $0x6ea746d6 //    ushl    v22.4s, v22.4s, v7.4s
  2911      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  2912      WORD $0x6f077416 //    bic    v22.4s, #224, lsl #24
  2913      WORD $0x3c9d0136 //    stur    q22, [x9, #-48]
  2914      WORD $0xfd401016 //    ldr    d22, [x0, #32]
  2915      WORD $0xbd401c17 //    ldr    s23, [x0, #28]
  2916      WORD $0xb940280a //    ldr    w10, [x0, #40]
  2917      WORD $0x2ea146d8 //    ushl    v24.2s, v22.2s, v1.2s
  2918      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2919      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2920      WORD $0x2eb046f6 //    ushl    v22.2s, v23.2s, v16.2s
  2921      WORD $0x53027d6c //    lsr    w12, w11, #2
  2922      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2923      WORD $0x138b7d4a //    extr    w10, w10, w11, #31
  2924      WORD $0x4e141d96 //    mov    v22.s[2], w12
  2925      WORD $0x4e1c1d56 //    mov    v22.s[3], w10
  2926      WORD $0x6f077416 //    bic    v22.4s, #224, lsl #24
  2927      WORD $0x3c9e0136 //    stur    q22, [x9, #-32]
  2928      WORD $0xbd402816 //    ldr    s22, [x0, #40]
  2929      WORD $0x3cc2c017 //    ldur    q23, [x0, #44]
  2930      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  2931      WORD $0x6ea246f8 //    ushl    v24.4s, v23.4s, v2.4s
  2932      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  2933      WORD $0x6f077418 //    bic    v24.4s, #224, lsl #24
  2934      WORD $0x6eb146d6 //    ushl    v22.4s, v22.4s, v17.4s
  2935      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  2936      WORD $0x3c9f0136 //    stur    q22, [x9, #-16]
  2937      WORD $0xbd403816 //    ldr    s22, [x0, #56]
  2938      WORD $0x3cc3c017 //    ldur    q23, [x0, #60]
  2939      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  2940      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  2941      WORD $0x6ea346f8 //    ushl    v24.4s, v23.4s, v3.4s
  2942      WORD $0x6eb246d6 //    ushl    v22.4s, v22.4s, v25.4s
  2943      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  2944      WORD $0x6f077416 //    bic    v22.4s, #224, lsl #24
  2945      WORD $0x3d800136 //    str    q22, [x9]
  2946      WORD $0x29492c0a //    ldp    w10, w11, [x0, #72]
  2947      WORD $0xfd402816 //    ldr    d22, [x0, #80]
  2948      WORD $0x1e270177 //    fmov    s23, w11
  2949      WORD $0x138a116a //    extr    w10, w11, w10, #4
  2950      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2951      WORD $0x53017d6c //    lsr    w12, w11, #1
  2952      WORD $0x2ea446d6 //    ushl    v22.2s, v22.2s, v4.2s
  2953      WORD $0x1e270158 //    fmov    s24, w10
  2954      WORD $0x2eb346f7 //    ushl    v23.2s, v23.2s, v19.2s
  2955      WORD $0x4e0c1d98 //    mov    v24.s[1], w12
  2956      WORD $0x0eb71ed6 //    orr    v22.8b, v22.8b, v23.8b
  2957      WORD $0x6e1806d8 //    mov    v24.d[1], v22.d[0]
  2958      WORD $0x6f077418 //    bic    v24.4s, #224, lsl #24
  2959      WORD $0x3d800538 //    str    q24, [x9, #16]
  2960      WORD $0xbd405416 //    ldr    s22, [x0, #84]
  2961      WORD $0x3cc58017 //    ldur    q23, [x0, #88]
  2962      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  2963      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  2964      WORD $0x6ea546f8 //    ushl    v24.4s, v23.4s, v5.4s
  2965      WORD $0x6eb446d6 //    ushl    v22.4s, v22.4s, v20.4s
  2966      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  2967      WORD $0x6f077416 //    bic    v22.4s, #224, lsl #24
  2968      WORD $0x3d800936 //    str    q22, [x9, #32]
  2969      WORD $0xfd403416 //    ldr    d22, [x0, #104]
  2970      WORD $0xbd406417 //    ldr    s23, [x0, #100]
  2971      WORD $0xb940700a //    ldr    w10, [x0, #112]
  2972      WORD $0x9101d000 //    add    x0, x0, #116
  2973      WORD $0x2ea646d8 //    ushl    v24.2s, v22.2s, v6.2s
  2974      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  2975      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  2976      WORD $0x2eb546f6 //    ushl    v22.2s, v23.2s, v21.2s
  2977      WORD $0x53037d4c //    lsr    w12, w10, #3
  2978      WORD $0x138b194a //    extr    w10, w10, w11, #6
  2979      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  2980      WORD $0x4e141d56 //    mov    v22.s[2], w10
  2981      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  2982      WORD $0x6f077416 //    bic    v22.4s, #224, lsl #24
  2983      WORD $0x3d800d36 //    str    q22, [x9, #48]
  2984      WORD $0x91020129 //    add    x9, x9, #128 
  2985  
  2986      BNE LBB0_87
  2987      JMP LBB0_99
  2988  LBB0_88:
  2989      WORD $0x7100805f //    cmp    w2, #32
  2990      BLT LBB0_99
  2991      VMOVQ $0x0000001600000018, $0x0000001200000014, V3 // LCPI0_14
  2992      VMOVQ $0x0000000a00000008, $0x0000000e0000000c, V0 // LCPI0_15
  2993      VMOVQ $0x0000000e00000010, $0x0000000a0000000c, V4 // LCPI0_16
  2994      VMOVQ $0x0000001200000010, $0x0000001600000014, V1 // LCPI0_17
  2995      MOVD $0x0000000600000008, R5 // LCPI0_18
  2996      MOVD $0x0000001a00000018, R2 // LCPI0_19
  2997  
  2998      WORD $0x91010029 //    add    x9, x1, #64
  2999      WORD $0x6ea0b863 //    neg    v3.4s, v3.4s
  3000      WORD $0x6ea0b884 //    neg    v4.4s, v4.4s
  3001      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
  3002  LBB0_90:
  3003      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  3004      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  3005      WORD $0xf1000508 //    subs    x8, x8, #1
  3006      WORD $0x138a796e //    extr    w14, w11, w10, #30
  3007      WORD $0x1e270146 //    fmov    s6, w10
  3008      WORD $0x138b718b //    extr    w11, w12, w11, #28
  3009      WORD $0x4e0c1dc6 //    mov    v6.s[1], w14
  3010      WORD $0x138c69ac //    extr    w12, w13, w12, #26
  3011      WORD $0x4e141d66 //    mov    v6.s[2], w11
  3012      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3013      WORD $0x6f067406 //    bic    v6.4s, #192, lsl #24
  3014      WORD $0x3c9c0126 //    stur    q6, [x9, #-64]
  3015      WORD $0xbd400c06 //    ldr    s6, [x0, #12]
  3016      WORD $0x3dc00407 //    ldr    q7, [x0, #16]
  3017      WORD $0x6e0620c6 //    ext    v6.16b, v6.16b, v6.16b, #4
  3018      WORD $0x6ea044f0 //    ushl    v16.4s, v7.4s, v0.4s
  3019      WORD $0x6e0760c6 //    ext    v6.16b, v6.16b, v7.16b, #12
  3020      WORD $0x6f067410 //    bic    v16.4s, #192, lsl #24
  3021      WORD $0x6ea344c6 //    ushl    v6.4s, v6.4s, v3.4s
  3022      WORD $0x4ea61e06 //    orr    v6.16b, v16.16b, v6.16b
  3023      WORD $0x3c9d0126 //    stur    q6, [x9, #-48]
  3024      WORD $0xbd401c06 //    ldr    s6, [x0, #28]
  3025      WORD $0x3dc00807 //    ldr    q7, [x0, #32]
  3026      WORD $0x6e0620c6 //    ext    v6.16b, v6.16b, v6.16b, #4
  3027      WORD $0x6ea144f0 //    ushl    v16.4s, v7.4s, v1.4s
  3028      WORD $0x6e0760c6 //    ext    v6.16b, v6.16b, v7.16b, #12
  3029      WORD $0x6f067410 //    bic    v16.4s, #192, lsl #24
  3030      WORD $0x6ea444c6 //    ushl    v6.4s, v6.4s, v4.4s
  3031      WORD $0x4ea61e06 //    orr    v6.16b, v16.16b, v6.16b
  3032      WORD $0x3c9e0126 //    stur    q6, [x9, #-32]
  3033      WORD $0xfd401806 //    ldr    d6, [x0, #48]
  3034      WORD $0xbd402c07 //    ldr    s7, [x0, #44]
  3035      WORD $0xb940380a //    ldr    w10, [x0, #56]
  3036      WORD $0x2ea244d0 //    ushl    v16.2s, v6.2s, v2.2s
  3037      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3038      WORD $0x0e0c3ccb //    mov    w11, v6.s[1]
  3039      WORD $0x2ea544e6 //    ushl    v6.2s, v7.2s, v5.2s
  3040      WORD $0x53027d4c //    lsr    w12, w10, #2
  3041      WORD $0x138b114a //    extr    w10, w10, w11, #4
  3042      WORD $0x0ea61e06 //    orr    v6.8b, v16.8b, v6.8b
  3043      WORD $0x4e141d46 //    mov    v6.s[2], w10
  3044      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3045      WORD $0x6f067406 //    bic    v6.4s, #192, lsl #24
  3046      WORD $0x3c9f0126 //    stur    q6, [x9, #-16]
  3047      WORD $0x2947ac0a //    ldp    w10, w11, [x0, #60]
  3048      WORD $0x2948b40c //    ldp    w12, w13, [x0, #68]
  3049      WORD $0x138a796e //    extr    w14, w11, w10, #30
  3050      WORD $0x1e270146 //    fmov    s6, w10
  3051      WORD $0x138b718b //    extr    w11, w12, w11, #28
  3052      WORD $0x4e0c1dc6 //    mov    v6.s[1], w14
  3053      WORD $0x138c69ac //    extr    w12, w13, w12, #26
  3054      WORD $0x4e141d66 //    mov    v6.s[2], w11
  3055      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3056      WORD $0x6f067406 //    bic    v6.4s, #192, lsl #24
  3057      WORD $0x3d800126 //    str    q6, [x9]
  3058      WORD $0xbd404806 //    ldr    s6, [x0, #72]
  3059      WORD $0x3cc4c007 //    ldur    q7, [x0, #76]
  3060      WORD $0x6e0620c6 //    ext    v6.16b, v6.16b, v6.16b, #4
  3061      WORD $0x6ea044f0 //    ushl    v16.4s, v7.4s, v0.4s
  3062      WORD $0x6e0760c6 //    ext    v6.16b, v6.16b, v7.16b, #12
  3063      WORD $0x6f067410 //    bic    v16.4s, #192, lsl #24
  3064      WORD $0x6ea344c6 //    ushl    v6.4s, v6.4s, v3.4s
  3065      WORD $0x4ea61e06 //    orr    v6.16b, v16.16b, v6.16b
  3066      WORD $0x3d800526 //    str    q6, [x9, #16]
  3067      WORD $0xbd405806 //    ldr    s6, [x0, #88]
  3068      WORD $0x3cc5c007 //    ldur    q7, [x0, #92]
  3069      WORD $0x6e0620c6 // ext    v6.16b, v6.16b, v6.16b, #4
  3070      WORD $0x6ea144f0 //    ushl    v16.4s, v7.4s, v1.4s
  3071      WORD $0x6e0760c6 //    ext    v6.16b, v6.16b, v7.16b, #12
  3072      WORD $0x6f067410 //    bic    v16.4s, #192, lsl #24
  3073      WORD $0x6ea444c6 //    ushl    v6.4s, v6.4s, v4.4s
  3074      WORD $0x4ea61e06 //    orr    v6.16b, v16.16b, v6.16b
  3075      WORD $0x3d800926 //    str    q6, [x9, #32]
  3076      WORD $0xfc46c006 //    ldur    d6, [x0, #108]
  3077      WORD $0xbd406807 //    ldr    s7, [x0, #104]
  3078      WORD $0xb940740a //    ldr    w10, [x0, #116]
  3079      WORD $0x9101e000 //    add    x0, x0, #120
  3080      WORD $0x2ea244d0 //    ushl    v16.2s, v6.2s, v2.2s
  3081      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3082      WORD $0x0e0c3ccb //    mov    w11, v6.s[1]
  3083      WORD $0x2ea544e6 //    ushl    v6.2s, v7.2s, v5.2s
  3084      WORD $0x53027d4c //    lsr    w12, w10, #2
  3085      WORD $0x138b114a //    extr    w10, w10, w11, #4
  3086      WORD $0x0ea61e06 //    orr    v6.8b, v16.8b, v6.8b
  3087      WORD $0x4e141d46 //    mov    v6.s[2], w10
  3088      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3089      WORD $0x6f067406 //    bic    v6.4s, #192, lsl #24
  3090      WORD $0x3d800d26 //    str    q6, [x9, #48]
  3091      WORD $0x91020129 //    add    x9, x9, #128 
  3092  
  3093      BNE LBB0_90
  3094      JMP LBB0_99
  3095  LBB0_91:
  3096      WORD $0x7100805f //    cmp    w2, #32
  3097      BLT LBB0_99
  3098      VMOVQ $0x0000001b0000001c, $0x000000190000001a, V7 // LCPI0_0
  3099      VMOVQ $0x0000000500000004, $0x0000000700000006, V0 // LCPI0_1
  3100      VMOVQ $0x0000001700000018, $0x0000001500000016, V16 // LCPI0_2
  3101      VMOVQ $0x0000000900000008, $0x0000000b0000000a, V1 // LCPI0_3
  3102      VMOVQ $0x0000001300000014, $0x0000001100000012, V17 // LCPI0_4
  3103      VMOVQ $0x0000000d0000000c, $0x0000000f0000000e, V2 // LCPI0_5
  3104      VMOVQ $0x0000000f00000010, $0x0000000d0000000e, V25 // LCPI0_6
  3105      VMOVQ $0x0000001100000010, $0x0000001300000012, V3 // LCPI0_7
  3106      VMOVQ $0x0000000b0000000c, $0x000000090000000a, V19 // LCPI0_8
  3107      VMOVQ $0x0000001500000014, $0x0000001700000016, V4 // LCPI0_9
  3108      VMOVQ $0x0000000700000008, $0x0000000500000006, V20 // LCPI0_10
  3109      VMOVQ $0x0000001900000018, $0x0000001b0000001a, V5 // LCPI0_11
  3110      MOVD $0x0000000300000004, R21 // LCPI0_12
  3111      MOVD $0x0000001d0000001c, R6 // LCPI0_13
  3112  
  3113      WORD $0x91010029 //    add    x9, x1, #64
  3114      WORD $0x6ea0b8e7 //    neg    v7.4s, v7.4s
  3115      WORD $0x6ea0ba10 //    neg    v16.4s, v16.4s
  3116      WORD $0x6ea0ba31 //    neg    v17.4s, v17.4s
  3117      WORD $0x6ea0ba52 //    neg    v25.4s, v25.4s
  3118      WORD $0x6ea0ba73 //    neg    v19.4s, v19.4s
  3119      WORD $0x6ea0ba94 //    neg    v20.4s, v20.4s
  3120      WORD $0x2ea0bab5 //    neg    v21.2s, v21.2s
  3121  LBB0_93:
  3122      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  3123      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  3124      WORD $0xf1000508 //    subs    x8, x8, #1
  3125      WORD $0x138a7d6e //    extr    w14, w11, w10, #31
  3126      WORD $0x1e270156 //    fmov    s22, w10
  3127      WORD $0x4e0c1dd6 //    mov    v22.s[1], w14
  3128      WORD $0x138c75ac //    extr    w12, w13, w12, #29
  3129      WORD $0x4e141d76 //    mov    v22.s[2], w11
  3130      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  3131      WORD $0x6f047416 //    bic    v22.4s, #128, lsl #24
  3132      WORD $0x3c9c0136 //    stur    q22, [x9, #-64]
  3133      WORD $0xbd400c16 //    ldr    s22, [x0, #12]
  3134      WORD $0x3dc00417 //    ldr    q23, [x0, #16]
  3135      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  3136      WORD $0x6ea046f8 //    ushl    v24.4s, v23.4s, v0.4s
  3137      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  3138      WORD $0x6f047418 //    bic    v24.4s, #128, lsl #24
  3139      WORD $0x6ea746d6 //    ushl    v22.4s, v22.4s, v7.4s
  3140      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  3141      WORD $0x3c9d0136 //    stur    q22, [x9, #-48]
  3142      WORD $0xbd401c16 //    ldr    s22, [x0, #28]
  3143      WORD $0x3dc00817 //    ldr    q23, [x0, #32]
  3144      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  3145      WORD $0x6ea146f8 //    ushl    v24.4s, v23.4s, v1.4s
  3146      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  3147      WORD $0x6f047418 //    bic    v24.4s, #128, lsl #24
  3148      WORD $0x6eb046d6 //    ushl    v22.4s, v22.4s, v16.4s
  3149      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  3150      WORD $0x3c9e0136 //    stur    q22, [x9, #-32]
  3151      WORD $0xbd402c16 //    ldr    s22, [x0, #44]
  3152      WORD $0x3dc00c17 //    ldr    q23, [x0, #48]
  3153      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  3154      WORD $0x6ea246f8 //    ushl    v24.4s, v23.4s, v2.4s
  3155      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  3156      WORD $0x6f047418 //    bic    v24.4s, #128, lsl #24
  3157      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  3158      WORD $0x3c9f0136 //    stur    q22, [x9, #-16]
  3159      WORD $0xbd403c16 //    ldr    s22, [x0, #60]
  3160      WORD $0x3dc01017 //    ldr    q23, [x0, #64]
  3161      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  3162      WORD $0x6ea346f8 //    ushl    v24.4s, v23.4s, v3.4s
  3163      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  3164      WORD $0x6f047418 //    bic    v24.4s, #128, lsl #24
  3165      WORD $0x6eb246d6 //    ushl    v22.4s, v22.4s, v25.4s
  3166      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  3167      WORD $0x3d800136 //    str    q22, [x9]
  3168      WORD $0xbd404c16 //    ldr    s22, [x0, #76]
  3169      WORD $0x3dc01417 //    ldr    q23, [x0, #80]
  3170      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  3171      WORD $0x6ea446f8 //    ushl    v24.4s, v23.4s, v4.4s
  3172      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  3173      WORD $0x6f047418 //    bic    v24.4s, #128, lsl #24
  3174      WORD $0x6eb346d6 //    ushl    v22.4s, v22.4s, v19.4s
  3175      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  3176      WORD $0x3d800536 //    str    q22, [x9, #16]
  3177      WORD $0xbd405c16 //    ldr    s22, [x0, #92]
  3178      WORD $0x3dc01817 //    ldr    q23, [x0, #96]
  3179      WORD $0x6e1622d6 //    ext    v22.16b, v22.16b, v22.16b, #4
  3180      WORD $0x6ea546f8 //    ushl    v24.4s, v23.4s, v5.4s
  3181      WORD $0x6e1762d6 //    ext    v22.16b, v22.16b, v23.16b, #12
  3182      WORD $0x6f047418 //    bic    v24.4s, #128, lsl #24
  3183      WORD $0x6eb446d6 //    ushl    v22.4s, v22.4s, v20.4s
  3184      WORD $0x4eb61f16 //    orr    v22.16b, v24.16b, v22.16b
  3185      WORD $0x3d800936 //    str    q22, [x9, #32]
  3186      WORD $0xfd403816 //    ldr    d22, [x0, #112]
  3187      WORD $0xbd406c17 //    ldr    s23, [x0, #108]
  3188      WORD $0xb940780a //    ldr    w10, [x0, #120]
  3189      WORD $0x9101f000 //    add    x0, x0, #124
  3190      WORD $0x2ea646d8 //    ushl    v24.2s, v22.2s, v6.2s
  3191      WORD $0x0e963af7 //    zip1    v23.2s, v23.2s, v22.2s
  3192      WORD $0x0e0c3ecb //    mov    w11, v22.s[1]
  3193      WORD $0x2eb546f6 //    ushl    v22.2s, v23.2s, v21.2s
  3194      WORD $0x53017d4c //    lsr    w12, w10, #1
  3195      WORD $0x138b094a //    extr    w10, w10, w11, #2
  3196      WORD $0x0eb61f16 //    orr    v22.8b, v24.8b, v22.8b
  3197      WORD $0x4e141d56 //    mov    v22.s[2], w10
  3198      WORD $0x4e1c1d96 //    mov    v22.s[3], w12
  3199      WORD $0x6f047416 //    bic    v22.4s, #128, lsl #24
  3200      WORD $0x3d800d36 //    str    q22, [x9, #48]
  3201      WORD $0x91020129 //    add    x9, x9, #128 
  3202  
  3203      BNE LBB0_93
  3204      JMP LBB0_99
  3205  LBB0_94:
  3206      WORD $0x7100805f //    cmp    w2, #32
  3207      BLT LBB0_99
  3208  LBB0_95:
  3209      WORD $0xad410400 //    ldp    q0, q1, [x0, #32]
  3210      WORD $0xad400c02 //    ldp    q2, q3, [x0]
  3211      WORD $0xf1000508 //    subs    x8, x8, #1
  3212      WORD $0xad010420 //    stp    q0, q1, [x1, #32]
  3213      WORD $0xad000c22 //    stp    q2, q3, [x1]
  3214      WORD $0xad430400 //    ldp    q0, q1, [x0, #96]
  3215      WORD $0xad420c02 //    ldp    q2, q3, [x0, #64]
  3216      WORD $0x91020000 //    add    x0, x0, #128
  3217      WORD $0xad030420 //    stp    q0, q1, [x1, #96]
  3218      WORD $0xad020c22 //    stp    q2, q3, [x1, #64]
  3219      WORD $0x91020021 //    add    x1, x1, #128
  3220  
  3221      BNE LBB0_95
  3222      JMP LBB0_99
  3223  LBB0_96:
  3224      WORD $0x7100805f //    cmp    w2, #32
  3225      BLT LBB0_99
  3226      MOVD $0x000000160000001c, R3 // LCPI0_50
  3227      MOVD $0x0000000a00000004, R0 // LCPI0_51
  3228      MOVD $0x0000000a00000010, R4 // LCPI0_52
  3229      MOVD $0x0000001600000010, R1 // LCPI0_53
  3230      MOVD $0x0000001200000018, R5 // LCPI0_54
  3231      MOVD $0x0000000e00000008, R2 // LCPI0_55
  3232  
  3233      WORD $0x91010029 //    add    x9, x1, #64
  3234      WORD $0x2ea0b863 //    neg    v3.2s, v3.2s
  3235      WORD $0x2ea0b884 //    neg    v4.2s, v4.2s
  3236      WORD $0x2ea0b8a5 //    neg    v5.2s, v5.2s
  3237  LBB0_98:
  3238      WORD $0x29402c0a //    ldp    w10, w11, [x0]
  3239      WORD $0x2941340c //    ldp    w12, w13, [x0, #8]
  3240      WORD $0xf1000508 //    subs    x8, x8, #1
  3241      WORD $0x138a696e //    extr    w14, w11, w10, #26
  3242      WORD $0x1e270146 //    fmov    s6, w10
  3243      WORD $0x138b518b //    extr    w11, w12, w11, #20
  3244      WORD $0x4e0c1dc6 //    mov    v6.s[1], w14
  3245      WORD $0x138c39ac //    extr    w12, w13, w12, #14
  3246      WORD $0x4e141d66 //    mov    v6.s[2], w11
  3247      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3248      WORD $0x6f077786 //    bic    v6.4s, #252, lsl #24
  3249      WORD $0x3c9c0126 //    stur    q6, [x9, #-64]
  3250      WORD $0x2941ac0a //    ldp    w10, w11, [x0, #12]
  3251      WORD $0xfc414006 //    ldur    d6, [x0, #20]
  3252      WORD $0x1e270167 //    fmov    s7, w11
  3253      WORD $0x138a216a //    extr    w10, w11, w10, #8
  3254      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3255      WORD $0x53027d6c //    lsr    w12, w11, #2
  3256      WORD $0x2ea044c6 //    ushl    v6.2s, v6.2s, v0.2s
  3257      WORD $0x1e270150 //    fmov    s16, w10
  3258      WORD $0x2ea344e7 //    ushl    v7.2s, v7.2s, v3.2s
  3259      WORD $0x4e0c1d90 //    mov    v16.s[1], w12
  3260      WORD $0x0ea71cc6 //    orr    v6.8b, v6.8b, v7.8b
  3261      WORD $0x6e1804d0 //    mov    v16.d[1], v6.d[0]
  3262      WORD $0x6f077790 //    bic    v16.4s, #252, lsl #24
  3263      WORD $0x3c9d0130 //    stur    q16, [x9, #-48]
  3264      WORD $0xfc41c006 //    ldur    d6, [x0, #28]
  3265      WORD $0xbd401807 //    ldr    s7, [x0, #24]
  3266      WORD $0xb940240a //    ldr    w10, [x0, #36]
  3267      WORD $0x2ea144d0 //    ushl    v16.2s, v6.2s, v1.2s
  3268      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3269      WORD $0x0e0c3ccb //    mov    w11, v6.s[1]
  3270      WORD $0x2ea444e6 //    ushl    v6.2s, v7.2s, v4.2s
  3271      WORD $0x53047d6c //    lsr    w12, w11, #4
  3272      WORD $0x0ea61e06 //    orr    v6.8b, v16.8b, v6.8b
  3273      WORD $0x138b794a //    extr    w10, w10, w11, #30
  3274      WORD $0x4e141d86 //    mov    v6.s[2], w12
  3275      WORD $0x4e1c1d46 //    mov    v6.s[3], w10
  3276      WORD $0x6f077786 //    bic    v6.4s, #252, lsl #24
  3277      WORD $0x3c9e0126 //    stur    q6, [x9, #-32]
  3278      WORD $0xfd401406 //    ldr    d6, [x0, #40]
  3279      WORD $0xbd402407 //    ldr    s7, [x0, #36]
  3280      WORD $0xb940300a //    ldr    w10, [x0, #48]
  3281      WORD $0x2ea244d0 //    ushl    v16.2s, v6.2s, v2.2s
  3282      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3283      WORD $0x0e0c3ccb //    mov    w11, v6.s[1]
  3284      WORD $0x2ea544e6 //    ushl    v6.2s, v7.2s, v5.2s
  3285      WORD $0x138b314a //    extr    w10, w10, w11, #12
  3286      WORD $0x0ea61e06 //    orr    v6.8b, v16.8b, v6.8b
  3287      WORD $0x4e141d46 //    mov    v6.s[2], w10
  3288      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3289      WORD $0x6f077786 //    bic    v6.4s, #252, lsl #24
  3290      WORD $0x3c9f0126 //    stur    q6, [x9, #-16]
  3291      WORD $0x2946ac0a //    ldp    w10, w11, [x0, #52]
  3292      WORD $0x2947b40c //    ldp    w12, w13, [x0, #60]
  3293      WORD $0x138a696e //    extr    w14, w11, w10, #26
  3294      WORD $0x1e270146 //    fmov    s6, w10
  3295      WORD $0x138b518b //    extr    w11, w12, w11, #20
  3296      WORD $0x4e0c1dc6 //    mov    v6.s[1], w14
  3297      WORD $0x138c39ac //    extr    w12, w13, w12, #14
  3298      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3299      WORD $0x6f077786 //    bic    v6.4s, #252, lsl #24
  3300      WORD $0x3d800126 //    str    q6, [x9]
  3301      WORD $0x29482c0a //    ldp    w10, w11, [x0, #64]
  3302      WORD $0xfd402406 //    ldr    d6, [x0, #72]
  3303      WORD $0x1e270167 //    fmov    s7, w11
  3304      WORD $0x138a216a //    extr    w10, w11, w10, #8
  3305      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3306      WORD $0x2ea044c6 //    ushl    v6.2s, v6.2s, v0.2s
  3307      WORD $0x1e270150 //    fmov    s16, w10
  3308      WORD $0x2ea344e7 //    ushl    v7.2s, v7.2s, v3.2s
  3309      WORD $0x4e0c1d90 //    mov    v16.s[1], w12
  3310      WORD $0x0ea71cc6 //    orr    v6.8b, v6.8b, v7.8b
  3311      WORD $0x6e1804d0 //    mov    v16.d[1], v6.d[0]
  3312      WORD $0x6f077790 //    bic    v16.4s, #252, lsl #24
  3313      WORD $0x3d800530 //    str    q16, [x9, #16]
  3314      WORD $0xfd402806 //    ldr    d6, [x0, #80]
  3315      WORD $0xbd404c07 //    ldr    s7, [x0, #76]
  3316      WORD $0xb940580a //    ldr    w10, [x0, #88]
  3317      WORD $0x2ea144d0 //    ushl    v16.2s, v6.2s, v1.2s
  3318      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3319      WORD $0x0e0c3ccb //    mov    w11, v6.s[1]
  3320      WORD $0x2ea444e6 //    ushl    v6.2s, v7.2s, v4.2s
  3321      WORD $0x53047d6c //    lsr    w12, w11, #4
  3322      WORD $0x0ea61e06 //    orr    v6.8b, v16.8b, v6.8b
  3323      WORD $0x138b794a //    extr    w10, w10, w11, #30
  3324      WORD $0x4e141d86 //    mov    v6.s[2], w12
  3325      WORD $0x4e1c1d46 //    mov    v6.s[3], w10
  3326      WORD $0x6f077786 //    bic    v6.4s, #252, lsl #24
  3327      WORD $0x3d800926 //    str    q6, [x9, #32]
  3328      WORD $0xfc45c006 //    ldur    d6, [x0, #92]
  3329      WORD $0xbd405807 //    ldr    s7, [x0, #88]
  3330      WORD $0xb940640a //    ldr    w10, [x0, #100]
  3331      WORD $0x9101a000 //    add    x0, x0, #104
  3332      WORD $0x2ea244d0 //    ushl    v16.2s, v6.2s, v2.2s
  3333      WORD $0x0e8638e7 //    zip1    v7.2s, v7.2s, v6.2s
  3334      WORD $0x0e0c3ccb //    mov    w11, v6.s[1]
  3335      WORD $0x2ea544e6 //    ushl    v6.2s, v7.2s, v5.2s
  3336      WORD $0x53067d4c //    lsr    w12, w10, #6
  3337      WORD $0x138b314a //    extr    w10, w10, w11, #12
  3338      WORD $0x0ea61e06 //    orr    v6.8b, v16.8b, v6.8b
  3339      WORD $0x4e141d46 //    mov    v6.s[2], w10
  3340      WORD $0x4e1c1d86 //    mov    v6.s[3], w12
  3341      WORD $0x6f077786 //    bic    v6.4s, #252, lsl #24
  3342      WORD $0x3d800d26 //    str    q6, [x9, #48]
  3343      WORD $0x91020129 //    add    x9, x9, #128
  3344  
  3345      BNE LBB0_98
  3346  LBB0_99:
  3347      MOVD R19, num+32(FP)
  3348      WORD $0xf9400bf3 //    ldr    x19, [sp, #16]
  3349      WORD $0xa8c27bfd //    ldp    x29, x30, [sp], #32
  3350      RET
  3351