github.com/apache/arrow/go/v10@v10.0.1/parquet/internal/bmi/bitmap_neon_arm64.s (about)

     1  //+build !noasm !appengine
     2  
     3  // (C2GOASM doesn't work correctly for Arm64)
     4  // func _extract_bits_neon(bitmap, selectBitmap uint64) (res uint64)
        //
        // Software bit-extract (PEXT-style): visits the set bits of selectBitmap from
        // least to most significant and packs the corresponding bits of bitmap
        // contiguously into res, starting at bit 0. Despite the _neon suffix, only
        // general-purpose registers are used.
        //
        // Stack arguments: bitmap+0(FP), selectBitmap+8(FP); result: res+16(FP).
        // Register roles:
        //   x0  = accumulated result (holds bitmap only until it is copied to x8)
        //   x1  = select bits that have not been consumed yet
        //   x8  = bitmap
        //   x9  = one-hot mask for the next output bit position
        //   x10, x11, x12 = scratch
        // res is written on every return path, including selectBitmap == 0.
     5  TEXT ·_extract_bits_neon(SB), $0-24
     6  
     7      MOVD bitmap+0(FP), R0
     8      MOVD selectBitmap+8(FP), R1
     9  
        // Save the frame pointer and link register; both exits undo this with ldp.
    10      WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
    11      WORD $0x910003fd // mov    x29, sp
        // Nothing selected: the result is 0.
    12      CBZ R1, LBB0_4
    13      WORD $0xaa0003e8 // mov    x8, x0
    14      WORD $0xaa1f03e0 // mov    x0, xzr
    15      WORD $0x52800029 // mov    w9, #1
        // One iteration per set bit of the select mask.
    16  LBB0_2:
        // (select & bitmap) & -select is non-zero iff bitmap has the lowest
        // remaining select bit set.
    17      WORD $0x8a08002a // and    x10, x1, x8
    18      WORD $0xcb0103eb // neg    x11, x1
    19      WORD $0xea0b015f // tst    x10, x11
    20      WORD $0xd100042c // sub    x12, x1, #1
        // x10 = 0 when that bitmap bit is clear, else the current output mask.
    21      WORD $0x9a8903ea // csel    x10, xzr, x9, eq
        // Clear the lowest set select bit; the flags set here drive the BNE below
        // (the orr and lsl in between do not touch the flags).
    22      WORD $0xea010181 // ands    x1, x12, x1
    23      WORD $0xaa000140 // orr    x0, x10, x0
    24      WORD $0xd37ff929 // lsl    x9, x9, #1
    25      BNE LBB0_2
    26      WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
        // Store the result only after SP has been restored by the ldp.
    27      MOVD R0, res+16(FP)
    28      RET
    29  LBB0_4:
    30      WORD $0xaa1f03e0 // mov    x0, xzr
    31      WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
        // Fix: this path used to return without writing res, leaving the
        // caller's result slot uninitialized when selectBitmap == 0.
            MOVD R0, res+16(FP)
    32      RET
    33  
    34  // func _levels_to_bitmap_neon(levels unsafe.Pointer, numLevels int, rhs int16) (res uint64)
        //
        // Builds a bitmap in which bit i is set iff the signed 16-bit value levels[i]
        // is greater than rhs, for i in [0, numLevels).
        //
        // Stack arguments: levels+0(FP), numLevels+8(FP), rhs+16(FP); result: res+24(FP).
        // Only the low 32 bits of numLevels are examined (all compares use w1).
        // NOTE(review): bit positions are used directly as shift amounts, so callers
        // presumably pass numLevels <= 64 -- confirm against the Go wrapper.
        //
        // Register roles:
        //   x0  = levels base pointer        w2 = rhs (only the low 16 bits are used)
        //   x9  = numLevels (zero-extended from w1)
        //   x10 = element count covered by the vector loop, then the scalar index
        //   x8  = load cursor in the vector loop, afterwards the result accumulator
        //   v1  = bit positions {i, i+1}      v17 = {i+2, i+3}
        //   v2  = {2, 2}   v3 = {1, 1}   v5 = {4, 4}   v4 = rhs in both 32-bit lanes
        //   v0, v6 = partial result accumulators
    35  TEXT ·_levels_to_bitmap_neon(SB), $0-32
    36  
    37      MOVD levels+0(FP), R0
    38      MOVD numLevels+8(FP), R1
        // rhs is an int16: load exactly its two bytes (sign-extended) instead of
        // eight bytes with MOVD. All later uses look only at the low 16 bits.
    39      MOVH rhs+16(FP), R2
    40  
    41      WORD $0xa9bf7bfd // stp    x29, x30, [sp, #-16]!
    42      WORD $0x7100043f // cmp    w1, #1
    43      WORD $0x910003fd // mov    x29, sp
        // numLevels < 1 (signed 32-bit): the result is 0.
    44      BLT LBB1_3
    45  
    46      WORD $0x71000c3f // cmp    w1, #3
    47      WORD $0x2a0103e9 // mov    w9, w1
        // More than 3 levels: take the 4-at-a-time vector loop.
    48      BHI LBB1_4
        // 1..3 levels: scalar loop only, starting at index 0 with an empty result.
    49      WORD $0xaa1f03ea // mov    x10, xzr
    50      WORD $0xaa1f03e8 // mov    x8, xzr
    51      JMP LBB1_7
    52  LBB1_3:
    53      WORD $0xaa1f03e8 // mov    x8, xzr
    54      JMP LBB1_8
    55  LBB1_4:
        // v1.2d = {0, 1}: bit positions of the first two levels (stands in for the
        // compiler's literal-pool load: adrp x11, .LCPI1_0; ldr q1, [x11, :lo12:.LCPI1_0]).
    56      VMOVQ $0x0000000000000000, $0x0000000000000001, V1
    57      WORD $0x5280004b // mov    w11, #2
    58      WORD $0x0e040c43 // dup    v3.2s, w2
    59      WORD $0x4e080d62 // dup    v2.2d, x11
    60      WORD $0x5280002b // mov    w11, #1
        // x10 = numLevels rounded down to a multiple of 4.
    61      WORD $0x927e752a // and    x10, x9, #0xfffffffc
        // shl #16 followed by sshr #16 (below) sign-extends the low 16 bits of rhs
        // within each 32-bit lane of v4.
    62      WORD $0x0f305464 // shl    v4.2s, v3.2s, #16
    63      WORD $0x4e080d63 // dup    v3.2d, x11
    64      WORD $0x5280008b // mov    w11, #4
        // x8 points 4 bytes past the current group, so the group's four int16
        // elements are at offsets -4, -2, 0 and +2.
    65      WORD $0x91001008 // add    x8, x0, #4
    66      WORD $0x6f00e400 // movi    v0.2d, #0000000000000000
    67      WORD $0x0f300484 // sshr    v4.2s, v4.2s, #16
    68      WORD $0x4e080d65 // dup    v5.2d, x11
    69      WORD $0xaa0a03eb // mov    x11, x10
    70      WORD $0x6f00e406 // movi    v6.2d, #0000000000000000
        // Vector loop: four levels per iteration; x11 counts the elements left.
    71  LBB1_5:
    72      WORD $0x78dfc10c // ldursh    w12, [x8, #-4]
    73      WORD $0x79c0010d // ldrsh    w13, [x8]
    74      WORD $0x78dfe10e // ldursh    w14, [x8, #-2]
    75      WORD $0x4ee28431 // add    v17.2d, v1.2d, v2.2d
    76      WORD $0x1e270187 // fmov    s7, w12
    77      WORD $0x79c0050c // ldrsh    w12, [x8, #2]
    78      WORD $0x1e2701b0 // fmov    s16, w13
    79      WORD $0x4e0c1dc7 // mov    v7.s[1], w14
        // Per-lane level > rhs mask, widened to 64 bits and reduced to 0/1.
    80      WORD $0x0ea434e7 // cmgt    v7.2s, v7.2s, v4.2s
    81      WORD $0x4e0c1d90 // mov    v16.s[1], w12
    82      WORD $0x0ea43610 // cmgt    v16.2s, v16.2s, v4.2s
    83      WORD $0x2f20a4e7 // ushll    v7.2d, v7.2s, #0
    84      WORD $0x2f20a610 // ushll    v16.2d, v16.2s, #0
    85      WORD $0x4e231ce7 // and    v7.16b, v7.16b, v3.16b
    86      WORD $0x4e231e10 // and    v16.16b, v16.16b, v3.16b
        // Move each 0/1 into its bit position.
    87      WORD $0x6ee144e7 // ushl    v7.2d, v7.2d, v1.2d
    88      WORD $0x6ef14610 // ushl    v16.2d, v16.2d, v17.2d
        // The flags set here drive the BNE below (the vector ops and the add do not
        // touch the flags).
    89      WORD $0xf100116b // subs    x11, x11, #4
    90      WORD $0x4ee58421 // add    v1.2d, v1.2d, v5.2d
    91      WORD $0x4ea01ce0 // orr    v0.16b, v7.16b, v0.16b
    92      WORD $0x4ea61e06 // orr    v6.16b, v16.16b, v6.16b
    93      WORD $0x91002108 // add    x8, x8, #8
    94      BNE LBB1_5
        // Fold both accumulators and both 64-bit lanes into x8.
    95      WORD $0x4ea01cc0 // orr    v0.16b, v6.16b, v0.16b
    96      WORD $0x4e180401 // dup    v1.2d, v0.d[1]
    97      WORD $0x4ea11c00 // orr    v0.16b, v0.16b, v1.16b
        // The fmov does not touch the flags, so BEQ still sees this compare:
        // skip the scalar tail when numLevels is a multiple of 4.
    98      WORD $0xeb09015f // cmp    x10, x9
    99      WORD $0x9e660008 // fmov    x8, d0
   100      BEQ LBB1_8
        // Scalar loop: one level per iteration, x10 is the element index.
   101  LBB1_7:
   102      WORD $0x78ea780b // ldrsh    w11, [x0, x10, lsl #1]
   103      WORD $0x6b22a17f // cmp    w11, w2, sxth
   104      WORD $0x1a9fd7eb // cset    w11, gt
   105      WORD $0x9aca216b // lsl    x11, x11, x10
   106      WORD $0x9100054a // add    x10, x10, #1
        // The orr does not touch the flags, so BNE still sees this compare.
   107      WORD $0xeb0a013f // cmp    x9, x10
   108      WORD $0xaa080168 // orr    x8, x11, x8
   109      BNE LBB1_7
        // Single exit: every path reaches here with the result in x8.
   110  LBB1_8:
   111      WORD $0xa8c17bfd // ldp    x29, x30, [sp], #16
   112      MOVD R8, res+24(FP)
   113      RET
   114