github.com/apache/arrow/go/v14@v14.0.2/parquet/internal/bmi/bitmap_bmi2_amd64.s (about)

     1  //+build !noasm !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  TEXT ·_extract_bits_bmi2(SB), $0-24
     5  
     6  	MOVQ bitmap+0(FP), DI
     7  	MOVQ selectBitmap+8(FP), SI
     8  
     9  	LONG $0xf5c2e2c4; BYTE $0xc6 // pext    rax, rdi, rsi
    10  	MOVQ AX, res+16(FP)
    11  	RET
    12  
    13  DATA LCDATA1<>+0x000(SB)/8, $0x0000000000000000
    14  DATA LCDATA1<>+0x008(SB)/8, $0x0000000000000001
    15  DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000002
    16  DATA LCDATA1<>+0x018(SB)/8, $0x0000000000000003
    17  DATA LCDATA1<>+0x020(SB)/8, $0x0000000000000004
    18  DATA LCDATA1<>+0x028(SB)/8, $0x0000000000000008
    19  DATA LCDATA1<>+0x030(SB)/8, $0x000000000000000c
    20  DATA LCDATA1<>+0x038(SB)/8, $0x0000000000000001
    21  DATA LCDATA1<>+0x040(SB)/8, $0x0000000000000010
    22  GLOBL LCDATA1<>(SB), 8, $72
    23  
    24  TEXT ·_levels_to_bitmap_bmi2(SB), $0-32
    25  
    26  	MOVQ levels+0(FP), DI
    27  	MOVQ numLevels+8(FP), SI
    28  	MOVW rhs+16(FP), DX
    29  	LEAQ LCDATA1<>(SB), BP
    30  
    31  	WORD $0xf685             // test    esi, esi
    32  	JLE  LBB1_1
    33  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
    34  	WORD $0xfe83; BYTE $0x0f // cmp    esi, 15
    35  	JA   LBB1_4
    36  	WORD $0xf631             // xor    esi, esi
    37  	WORD $0xc031             // xor    eax, eax
    38  	JMP  LBB1_7
    39  
    40  LBB1_1:
    41  	WORD $0xc031 // xor    eax, eax
    42  	JMP  LBB1_8
    43  
    44  LBB1_4:
    45  	WORD $0x8944; BYTE $0xc6       // mov    esi, r8d
    46  	WORD $0xe683; BYTE $0xf0       // and    esi, -16
    47  	LONG $0xc26ef9c5               // vmovd    xmm0, edx
    48  	LONG $0x7979e2c4; BYTE $0xc8   // vpbroadcastw    xmm1, xmm0
    49  	LONG $0xc0eff9c5               // vpxor    xmm0, xmm0, xmm0
    50  	LONG $0x556ffdc5; BYTE $0x00   // vmovdqa    ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
    51  	LONG $0x597d62c4; WORD $0x2065 // vpbroadcastq    ymm12, qword 32[rbp] /* [rip + .LCPI1_1] */
    52  	LONG $0x597de2c4; WORD $0x2865 // vpbroadcastq    ymm4, qword 40[rbp] /* [rip + .LCPI1_2] */
    53  	LONG $0x597de2c4; WORD $0x306d // vpbroadcastq    ymm5, qword 48[rbp] /* [rip + .LCPI1_3] */
    54  	LONG $0x597de2c4; WORD $0x3875 // vpbroadcastq    ymm6, qword 56[rbp] /* [rip + .LCPI1_4] */
    55  	LONG $0x597de2c4; WORD $0x407d // vpbroadcastq    ymm7, qword 64[rbp] /* [rip + .LCPI1_5] */
    56  	WORD $0xc031                   // xor    eax, eax
    57  	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
    58  	LONG $0xef3141c4; BYTE $0xc9   // vpxor    xmm9, xmm9, xmm9
    59  	LONG $0xef2941c4; BYTE $0xd2   // vpxor    xmm10, xmm10, xmm10
    60  
    61  LBB1_5:
    62  	LONG $0xdad41dc5               // vpaddq    ymm11, ymm12, ymm2
    63  	LONG $0x5c7efac5; WORD $0x0847 // vmovq    xmm3, qword [rdi + 2*rax + 8]
    64  	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
    65  	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
    66  	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
    67  	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq    ymm3, ymm3, ymm11
    68  	LONG $0xdcd46dc5               // vpaddq    ymm11, ymm2, ymm4
    69  	LONG $0xc3eb3dc5               // vpor    ymm8, ymm8, ymm3
    70  	LONG $0x5c7efac5; WORD $0x1047 // vmovq    xmm3, qword [rdi + 2*rax + 16]
    71  	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
    72  	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
    73  	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
    74  	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq    ymm3, ymm3, ymm11
    75  	LONG $0xddd46dc5               // vpaddq    ymm11, ymm2, ymm5
    76  	LONG $0xcbeb35c5               // vpor    ymm9, ymm9, ymm3
    77  	LONG $0x5c7efac5; WORD $0x1847 // vmovq    xmm3, qword [rdi + 2*rax + 24]
    78  	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
    79  	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
    80  	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
    81  	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq    ymm3, ymm3, ymm11
    82  	LONG $0xd3eb2dc5               // vpor    ymm10, ymm10, ymm3
    83  	LONG $0x1c7efac5; BYTE $0x47   // vmovq    xmm3, qword [rdi + 2*rax]
    84  	LONG $0xd965e1c5               // vpcmpgtw    xmm3, xmm3, xmm1
    85  	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq    ymm3, xmm3
    86  	LONG $0xdedbe5c5               // vpand    ymm3, ymm3, ymm6
    87  	LONG $0x47e5e2c4; BYTE $0xda   // vpsllvq    ymm3, ymm3, ymm2
    88  	LONG $0xc0ebe5c5               // vpor    ymm0, ymm3, ymm0
    89  	LONG $0x10c08348               // add    rax, 16
    90  	LONG $0xd7d4edc5               // vpaddq    ymm2, ymm2, ymm7
    91  	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
    92  	JNE  LBB1_5
    93  	LONG $0xc0ebbdc5               // vpor    ymm0, ymm8, ymm0
    94  	LONG $0xc0ebb5c5               // vpor    ymm0, ymm9, ymm0
    95  	LONG $0xc0ebadc5               // vpor    ymm0, ymm10, ymm0
    96  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
    97  	LONG $0xc1ebf9c5               // vpor    xmm0, xmm0, xmm1
    98  	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
    99  	LONG $0xc1ebf9c5               // vpor    xmm0, xmm0, xmm1
   100  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
   101  	WORD $0x394c; BYTE $0xc6       // cmp    rsi, r8
   102  	JE   LBB1_8
   103  
   104  LBB1_7:
   105  	WORD $0xc931                 // xor    ecx, ecx
   106  	LONG $0x77143966             // cmp    word [rdi + 2*rsi], dx
   107  	WORD $0x9f0f; BYTE $0xd1     // setg    cl
   108  	LONG $0xf7c9e2c4; BYTE $0xc9 // shlx    rcx, rcx, rsi
   109  	WORD $0x0948; BYTE $0xc8     // or    rax, rcx
   110  	LONG $0x01c68348             // add    rsi, 1
   111  	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
   112  	JNE  LBB1_7
   113  
   114  LBB1_8:
   115  	VZEROUPPER
   116  	MOVQ AX, res+24(FP)
   117  	RET