//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _extract_bits_bmi2 is a thin wrapper around the BMI2 PEXT instruction.
//
// Go stack-frame layout (0-byte frame, 24 bytes of arguments + result):
//   bitmap+0(FP)        64-bit source value
//   selectBitmap+8(FP)  64-bit mask selecting which source bits to gather
//   res+16(FP)          64-bit result: the selected bits packed into the
//                       low-order end
//
// Clobbers: AX, DI, SI.
TEXT ·_extract_bits_bmi2(SB), $0-24

	MOVQ bitmap+0(FP), DI
	MOVQ selectBitmap+8(FP), SI

	LONG $0xf5c2e2c4; BYTE $0xc6 // pext rax, rdi, rsi
	MOVQ AX, res+16(FP)
	RET

// LCDATA1 is the constant pool used by _levels_to_bitmap_bmi2 (72 bytes,
// flag 8 = RODATA).
//   0x00..0x1f  {0, 1, 2, 3}  per-lane bit positions, loaded as one ymm
//   0x20        4             shift offset for elements 4..7 of a 16-group
//   0x28        8             shift offset for elements 8..11
//   0x30        12            shift offset for elements 12..15
//   0x38        1             mask reducing a compare result to 0 or 1
//   0x40        16            per-iteration increment of the bit positions
// NOTE(review): the first 32 bytes are read with an aligned vmovdqa, so this
// symbol must sit on a 32-byte boundary; presumably the linker's size-based
// alignment provides that -- confirm before shrinking or splitting the table.
DATA LCDATA1<>+0x000(SB)/8, $0x0000000000000000
DATA LCDATA1<>+0x008(SB)/8, $0x0000000000000001
DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000002
DATA LCDATA1<>+0x018(SB)/8, $0x0000000000000003
DATA LCDATA1<>+0x020(SB)/8, $0x0000000000000004
DATA LCDATA1<>+0x028(SB)/8, $0x0000000000000008
DATA LCDATA1<>+0x030(SB)/8, $0x000000000000000c
DATA LCDATA1<>+0x038(SB)/8, $0x0000000000000001
DATA LCDATA1<>+0x040(SB)/8, $0x0000000000000010
GLOBL LCDATA1<>(SB), 8, $72

// _levels_to_bitmap_bmi2 builds a 64-bit bitmap in which bit i is set when
// the i-th signed 16-bit element at `levels` is greater than `rhs`.
//
// Go stack-frame layout (32 bytes of arguments + result):
//   levels+0(FP)     pointer to an array of 16-bit elements
//   numLevels+8(FP)  element count; only the low 32 bits are examined, and a
//                    count <= 0 yields a result of 0
//   rhs+16(FP)       16-bit threshold (signed compare)
//   res+24(FP)       64-bit result bitmap
// NOTE(review): the result only has 64 bits; callers are presumably expected
// to pass numLevels <= 64 -- verify against the Go call site.
//
// Register roles:
//   DI  = levels           SI = current index / vectorised element count
//   DX  = rhs              R8 = numLevels (32-bit, zero-extended)
//   BP  = &LCDATA1         AX = loop counter, then the accumulated bitmap
//   CX  = scratch in the scalar loop
//   Y0, Y8, Y9, Y10 = four independent OR accumulators; Y2 = lane bit
//   positions; Y1 = rhs broadcast; Y3, Y11 = scratch
//
// The frame size is deliberately 8 rather than the generated 0. The body
// overwrites BP, and the raw-encoded AVX2 loads below address the constant
// pool through rbp. With a non-zero frame the Go assembler emits a
// prologue/epilogue that saves the caller's BP and restores it at RET, so the
// caller's frame pointer is no longer destroyed. No encoded instruction
// changes; FP-relative operands are resolved by the assembler.
TEXT ·_levels_to_bitmap_bmi2(SB), $8-32

	MOVQ levels+0(FP), DI
	MOVQ numLevels+8(FP), SI
	MOVW rhs+16(FP), DX
	LEAQ LCDATA1<>(SB), BP

	WORD $0xf685             // test esi, esi
	JLE  LBB1_1              // count <= 0: return 0
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x0f // cmp esi, 15
	JA   LBB1_4              // 16 or more elements: take the vector path
	WORD $0xf631             // xor esi, esi
	WORD $0xc031             // xor eax, eax
	JMP  LBB1_7              // fewer than 16: scalar loop from index 0

LBB1_1:
	WORD $0xc031 // xor eax, eax
	JMP  LBB1_8

	// Vector path setup: rsi = count rounded down to a multiple of 16.
LBB1_4:
	WORD $0x8944; BYTE $0xc6       // mov esi, r8d
	WORD $0xe683; BYTE $0xf0       // and esi, -16
	LONG $0xc26ef9c5               // vmovd xmm0, edx
	LONG $0x7979e2c4; BYTE $0xc8   // vpbroadcastw xmm1, xmm0
	LONG $0xc0eff9c5               // vpxor xmm0, xmm0, xmm0
	LONG $0x556ffdc5; BYTE $0x00   // vmovdqa ymm2, yword 0[rbp] /* [rip + .LCPI1_0] */
	LONG $0x597d62c4; WORD $0x2065 // vpbroadcastq ymm12, qword 32[rbp] /* [rip + .LCPI1_1] */
	LONG $0x597de2c4; WORD $0x2865 // vpbroadcastq ymm4, qword 40[rbp] /* [rip + .LCPI1_2] */
	LONG $0x597de2c4; WORD $0x306d // vpbroadcastq ymm5, qword 48[rbp] /* [rip + .LCPI1_3] */
	LONG $0x597de2c4; WORD $0x3875 // vpbroadcastq ymm6, qword 56[rbp] /* [rip + .LCPI1_4] */
	LONG $0x597de2c4; WORD $0x407d // vpbroadcastq ymm7, qword 64[rbp] /* [rip + .LCPI1_5] */
	WORD $0xc031                   // xor eax, eax
	LONG $0xef3941c4; BYTE $0xc0   // vpxor xmm8, xmm8, xmm8
	LONG $0xef3141c4; BYTE $0xc9   // vpxor xmm9, xmm9, xmm9
	LONG $0xef2941c4; BYTE $0xd2   // vpxor xmm10, xmm10, xmm10

	// Main loop: 16 elements per iteration, handled as four groups of four.
	// Each group is compared against rhs, widened to 64-bit lanes, masked to
	// 0/1 and shifted to its bit position before being OR-ed into its own
	// accumulator.
LBB1_5:
	// elements rax+4 .. rax+7 -> ymm8
	LONG $0xdad41dc5               // vpaddq ymm11, ymm12, ymm2
	LONG $0x5c7efac5; WORD $0x0847 // vmovq xmm3, qword [rdi + 2*rax + 8]
	LONG $0xd965e1c5               // vpcmpgtw xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq ymm3, xmm3
	LONG $0xdedbe5c5               // vpand ymm3, ymm3, ymm6
	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq ymm3, ymm3, ymm11
	LONG $0xdcd46dc5               // vpaddq ymm11, ymm2, ymm4
	LONG $0xc3eb3dc5               // vpor ymm8, ymm8, ymm3

	// elements rax+8 .. rax+11 -> ymm9
	LONG $0x5c7efac5; WORD $0x1047 // vmovq xmm3, qword [rdi + 2*rax + 16]
	LONG $0xd965e1c5               // vpcmpgtw xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq ymm3, xmm3
	LONG $0xdedbe5c5               // vpand ymm3, ymm3, ymm6
	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq ymm3, ymm3, ymm11
	LONG $0xddd46dc5               // vpaddq ymm11, ymm2, ymm5
	LONG $0xcbeb35c5               // vpor ymm9, ymm9, ymm3

	// elements rax+12 .. rax+15 -> ymm10
	LONG $0x5c7efac5; WORD $0x1847 // vmovq xmm3, qword [rdi + 2*rax + 24]
	LONG $0xd965e1c5               // vpcmpgtw xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq ymm3, xmm3
	LONG $0xdedbe5c5               // vpand ymm3, ymm3, ymm6
	LONG $0x47e5c2c4; BYTE $0xdb   // vpsllvq ymm3, ymm3, ymm11
	LONG $0xd3eb2dc5               // vpor ymm10, ymm10, ymm3

	// elements rax .. rax+3 -> ymm0
	LONG $0x1c7efac5; BYTE $0x47   // vmovq xmm3, qword [rdi + 2*rax]
	LONG $0xd965e1c5               // vpcmpgtw xmm3, xmm3, xmm1
	LONG $0x347de2c4; BYTE $0xdb   // vpmovzxwq ymm3, xmm3
	LONG $0xdedbe5c5               // vpand ymm3, ymm3, ymm6
	LONG $0x47e5e2c4; BYTE $0xda   // vpsllvq ymm3, ymm3, ymm2
	LONG $0xc0ebe5c5               // vpor ymm0, ymm3, ymm0

	// Advance the element index and every lane's bit position by 16.
	LONG $0x10c08348               // add rax, 16
	LONG $0xd7d4edc5               // vpaddq ymm2, ymm2, ymm7
	WORD $0x3948; BYTE $0xc6       // cmp rsi, rax
	JNE  LBB1_5

	// Fold the four accumulators, then the four 64-bit lanes, into rax.
	LONG $0xc0ebbdc5               // vpor ymm0, ymm8, ymm0
	LONG $0xc0ebb5c5               // vpor ymm0, ymm9, ymm0
	LONG $0xc0ebadc5               // vpor ymm0, ymm10, ymm0
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0xc1ebf9c5               // vpor xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd xmm1, xmm0, 78
	LONG $0xc1ebf9c5               // vpor xmm0, xmm0, xmm1
	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq rax, xmm0
	WORD $0x394c; BYTE $0xc6       // cmp rsi, r8
	JE   LBB1_8                    // count was a multiple of 16: no tail

	// Scalar loop: one element per iteration for indices rsi .. r8-1.
LBB1_7:
	WORD $0xc931                 // xor ecx, ecx
	LONG $0x77143966             // cmp word [rdi + 2*rsi], dx
	WORD $0x9f0f; BYTE $0xd1     // setg cl
	LONG $0xf7c9e2c4; BYTE $0xc9 // shlx rcx, rcx, rsi
	WORD $0x0948; BYTE $0xc8     // or rax, rcx
	LONG $0x01c68348             // add rsi, 1
	WORD $0x3949; BYTE $0xf0     // cmp r8, rsi
	JNE  LBB1_7

LBB1_8:
	VZEROUPPER                   // clear upper ymm halves before returning to Go code
	MOVQ AX, res+24(FP)
	RET