# Source: github.com/apache/arrow/go/v14@v14.0.1/parquet/internal/bmi/_lib/bitmap_bmi2.s
#
# Compiler output (see .ident at the end of the file) for "bitmap_bmi2.c".
# Syntax: GNU as, Intel operand order, no register prefixes.
# Arguments are read from rdi / rsi / rdx and results are returned in rax,
# which is consistent with the System V AMD64 calling convention.
# Required ISA extensions, as used by the instructions below:
#   extract_bits_bmi2     - BMI2 (pext)
#   levels_to_bitmap_bmi2 - BMI2 (shlx) and AVX2 (256-bit integer ops)
# Label names are left exactly as the compiler emitted them.

	.text
	.intel_syntax noprefix
	.file	"bitmap_bmi2.c"

#-----------------------------------------------------------------------
# extract_bits_bmi2
# In:    rdi = 64-bit source value
#        rsi = 64-bit selection mask
# Out:   rax = bits of rdi at the positions where rsi has a 1 bit,
#              packed contiguously into the low bits (PEXT semantics)
# Clobb: rax, flags (from the `and rsp, -8` in the prologue)
# Stack: sets up an rbp frame but uses no stack locals
#-----------------------------------------------------------------------
	.globl	extract_bits_bmi2               # -- Begin function extract_bits_bmi2
	.p2align	4, 0x90
	.type	extract_bits_bmi2,@function
extract_bits_bmi2:                      # @extract_bits_bmi2
# %bb.0:
	push	rbp
	mov	rbp, rsp
	and	rsp, -8                         # force 8-byte stack alignment; restored from rbp below
	pext	rax, rdi, rsi                   # rax = pext(src = rdi, mask = rsi)
	mov	rsp, rbp
	pop	rbp
	ret
.Lfunc_end0:
	.size	extract_bits_bmi2, .Lfunc_end0-extract_bits_bmi2
                                        # -- End function

# Constant pools for levels_to_bitmap_bmi2.
# .LCPI1_0 is 32-byte aligned because it is loaded with the aligned vmovdqa;
# the 8-byte constants are each broadcast to all four qword lanes at load time.
	.section	.rodata.cst32,"aM",@progbits,32
	.p2align	5                               # -- Begin function levels_to_bitmap_bmi2
.LCPI1_0:                               # per-lane bit positions of the first 4 elements
	.quad	0                               # 0x0
	.quad	1                               # 0x1
	.quad	2                               # 0x2
	.quad	3                               # 0x3
	.section	.rodata.cst8,"aM",@progbits,8
	.p2align	3
.LCPI1_1:                               # bit-position offset of elements 4..7
	.quad	4                               # 0x4
.LCPI1_2:                               # bit-position offset of elements 8..11
	.quad	8                               # 0x8
.LCPI1_3:                               # bit-position offset of elements 12..15
	.quad	12                              # 0xc
.LCPI1_4:                               # mask that reduces a compare result to 0/1
	.quad	1                               # 0x1
.LCPI1_5:                               # elements consumed per vector iteration
	.quad	16                              # 0x10

#-----------------------------------------------------------------------
# levels_to_bitmap_bmi2
# In:    rdi = pointer to an array of 16-bit elements
#        esi = element count (signed 32-bit)
#        dx  = 16-bit threshold
# Out:   rax = bitmap in which bit i is set when element[i] > threshold
#              (signed 16-bit comparison), for i in [0, count).
#              Returns 0 when count <= 0.
# Clobb: rax, rcx, rsi, r8, ymm0-ymm12, flags
# Stack: sets up an rbp frame but uses no stack locals
#
# Shape: counts of 16 or more run an AVX2 loop handling 16 elements per
# iteration (four groups of four, each accumulated separately), then a
# scalar loop finishes the remaining count % 16 elements. Counts below 16
# use only the scalar loop.
#
# NOTE(review): bit positions are only meaningful for i < 64. In the vector
# loop vpsllvq yields 0 for shift counts above 63, while shlx in the scalar
# loop uses the count modulo 64. Callers presumably pass count <= 64 -
# confirm against the call sites.
#-----------------------------------------------------------------------
	.text
	.globl	levels_to_bitmap_bmi2
	.p2align	4, 0x90
	.type	levels_to_bitmap_bmi2,@function
levels_to_bitmap_bmi2:                  # @levels_to_bitmap_bmi2
# %bb.0:
	push	rbp
	mov	rbp, rsp
	and	rsp, -8                         # force 8-byte stack alignment; restored from rbp below
	test	esi, esi
	jle	.LBB1_1                         # count <= 0: result is 0
# %bb.2:
	mov	r8d, esi                        # r8 = count, zero-extended to 64 bits
	cmp	esi, 15
	ja	.LBB1_4                         # count >= 16: take the vector path
# %bb.3:                                # count in [1, 15]: scalar loop only
	xor	esi, esi                        # rsi = index of first element to process
	xor	eax, eax                        # rax = accumulated bitmap
	jmp	.LBB1_7
.LBB1_1:
	xor	eax, eax
	jmp	.LBB1_8
.LBB1_4:
	mov	esi, r8d
	and	esi, -16                        # rsi = count rounded down to a multiple of 16
	vmovd	xmm0, edx
	vpbroadcastw	xmm1, xmm0              # xmm1 = threshold in every 16-bit lane
	vpxor	xmm0, xmm0, xmm0                # ymm0 = accumulator for elements +0..+3
	vmovdqa	ymm2, ymmword ptr [rip + .LCPI1_0] # ymm2 = [0,1,2,3]
	vpbroadcastq	ymm12, qword ptr [rip + .LCPI1_1] # ymm12 = [4,4,4,4]
	vpbroadcastq	ymm4, qword ptr [rip + .LCPI1_2] # ymm4 = [8,8,8,8]
	vpbroadcastq	ymm5, qword ptr [rip + .LCPI1_3] # ymm5 = [12,12,12,12]
	vpbroadcastq	ymm6, qword ptr [rip + .LCPI1_4] # ymm6 = [1,1,1,1]
	vpbroadcastq	ymm7, qword ptr [rip + .LCPI1_5] # ymm7 = [16,16,16,16]
	xor	eax, eax                        # rax = element index of the current 16-element group
	vpxor	xmm8, xmm8, xmm8                # ymm8  = accumulator for elements +4..+7
	vpxor	xmm9, xmm9, xmm9                # ymm9  = accumulator for elements +8..+11
	vpxor	xmm10, xmm10, xmm10             # ymm10 = accumulator for elements +12..+15
	.p2align	4, 0x90
.LBB1_5:                                # =>This Inner Loop Header: Depth=1
                                        # Invariant: ymm2 = [rax, rax+1, rax+2, rax+3]
	# elements rax+4 .. rax+7
	vpaddq	ymm11, ymm12, ymm2              # ymm11 = bit positions rax+4 .. rax+7
	vmovq	xmm3, qword ptr [rdi + 2*rax + 8] # xmm3 = mem[0],zero (four 16-bit elements)
	vpcmpgtw	xmm3, xmm3, xmm1                # lane = 0xFFFF where element > threshold (signed)
	vpmovzxwq	ymm3, xmm3                      # zero-extend each 16-bit lane to 64 bits
	vpand	ymm3, ymm3, ymm6                # reduce each lane to 0 or 1
	vpsllvq	ymm3, ymm3, ymm11               # move each 0/1 to its bit position
	vpaddq	ymm11, ymm2, ymm4               # ymm11 = bit positions rax+8 .. rax+11
	vpor	ymm8, ymm8, ymm3
	# elements rax+8 .. rax+11
	vmovq	xmm3, qword ptr [rdi + 2*rax + 16] # xmm3 = mem[0],zero
	vpcmpgtw	xmm3, xmm3, xmm1
	vpmovzxwq	ymm3, xmm3
	vpand	ymm3, ymm3, ymm6
	vpsllvq	ymm3, ymm3, ymm11
	vpaddq	ymm11, ymm2, ymm5               # ymm11 = bit positions rax+12 .. rax+15
	vpor	ymm9, ymm9, ymm3
	# elements rax+12 .. rax+15
	vmovq	xmm3, qword ptr [rdi + 2*rax + 24] # xmm3 = mem[0],zero
	vpcmpgtw	xmm3, xmm3, xmm1
	vpmovzxwq	ymm3, xmm3
	vpand	ymm3, ymm3, ymm6
	vpsllvq	ymm3, ymm3, ymm11
	vpor	ymm10, ymm10, ymm3
	# elements rax+0 .. rax+3
	vmovq	xmm3, qword ptr [rdi + 2*rax]   # xmm3 = mem[0],zero
	vpcmpgtw	xmm3, xmm3, xmm1
	vpmovzxwq	ymm3, xmm3
	vpand	ymm3, ymm3, ymm6
	vpsllvq	ymm3, ymm3, ymm2
	vpor	ymm0, ymm3, ymm0
	add	rax, 16
	vpaddq	ymm2, ymm2, ymm7                # advance the base bit positions by 16
	cmp	rsi, rax
	jne	.LBB1_5
# %bb.6:                                # fold the four accumulators and their lanes into one qword
	vpor	ymm0, ymm8, ymm0
	vpor	ymm0, ymm9, ymm0
	vpor	ymm0, ymm10, ymm0
	vextracti128	xmm1, ymm0, 1           # upper 128 bits
	vpor	xmm0, xmm0, xmm1
	vpshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1] (swap the two qwords)
	vpor	xmm0, xmm0, xmm1
	vmovq	rax, xmm0                       # rax = bitmap for elements [0, rsi)
	cmp	rsi, r8
	je	.LBB1_8                         # count was a multiple of 16: no tail
	.p2align	4, 0x90
.LBB1_7:                                # =>This Inner Loop Header: Depth=1
                                        # Scalar tail: rsi = current index, r8 = count
	xor	ecx, ecx                        # clear rcx so setg yields a clean 0/1
	cmp	word ptr [rdi + 2*rsi], dx
	setg	cl                              # cl = element > threshold (signed)
	shlx	rcx, rcx, rsi                   # rcx = that 0/1 shifted to bit rsi (count taken mod 64)
	or	rax, rcx
	add	rsi, 1
	cmp	r8, rsi
	jne	.LBB1_7
.LBB1_8:
	mov	rsp, rbp
	pop	rbp
	vzeroupper                              # clear upper ymm halves before returning
	ret
.Lfunc_end1:
	.size	levels_to_bitmap_bmi2, .Lfunc_end1-levels_to_bitmap_bmi2
                                        # -- End function
	.ident	"Ubuntu clang version 11.1.0-++20210204121720+1fdec59bffc1-1~exp1~20210203232336.162"
	.section	".note.GNU-stack","",@progbits
	.addrsig