//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// Origin: github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/min_max_avx2_amd64.s
//
// Go (Plan 9 syntax) amd64 assembly. Apart from the argument loads, the
// branches, VZEROUPPER and RET, every instruction is emitted as raw machine-code
// bytes through WORD/LONG/QUAD/BYTE; the trailing comment on each such line
// shows the Intel-syntax instruction those bytes encode.
//
// All four routines take the same 32 bytes of arguments:
//   values+0(FP)   base address of the input elements
//   length+8(FP)   element count; only the low 32 bits (ESI) are examined
//   minout+16(FP)  address that receives the minimum
//   maxout+24(FP)  address that receives the maximum
// When the count is <= 0 (as a signed 32-bit value) the identity values are
// stored instead: the largest representable value as the minimum and the
// smallest representable value as the maximum.
//
// NOTE(review): hand-patched after generation. The routines that load a
// constant-table address into BP declare an 8-byte frame ($8-32 instead of
// $0-32). A frameless leaf gets no prologue/epilogue from the Go assembler, so
// the caller's frame pointer was overwritten and never restored, which breaks
// frame-pointer based stack unwinding (reported by go vet's framepointer
// check). With a non-zero frame the assembler saves BP on entry and restores it
// at RET. Re-apply this change if the file is regenerated.
//
// NOTE(review): in the legacy build-constraint syntax used on the first line a
// space between terms means OR. If "exclude when either tag is set" was
// intended the terms would need to be comma-separated. Left unchanged because
// it has to agree with the Go declarations file, which is not visible from
// here - TODO confirm.

// LCDATA1 holds two int32 constants: 0x80000000 at offset 0 (seed for the
// maximum accumulators) and 0x7fffffff at offset 4 (seed for the minimum
// accumulators).
DATA LCDATA1<>+0x000(SB)/8, $0x7fffffff80000000
GLOBL LCDATA1<>(SB), 8, $8

// _int32_max_min_avx2 stores the signed minimum and maximum of the int32
// elements at values into *minout and *maxout.
// Registers: DI = values, R8 = count, R9 = scalar index / vectorised count,
// EAX = running minimum, R10D and ESI = running maximum, BP = &LCDATA1.
TEXT ·_int32_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA1<>(SB), BP

	WORD $0xf685 // test esi, esi
	JLE LBB0_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JA LBB0_4

	// 1..31 elements: scalar loop only, starting from the identity values.
	LONG $0x0000ba41; WORD $0x8000 // mov r10d, -2147483648
	LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	JMP LBB0_7

	// Count <= 0: store the identity values.
LBB0_1:
	LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
	LONG $0x000000be; BYTE $0x80 // mov esi, -2147483648
	JMP LBB0_8

	// Vector setup: R9 = count rounded down to a multiple of 32,
	// ymm0-ymm3 = minimum accumulators, ymm4-ymm7 = maximum accumulators.
LBB0_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd ymm4, dword 0[rbp] /* [rip + .LCPI0_0] */
	LONG $0xe0e18341 // and r9d, -32
	LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd ymm0, dword 4[rbp] /* [rip + .LCPI0_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
	LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4
	LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
	LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4

	// Main loop: 32 elements (four 256-bit loads) per iteration; RAX is the
	// element index.
LBB0_5:
	LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
	LONG $0x397dc2c4; BYTE $0xc0 // vpminsd ymm0, ymm0, ymm8
	LONG $0x3975c2c4; BYTE $0xc9 // vpminsd ymm1, ymm1, ymm9
	LONG $0x396dc2c4; BYTE $0xd2 // vpminsd ymm2, ymm2, ymm10
	LONG $0x3965c2c4; BYTE $0xdb // vpminsd ymm3, ymm3, ymm11
	LONG $0x3d5dc2c4; BYTE $0xe0 // vpmaxsd ymm4, ymm4, ymm8
	LONG $0x3d55c2c4; BYTE $0xe9 // vpmaxsd ymm5, ymm5, ymm9
	LONG $0x3d4dc2c4; BYTE $0xf2 // vpmaxsd ymm6, ymm6, ymm10
	LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11
	LONG $0x20c08348 // add rax, 32
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB0_5

	// Fold the four maximum accumulators into one lane -> R10D.
	LONG $0x3d5de2c4; BYTE $0xe5 // vpmaxsd ymm4, ymm4, ymm5
	LONG $0x3d5de2c4; BYTE $0xe6 // vpmaxsd ymm4, ymm4, ymm6
	LONG $0x3d5de2c4; BYTE $0xe7 // vpmaxsd ymm4, ymm4, ymm7
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
	LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
	LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
	LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
	LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4

	// Fold the four minimum accumulators into one lane -> EAX.
	LONG $0x397de2c4; BYTE $0xc1 // vpminsd ymm0, ymm0, ymm1
	LONG $0x397de2c4; BYTE $0xc2 // vpminsd ymm0, ymm0, ymm2
	LONG $0x397de2c4; BYTE $0xc3 // vpminsd ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
	LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
	LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
	LONG $0xc07ef9c5 // vmovd eax, xmm0
	WORD $0x8944; BYTE $0xd6 // mov esi, r10d
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB0_8

	// Scalar loop over elements R9..R8-1 (tail of the vector path, or the
	// whole input when the count is below 32).
LBB0_7:
	LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
	WORD $0xf039 // cmp eax, esi
	WORD $0x4f0f; BYTE $0xc6 // cmovg eax, esi
	WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
	LONG $0xf24d0f41 // cmovge esi, r10d
	LONG $0x01c18349 // add r9, 1
	WORD $0x8941; BYTE $0xf2 // mov r10d, esi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB0_7

	// Store results: *maxout = ESI, *minout = EAX.
LBB0_8:
	WORD $0x3189 // mov dword [rcx], esi
	WORD $0x0289 // mov dword [rdx], eax
	VZEROUPPER
	RET

// _uint32_max_min_avx2 stores the unsigned minimum and maximum of the uint32
// elements at values into *minout and *maxout.
// Registers: DI = values, R8 = count, R9 = scalar index / vectorised count,
// EAX = running minimum, R10D and ESI = running maximum. BP is not used, so
// this routine stays frameless.
TEXT ·_uint32_max_min_avx2(SB), $0-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX

	WORD $0xf685 // test esi, esi
	JLE LBB1_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x1f // cmp esi, 31
	JA LBB1_4

	// 1..31 elements: scalar loop only, min = all ones, max = 0.
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	LONG $0xffffffb8; BYTE $0xff // mov eax, -1
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP LBB1_7

	// Count <= 0: store the identity values.
LBB1_1:
	LONG $0xffffffb8; BYTE $0xff // mov eax, -1
	WORD $0xf631 // xor esi, esi
	JMP LBB1_8

	// Vector setup: R9 = count rounded down to a multiple of 32,
	// ymm0-ymm3 = minimum accumulators (all ones),
	// ymm4-ymm7 = maximum accumulators (zero).
LBB1_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0xe0e18341 // and r9d, -32
	LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4
	LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0
	WORD $0xc031 // xor eax, eax
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
	LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
	LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6
	LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7

	// Main loop: 32 elements (four 256-bit loads) per iteration.
LBB1_5:
	LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
	LONG $0x3b7dc2c4; BYTE $0xc0 // vpminud ymm0, ymm0, ymm8
	LONG $0x3b75c2c4; BYTE $0xc9 // vpminud ymm1, ymm1, ymm9
	LONG $0x3b6dc2c4; BYTE $0xd2 // vpminud ymm2, ymm2, ymm10
	LONG $0x3b65c2c4; BYTE $0xdb // vpminud ymm3, ymm3, ymm11
	LONG $0x3f5dc2c4; BYTE $0xe0 // vpmaxud ymm4, ymm4, ymm8
	LONG $0x3f55c2c4; BYTE $0xe9 // vpmaxud ymm5, ymm5, ymm9
	LONG $0x3f4dc2c4; BYTE $0xf2 // vpmaxud ymm6, ymm6, ymm10
	LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11
	LONG $0x20c08348 // add rax, 32
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB1_5

	// Fold the four maximum accumulators into one lane -> R10D.
	LONG $0x3f5de2c4; BYTE $0xe5 // vpmaxud ymm4, ymm4, ymm5
	LONG $0x3f5de2c4; BYTE $0xe6 // vpmaxud ymm4, ymm4, ymm6
	LONG $0x3f5de2c4; BYTE $0xe7 // vpmaxud ymm4, ymm4, ymm7
	LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
	LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
	LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
	LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
	LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
	LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4

	// Fold the four minimum accumulators into one lane -> EAX.
	LONG $0x3b7de2c4; BYTE $0xc1 // vpminud ymm0, ymm0, ymm1
	LONG $0x3b7de2c4; BYTE $0xc2 // vpminud ymm0, ymm0, ymm2
	LONG $0x3b7de2c4; BYTE $0xc3 // vpminud ymm0, ymm0, ymm3
	LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
	LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
	LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
	LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
	LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
	LONG $0xc07ef9c5 // vmovd eax, xmm0
	WORD $0x8944; BYTE $0xd6 // mov esi, r10d
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB1_8

	// Scalar loop over elements R9..R8-1, using unsigned conditions.
LBB1_7:
	LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
	WORD $0xf039 // cmp eax, esi
	WORD $0x430f; BYTE $0xc6 // cmovae eax, esi
	WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
	LONG $0xf2470f41 // cmova esi, r10d
	LONG $0x01c18349 // add r9, 1
	WORD $0x8941; BYTE $0xf2 // mov r10d, esi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB1_7

	// Store results: *maxout = ESI, *minout = EAX.
LBB1_8:
	WORD $0x3189 // mov dword [rcx], esi
	WORD $0x0289 // mov dword [rdx], eax
	VZEROUPPER
	RET

// LCDATA2 holds two int64 constants: 0x8000000000000000 at offset 0 (seed for
// the maximum accumulators) and 0x7fffffffffffffff at offset 8 (seed for the
// minimum accumulators).
DATA LCDATA2<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA2<>+0x008(SB)/8, $0x7fffffffffffffff
GLOBL LCDATA2<>(SB), 8, $16

// _int64_max_min_avx2 stores the signed minimum and maximum of the int64
// elements at values into *minout and *maxout. The vector path selects lanes
// with a vpcmpgtq / vblendvpd pair for each accumulator.
// Registers: DI = values, R8 = count, R9 = scalar index / vectorised count,
// RAX = running minimum, R10 and RSI = running maximum, BP = &LCDATA2.
TEXT ·_int64_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA2<>(SB), BP

	QUAD $0xffffffffffffb848; WORD $0x7fff // mov rax, 9223372036854775807
	WORD $0xf685 // test esi, esi
	JLE LBB2_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x0f // cmp esi, 15
	JA LBB2_4

	// 1..15 elements: scalar loop only; R10 = RAX + 1 wraps to the smallest
	// int64.
	LONG $0x01508d4c // lea r10, [rax + 1]
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	JMP LBB2_7

	// Count <= 0: store the identity values (RSI = RAX + 1).
LBB2_1:
	LONG $0x01708d48 // lea rsi, [rax + 1]
	JMP LBB2_8

	// Vector setup: R9 = count rounded down to a multiple of 16,
	// ymm0-ymm3 = minimum accumulators, ymm4-ymm7 = maximum accumulators.
LBB2_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq ymm4, qword 0[rbp] /* [rip + .LCPI2_0] */
	LONG $0xf0e18341 // and r9d, -16
	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq ymm0, qword 8[rbp] /* [rip + .LCPI2_1] */
	WORD $0xc031 // xor eax, eax
	LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
	LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
	LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
	LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4
	LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
	LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4

	// Main loop: 16 elements (four 256-bit loads) per iteration.
LBB2_5:
	LONG $0x046f7ec5; BYTE $0xc7 // vmovdqu ymm8, yword [rdi + 8*rax]
	LONG $0x373d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm8, ymm0
	LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd ymm0, ymm8, ymm0, ymm9
	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
	LONG $0x373562c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm9, ymm3
	LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd ymm3, ymm9, ymm3, ymm10
	LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu ymm10, yword [rdi + 8*rax + 64]
	LONG $0x372d62c4; BYTE $0xda // vpcmpgtq ymm11, ymm10, ymm2
	LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm10, ymm2, ymm11
	LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu ymm11, yword [rdi + 8*rax + 96]
	LONG $0x372562c4; BYTE $0xe1 // vpcmpgtq ymm12, ymm11, ymm1
	LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd ymm1, ymm11, ymm1, ymm12
	LONG $0x375d42c4; BYTE $0xe0 // vpcmpgtq ymm12, ymm4, ymm8
	LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd ymm4, ymm8, ymm4, ymm12
	LONG $0x374542c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm7, ymm9
	LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd ymm7, ymm9, ymm7, ymm8
	LONG $0x374d42c4; BYTE $0xc2 // vpcmpgtq ymm8, ymm6, ymm10
	LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd ymm6, ymm10, ymm6, ymm8
	LONG $0x375542c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm5, ymm11
	LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd ymm5, ymm11, ymm5, ymm8
	LONG $0x10c08348 // add rax, 16
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB2_5

	// Fold the four maximum accumulators into one lane -> R10.
	LONG $0x375d62c4; BYTE $0xc7 // vpcmpgtq ymm8, ymm4, ymm7
	LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm7, ymm4, ymm8
	LONG $0x375de2c4; BYTE $0xfe // vpcmpgtq ymm7, ymm4, ymm6
	LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd ymm4, ymm6, ymm4, ymm7
	LONG $0x375de2c4; BYTE $0xf5 // vpcmpgtq ymm6, ymm4, ymm5
	LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd ymm4, ymm5, ymm4, ymm6
	LONG $0x197de3c4; WORD $0x01e5 // vextractf128 xmm5, ymm4, 1
	LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
	LONG $0x0479e3c4; WORD $0x4eec // vpermilps xmm5, xmm4, 78
	LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
	LONG $0x7ef9c1c4; BYTE $0xe2 // vmovq r10, xmm4

	// Fold the four minimum accumulators into one lane -> RAX.
	LONG $0x3765e2c4; BYTE $0xe0 // vpcmpgtq ymm4, ymm3, ymm0
	LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd ymm0, ymm3, ymm0, ymm4
	LONG $0x376de2c4; BYTE $0xd8 // vpcmpgtq ymm3, ymm2, ymm0
	LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd ymm0, ymm2, ymm0, ymm3
	LONG $0x3775e2c4; BYTE $0xd0 // vpcmpgtq ymm2, ymm1, ymm0
	LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1
	LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
	LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78
	LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
	LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
	WORD $0x894c; BYTE $0xd6 // mov rsi, r10
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB2_8

	// Scalar loop over elements R9..R8-1.
LBB2_7:
	LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
	WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
	LONG $0xc64f0f48 // cmovg rax, rsi
	WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
	LONG $0xf24d0f49 // cmovge rsi, r10
	LONG $0x01c18349 // add r9, 1
	WORD $0x8949; BYTE $0xf2 // mov r10, rsi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB2_7

	// Store results: *maxout = RSI, *minout = RAX.
LBB2_8:
	WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
	WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
	VZEROUPPER
	RET

// LCDATA3 holds the 64-bit sign-bit mask. The unsigned routine XORs it into
// both operands before each vpcmpgtq so the signed compare orders them as
// unsigned values.
DATA LCDATA3<>+0x000(SB)/8, $0x8000000000000000
GLOBL LCDATA3<>(SB), 8, $8

// _uint64_max_min_avx2 stores the unsigned minimum and maximum of the uint64
// elements at values into *minout and *maxout.
// Registers: DI = values, R8 = count, R9 = scalar index / vectorised count,
// RAX = running minimum, R10 and RSI = running maximum, BP = &LCDATA3.
TEXT ·_uint64_max_min_avx2(SB), $8-32

	MOVQ values+0(FP), DI
	MOVQ length+8(FP), SI
	MOVQ minout+16(FP), DX
	MOVQ maxout+24(FP), CX
	LEAQ LCDATA3<>(SB), BP

	WORD $0xf685 // test esi, esi
	JLE LBB3_1
	WORD $0x8941; BYTE $0xf0 // mov r8d, esi
	WORD $0xfe83; BYTE $0x0f // cmp esi, 15
	JA LBB3_4

	// 1..15 elements: scalar loop only, min = all ones, max = 0.
	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
	WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
	JMP LBB3_7

	// Count <= 0: store the identity values.
LBB3_1:
	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
	WORD $0xf631 // xor esi, esi
	JMP LBB3_8

	// Vector setup: R9 = count rounded down to a multiple of 16,
	// ymm0 = sign-bit mask,
	// ymm1/ymm4/ymm3/ymm2 = minimum accumulators (all ones),
	// ymm5/ymm8/ymm7/ymm6 = maximum accumulators (zero).
LBB3_4:
	WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
	LONG $0xf0e18341 // and r9d, -16
	LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
	LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
	WORD $0xc031 // xor eax, eax
	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */
	LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4
	LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
	LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
	LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8
	LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7
	LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6

	// Main loop: 16 elements (four 256-bit loads) per iteration.
LBB3_5:
	LONG $0x0c6f7ec5; BYTE $0xc7 // vmovdqu ymm9, yword [rdi + 8*rax]
	LONG $0xd0ef75c5 // vpxor ymm10, ymm1, ymm0
	LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd ymm1, ymm9, ymm1, ymm10
	LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
	LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
	LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd ymm5, ymm9, ymm5, ymm10
	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
	LONG $0xd0ef5dc5 // vpxor ymm10, ymm4, ymm0
	LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd ymm4, ymm9, ymm4, ymm10
	LONG $0xd0ef3dc5 // vpxor ymm10, ymm8, ymm0
	LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
	LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu ymm11, yword [rdi + 8*rax + 64]
	LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd ymm8, ymm9, ymm8, ymm10
	LONG $0xc8ef65c5 // vpxor ymm9, ymm3, ymm0
	LONG $0xd0ef25c5 // vpxor ymm10, ymm11, ymm0
	LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
	LONG $0x4b25e3c4; WORD $0x90db // vblendvpd ymm3, ymm11, ymm3, ymm9
	LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
	LONG $0x373542c4; BYTE $0xca // vpcmpgtq ymm9, ymm9, ymm10
	LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd ymm7, ymm11, ymm7, ymm9
	LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu ymm9, yword [rdi + 8*rax + 96]
	LONG $0xd0ef6dc5 // vpxor ymm10, ymm2, ymm0
	LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
	LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
	LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd ymm2, ymm9, ymm2, ymm10
	LONG $0xd0ef4dc5 // vpxor ymm10, ymm6, ymm0
	LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
	LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd ymm6, ymm9, ymm6, ymm10
	LONG $0x10c08348 // add rax, 16
	WORD $0x3949; BYTE $0xc1 // cmp r9, rax
	JNE LBB3_5

	// Fold the four maximum accumulators into one lane of xmm5.
	LONG $0xc8ef3dc5 // vpxor ymm9, ymm8, ymm0
	LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
	LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
	LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd ymm5, ymm8, ymm5, ymm9
	LONG $0xc05755c5 // vxorpd ymm8, ymm5, ymm0
	LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
	LONG $0x373d42c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm8, ymm9
	LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd ymm5, ymm7, ymm5, ymm8
	LONG $0xf857d5c5 // vxorpd ymm7, ymm5, ymm0
	LONG $0xc0ef4dc5 // vpxor ymm8, ymm6, ymm0
	LONG $0x3745c2c4; BYTE $0xf8 // vpcmpgtq ymm7, ymm7, ymm8
	LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7
	LONG $0x197de3c4; WORD $0x01ee // vextractf128 xmm6, ymm5, 1
	LONG $0xc05749c5 // vxorpd xmm8, xmm6, xmm0
	LONG $0xf857d1c5 // vxorpd xmm7, xmm5, xmm0
	LONG $0x3741c2c4; BYTE $0xf8 // vpcmpgtq xmm7, xmm7, xmm8
	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7
	LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps xmm6, xmm5, 78
	LONG $0xc05751c5 // vxorpd xmm8, xmm5, xmm0
	LONG $0xf857c9c5 // vxorpd xmm7, xmm6, xmm0
	LONG $0x3739e2c4; BYTE $0xff // vpcmpgtq xmm7, xmm8, xmm7
	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7

	// Fold the four minimum accumulators into one lane -> RAX; the maximum is
	// moved from xmm5 into R10 part-way through.
	LONG $0xf0eff5c5 // vpxor ymm6, ymm1, ymm0
	LONG $0xf8efddc5 // vpxor ymm7, ymm4, ymm0
	LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6
	LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd ymm1, ymm4, ymm1, ymm6
	LONG $0xe057f5c5 // vxorpd ymm4, ymm1, ymm0
	LONG $0xf0efe5c5 // vpxor ymm6, ymm3, ymm0
	LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4
	LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd ymm1, ymm3, ymm1, ymm4
	LONG $0x7ef9c1c4; BYTE $0xea // vmovq r10, xmm5
	LONG $0xd857f5c5 // vxorpd ymm3, ymm1, ymm0
	LONG $0xe0efedc5 // vpxor ymm4, ymm2, ymm0
	LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3
	LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd ymm1, ymm2, ymm1, ymm3
	LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1
	LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
	LONG $0xe057e9c5 // vxorpd xmm4, xmm2, xmm0
	LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3
	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3
	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78
	LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
	LONG $0xc057e9c5 // vxorpd xmm0, xmm2, xmm0
	LONG $0x3779e2c4; BYTE $0xc3 // vpcmpgtq xmm0, xmm0, xmm3
	LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd xmm0, xmm2, xmm1, xmm0
	LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
	WORD $0x894c; BYTE $0xd6 // mov rsi, r10
	WORD $0x394d; BYTE $0xc1 // cmp r9, r8
	JE LBB3_8

	// Scalar loop over elements R9..R8-1, using unsigned conditions.
LBB3_7:
	LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
	WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
	LONG $0xc6430f48 // cmovae rax, rsi
	WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
	LONG $0xf2470f49 // cmova rsi, r10
	LONG $0x01c18349 // add r9, 1
	WORD $0x8949; BYTE $0xf2 // mov r10, rsi
	WORD $0x394d; BYTE $0xc8 // cmp r8, r9
	JNE LBB3_7

	// Store results: *maxout = RSI, *minout = RAX.
LBB3_8:
	WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
	WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
	VZEROUPPER
	RET