//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// SSE4 min/max reductions over int32 / uint32 / int64 / uint64 arrays.
//
// Every routine takes four 8-byte stack arguments (32 bytes in total):
//   values+0(FP)   address of the first element
//   length+8(FP)   element count; only the low 32 bits are examined and they
//                  are tested as a signed value, so a count <= 0 is "empty"
//   minout+16(FP)  address that receives the minimum
//   maxout+24(FP)  address that receives the maximum
//
// The instruction stream is emitted as raw bytes (WORD/LONG/QUAD/BYTE); the
// trailing comment on each line is the Intel-syntax disassembly.
//
// NOTE(manual edit): the routines that load a constant-table address into BP
// are declared with an 8-byte frame ($8-32) instead of $0-32.  With a zero
// frame the Go assembler emits no BP save/restore for a leaf function, so the
// caller's frame pointer would be left clobbered on return.  A non-zero frame
// makes the assembler save BP in the prologue and restore it before RET.
// If this file is regenerated with c2goasm, re-apply that change.

// LCDATA1: 4 x INT32_MIN (offset 0) followed by 4 x INT32_MAX (offset 16).
// The GLOBL flag value 8 is RODATA in textflag.h.
DATA LCDATA1<>+0x000(SB)/8, $0x8000000080000000
DATA LCDATA1<>+0x008(SB)/8, $0x8000000080000000
DATA LCDATA1<>+0x010(SB)/8, $0x7fffffff7fffffff
DATA LCDATA1<>+0x018(SB)/8, $0x7fffffff7fffffff
GLOBL LCDATA1<>(SB), 8, $32

// ·_int32_max_min_sse4: signed 32-bit min/max.
//
// Registers: DI = values, SI = length, DX = minout, CX = maxout,
//            BP = &LCDATA1, r9 = length (zero-extended 32-bit),
//            r11 = index where the scalar tail starts (length & -8),
//            r8d = running minimum, eax/esi = running maximum.
// Empty input stores INT32_MAX to *minout and INT32_MIN to *maxout.
// Frame is $8 (not $0) so that BP is saved/restored by the assembler.
TEXT ·_int32_max_min_sse4(SB), $8-32

    MOVQ values+0(FP), DI
    MOVQ length+8(FP), SI
    MOVQ minout+16(FP), DX
    MOVQ maxout+24(FP), CX
    LEAQ LCDATA1<>(SB), BP

    WORD $0xf685 // test esi, esi
    JLE LBB0_1
    WORD $0x8941; BYTE $0xf1 // mov r9d, esi
    WORD $0xfe83; BYTE $0x07 // cmp esi, 7
    JA LBB0_6
    // 1..7 elements: scalar loop only, starting from the identity values.
    LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648
    LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647
    WORD $0x3145; BYTE $0xdb // xor r11d, r11d
    JMP LBB0_4

// Empty input: store the identity values unchanged.
LBB0_1:
    LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647
    LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648
    JMP LBB0_13

// Vector setup: r8 = number of 8-element chunks, r10 = -(r8 & -2) is the
// counter for the 2-chunks-per-iteration loop below.
LBB0_6:
    WORD $0x8945; BYTE $0xcb // mov r11d, r9d
    LONG $0xf8e38341 // and r11d, -8
    LONG $0xf8438d49 // lea rax, [r11 - 8]
    WORD $0x8949; BYTE $0xc0 // mov r8, rax
    LONG $0x03e8c149 // shr r8, 3
    LONG $0x01c08349 // add r8, 1
    WORD $0x8548; BYTE $0xc0 // test rax, rax
    JE LBB0_7
    WORD $0x894d; BYTE $0xc2 // mov r10, r8
    LONG $0xfee28349 // and r10, -2
    WORD $0xf749; BYTE $0xda // neg r10
    LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
    LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
    WORD $0xc031 // xor eax, eax
    LONG $0xd06f0f66 // movdqa xmm2, xmm0
    LONG $0xd96f0f66 // movdqa xmm3, xmm1

// Main loop: 16 elements per iteration; xmm0/xmm2 hold minima, xmm1/xmm3 maxima.
LBB0_9:
    LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
    LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
    LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32]
    LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48]
    LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4
    LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5
    LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4
    LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5
    LONG $0x39380f66; BYTE $0xc6 // pminsd xmm0, xmm6
    LONG $0x39380f66; BYTE $0xd7 // pminsd xmm2, xmm7
    LONG $0x3d380f66; BYTE $0xce // pmaxsd xmm1, xmm6
    LONG $0x3d380f66; BYTE $0xdf // pmaxsd xmm3, xmm7
    LONG $0x10c08348 // add rax, 16
    LONG $0x02c28349 // add r10, 2
    JNE LBB0_9
    LONG $0x01c0f641 // test r8b, 1
    JE LBB0_12

// Odd chunk count: fold in one remaining 8-element chunk.
LBB0_11:
    LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
    LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
    LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5
    LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4
    LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5
    LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4

// Horizontal reduction: eax = max over all lanes, r8d = min over all lanes.
LBB0_12:
    LONG $0x39380f66; BYTE $0xc2 // pminsd xmm0, xmm2
    LONG $0x3d380f66; BYTE $0xcb // pmaxsd xmm1, xmm3
    LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78
    LONG $0x3d380f66; BYTE $0xd1 // pmaxsd xmm2, xmm1
    LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229
    LONG $0x3d380f66; BYTE $0xca // pmaxsd xmm1, xmm2
    LONG $0xc87e0f66 // movd eax, xmm1
    LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78
    LONG $0x39380f66; BYTE $0xc8 // pminsd xmm1, xmm0
    LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229
    LONG $0x39380f66; BYTE $0xc1 // pminsd xmm0, xmm1
    LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0
    WORD $0x394d; BYTE $0xcb // cmp r11, r9
    JE LBB0_13

// Scalar tail over elements [r11, r9).
LBB0_4:
    WORD $0xc689 // mov esi, eax

LBB0_5:
    LONG $0x9f048b42 // mov eax, dword [rdi + 4*r11]
    WORD $0x3941; BYTE $0xc0 // cmp r8d, eax
    LONG $0xc04f0f44 // cmovg r8d, eax
    WORD $0xc639 // cmp esi, eax
    WORD $0x4d0f; BYTE $0xc6 // cmovge eax, esi
    LONG $0x01c38349 // add r11, 1
    WORD $0xc689 // mov esi, eax
    WORD $0x394d; BYTE $0xd9 // cmp r9, r11
    JNE LBB0_5

// Store results: *maxout = eax, *minout = r8d.
LBB0_13:
    WORD $0x0189 // mov dword [rcx], eax
    WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d
    RET

// Exactly one 8-element chunk: initialise accumulators and skip the main loop.
LBB0_7:
    LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
    LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
    WORD $0xc031 // xor eax, eax
    LONG $0xd06f0f66 // movdqa xmm2, xmm0
    LONG $0xd96f0f66 // movdqa xmm3, xmm1
    LONG $0x01c0f641 // test r8b, 1
    JNE LBB0_11
    JMP LBB0_12

// ·_uint32_max_min_sse4: unsigned 32-bit min/max.
//
// Registers: DI = values, SI = length, DX = minout, CX = maxout,
//            r9 = length (zero-extended 32-bit), r11 = scalar-tail start index,
//            r8d = running minimum, esi/eax = running maximum.
// Empty input stores 0xFFFFFFFF to *minout and 0 to *maxout.
// BP is not touched here, so the frame stays $0.
TEXT ·_uint32_max_min_sse4(SB), $0-32

    MOVQ values+0(FP), DI
    MOVQ length+8(FP), SI
    MOVQ minout+16(FP), DX
    MOVQ maxout+24(FP), CX

    WORD $0xf685 // test esi, esi
    JLE LBB1_1
    WORD $0x8941; BYTE $0xf1 // mov r9d, esi
    WORD $0xfe83; BYTE $0x07 // cmp esi, 7
    JA LBB1_6
    // 1..7 elements: scalar loop only.
    WORD $0x3145; BYTE $0xdb // xor r11d, r11d
    LONG $0xffffb841; WORD $0xffff // mov r8d, -1
    WORD $0xf631 // xor esi, esi
    JMP LBB1_4

// Empty input: store the identity values unchanged.
LBB1_1:
    LONG $0xffffb841; WORD $0xffff // mov r8d, -1
    WORD $0xf631 // xor esi, esi
    JMP LBB1_13

// Vector setup: r8 = number of 8-element chunks, r10 = -(r8 & -2).
LBB1_6:
    WORD $0x8945; BYTE $0xcb // mov r11d, r9d
    LONG $0xf8e38341 // and r11d, -8
    LONG $0xf8438d49 // lea rax, [r11 - 8]
    WORD $0x8949; BYTE $0xc0 // mov r8, rax
    LONG $0x03e8c149 // shr r8, 3
    LONG $0x01c08349 // add r8, 1
    WORD $0x8548; BYTE $0xc0 // test rax, rax
    JE LBB1_7
    WORD $0x894d; BYTE $0xc2 // mov r10, r8
    LONG $0xfee28349 // and r10, -2
    WORD $0xf749; BYTE $0xda // neg r10
    LONG $0xc9ef0f66 // pxor xmm1, xmm1
    LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
    WORD $0xc031 // xor eax, eax
    LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
    LONG $0xdbef0f66 // pxor xmm3, xmm3

// Main loop: 16 elements per iteration; xmm0/xmm2 hold minima, xmm1/xmm3 maxima.
LBB1_9:
    LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
    LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
    LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32]
    LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48]
    LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4
    LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5
    LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4
    LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5
    LONG $0x3b380f66; BYTE $0xc6 // pminud xmm0, xmm6
    LONG $0x3b380f66; BYTE $0xd7 // pminud xmm2, xmm7
    LONG $0x3f380f66; BYTE $0xce // pmaxud xmm1, xmm6
    LONG $0x3f380f66; BYTE $0xdf // pmaxud xmm3, xmm7
    LONG $0x10c08348 // add rax, 16
    LONG $0x02c28349 // add r10, 2
    JNE LBB1_9
    LONG $0x01c0f641 // test r8b, 1
    JE LBB1_12

// Odd chunk count: fold in one remaining 8-element chunk.
LBB1_11:
    LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
    LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
    LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5
    LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4
    LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5
    LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4

// Horizontal reduction: esi = max over all lanes, r8d = min over all lanes.
LBB1_12:
    LONG $0x3b380f66; BYTE $0xc2 // pminud xmm0, xmm2
    LONG $0x3f380f66; BYTE $0xcb // pmaxud xmm1, xmm3
    LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78
    LONG $0x3f380f66; BYTE $0xd1 // pmaxud xmm2, xmm1
    LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229
    LONG $0x3f380f66; BYTE $0xca // pmaxud xmm1, xmm2
    LONG $0xce7e0f66 // movd esi, xmm1
    LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78
    LONG $0x3b380f66; BYTE $0xc8 // pminud xmm1, xmm0
    LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229
    LONG $0x3b380f66; BYTE $0xc1 // pminud xmm0, xmm1
    LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0
    WORD $0x394d; BYTE $0xcb // cmp r11, r9
    JE LBB1_13

// Scalar tail over elements [r11, r9), using unsigned comparisons.
LBB1_4:
    WORD $0xf089 // mov eax, esi

LBB1_5:
    LONG $0x9f348b42 // mov esi, dword [rdi + 4*r11]
    WORD $0x3941; BYTE $0xf0 // cmp r8d, esi
    LONG $0xc6430f44 // cmovae r8d, esi
    WORD $0xf039 // cmp eax, esi
    WORD $0x470f; BYTE $0xf0 // cmova esi, eax
    LONG $0x01c38349 // add r11, 1
    WORD $0xf089 // mov eax, esi
    WORD $0x394d; BYTE $0xd9 // cmp r9, r11
    JNE LBB1_5

// Store results: *maxout = esi, *minout = r8d.
LBB1_13:
    WORD $0x3189 // mov dword [rcx], esi
    WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d
    RET

// Exactly one 8-element chunk: initialise accumulators and skip the main loop.
LBB1_7:
    LONG $0xc9ef0f66 // pxor xmm1, xmm1
    LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
    WORD $0xc031 // xor eax, eax
    LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
    LONG $0xdbef0f66 // pxor xmm3, xmm3
    LONG $0x01c0f641 // test r8b, 1
    JNE LBB1_11
    JMP LBB1_12

// LCDATA2: 2 x INT64_MIN (offset 0) followed by 2 x INT64_MAX (offset 16).
DATA LCDATA2<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA2<>+0x008(SB)/8, $0x8000000000000000
DATA LCDATA2<>+0x010(SB)/8, $0x7fffffffffffffff
DATA LCDATA2<>+0x018(SB)/8, $0x7fffffffffffffff
GLOBL LCDATA2<>(SB), 8, $32

// ·_int64_max_min_sse4: signed 64-bit min/max.
//
// Registers: DI = values, SI = length, DX = minout, CX = maxout,
//            BP = &LCDATA2, r9 = length (zero-extended 32-bit),
//            r11 = scalar-tail start index (length & -4),
//            r8 = running minimum, rsi/rax = running maximum.
// Each select is pcmpgtq into xmm0 followed by blendvpd, which uses xmm0 as
// its implicit mask operand.
// Empty input stores INT64_MAX to *minout and INT64_MIN to *maxout.
// Frame is $8 (not $0) so that BP is saved/restored by the assembler.
TEXT ·_int64_max_min_sse4(SB), $8-32

    MOVQ values+0(FP), DI
    MOVQ length+8(FP), SI
    MOVQ minout+16(FP), DX
    MOVQ maxout+24(FP), CX
    LEAQ LCDATA2<>(SB), BP

    QUAD $0xffffffffffffb849; WORD $0x7fff // mov r8, 9223372036854775807
    WORD $0xf685 // test esi, esi
    JLE LBB2_1
    WORD $0x8941; BYTE $0xf1 // mov r9d, esi
    WORD $0xfe83; BYTE $0x03 // cmp esi, 3
    JA LBB2_6
    // 1..3 elements: scalar loop only; rsi = r8 + 1 wraps to INT64_MIN.
    LONG $0x01708d49 // lea rsi, [r8 + 1]
    WORD $0x3145; BYTE $0xdb // xor r11d, r11d
    JMP LBB2_4

// Empty input: store the identity values unchanged.
LBB2_1:
    LONG $0x01708d49 // lea rsi, [r8 + 1]
    JMP LBB2_13

// Vector setup: r8 = number of 4-element chunks, r10 = -(r8 & -2).
LBB2_6:
    WORD $0x8945; BYTE $0xcb // mov r11d, r9d
    LONG $0xfce38341 // and r11d, -4
    LONG $0xfc438d49 // lea rax, [r11 - 4]
    WORD $0x8949; BYTE $0xc0 // mov r8, rax
    LONG $0x02e8c149 // shr r8, 2
    LONG $0x01c08349 // add r8, 1
    WORD $0x8548; BYTE $0xc0 // test rax, rax
    JE LBB2_7
    WORD $0x894d; BYTE $0xc2 // mov r10, r8
    LONG $0xfee28349 // and r10, -2
    WORD $0xf749; BYTE $0xda // neg r10
    LONG $0x6f0f4466; WORD $0x004d // movdqa xmm9, oword 0[rbp] /* [rip + .LCPI2_0] */
    LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI2_1] */
    WORD $0xc031 // xor eax, eax
    LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8
    LONG $0x6f0f4166; BYTE $0xf1 // movdqa xmm6, xmm9

// Main loop: 8 elements per iteration; xmm8/xmm2 hold minima,
// xmm9/xmm6 (mirrored in xmm3/xmm4) hold maxima.
LBB2_9:
    LONG $0x3c6f0ff3; BYTE $0xc7 // movdqu xmm7, oword [rdi + 8*rax]
    LONG $0xc76f0f66 // movdqa xmm0, xmm7
    LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8
    LONG $0xe76f0f66 // movdqa xmm4, xmm7
    LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0
    LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16]
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0xe96f0f66 // movdqa xmm5, xmm1
    LONG $0x15380f66; BYTE $0xea // blendvpd xmm5, xmm2, xmm0
    LONG $0x6f0f4166; BYTE $0xc1 // movdqa xmm0, xmm9
    LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7
    LONG $0x380f4166; WORD $0xf915 // blendvpd xmm7, xmm9, xmm0
    LONG $0xc66f0f66 // movdqa xmm0, xmm6
    LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
    LONG $0x15380f66; BYTE $0xce // blendvpd xmm1, xmm6, xmm0
    LONG $0x5c6f0ff3; WORD $0x20c7 // movdqu xmm3, oword [rdi + 8*rax + 32]
    LONG $0xc36f0f66 // movdqa xmm0, xmm3
    LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
    LONG $0x6f0f4466; BYTE $0xc3 // movdqa xmm8, xmm3
    LONG $0x380f4466; WORD $0xc415 // blendvpd xmm8, xmm4, xmm0
    LONG $0x646f0ff3; WORD $0x30c7 // movdqu xmm4, oword [rdi + 8*rax + 48]
    LONG $0xc46f0f66 // movdqa xmm0, xmm4
    LONG $0x37380f66; BYTE $0xc5 // pcmpgtq xmm0, xmm5
    LONG $0xd46f0f66 // movdqa xmm2, xmm4
    LONG $0x15380f66; BYTE $0xd5 // blendvpd xmm2, xmm5, xmm0
    LONG $0xc7280f66 // movapd xmm0, xmm7
    LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3
    LONG $0x15380f66; BYTE $0xdf // blendvpd xmm3, xmm7, xmm0
    LONG $0xc1280f66 // movapd xmm0, xmm1
    LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
    LONG $0x15380f66; BYTE $0xe1 // blendvpd xmm4, xmm1, xmm0
    LONG $0x08c08348 // add rax, 8
    LONG $0x280f4466; BYTE $0xcb // movapd xmm9, xmm3
    LONG $0xf4280f66 // movapd xmm6, xmm4
    LONG $0x02c28349 // add r10, 2
    JNE LBB2_9
    LONG $0x01c0f641 // test r8b, 1
    JE LBB2_12

// Odd chunk count: fold in one remaining 4-element chunk.
// On entry xmm3/xmm4 are the maxima and xmm8/xmm2 the minima.
LBB2_11:
    LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16]
    LONG $0xc4280f66 // movapd xmm0, xmm4
    LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
    LONG $0xe96f0f66 // movdqa xmm5, xmm1
    LONG $0x15380f66; BYTE $0xec // blendvpd xmm5, xmm4, xmm0
    LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax]
    LONG $0xc3280f66 // movapd xmm0, xmm3
    LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
    LONG $0xf46f0f66 // movdqa xmm6, xmm4
    LONG $0x15380f66; BYTE $0xf3 // blendvpd xmm6, xmm3, xmm0
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0
    LONG $0xc46f0f66 // movdqa xmm0, xmm4
    LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8
    LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0
    LONG $0x280f4466; BYTE $0xc4 // movapd xmm8, xmm4
    LONG $0xd1280f66 // movapd xmm2, xmm1
    LONG $0xde280f66 // movapd xmm3, xmm6
    LONG $0xe5280f66 // movapd xmm4, xmm5

// Horizontal reduction: rsi = max over all lanes, r8 = min over all lanes.
LBB2_12:
    LONG $0xc3280f66 // movapd xmm0, xmm3
    LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
    LONG $0x15380f66; BYTE $0xe3 // blendvpd xmm4, xmm3, xmm0
    LONG $0xcc700f66; BYTE $0x4e // pshufd xmm1, xmm4, 78
    LONG $0xc46f0f66 // movdqa xmm0, xmm4
    LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
    LONG $0x15380f66; BYTE $0xcc // blendvpd xmm1, xmm4, xmm0
    LONG $0x7e0f4866; BYTE $0xce // movq rsi, xmm1
    LONG $0xc26f0f66 // movdqa xmm0, xmm2
    LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8
    LONG $0x380f4166; WORD $0xd015 // blendvpd xmm2, xmm8, xmm0
    LONG $0xca700f66; BYTE $0x4e // pshufd xmm1, xmm2, 78
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0
    LONG $0x7e0f4966; BYTE $0xc8 // movq r8, xmm1
    WORD $0x394d; BYTE $0xcb // cmp r11, r9
    JE LBB2_13

// Scalar tail over elements [r11, r9).
LBB2_4:
    WORD $0x8948; BYTE $0xf0 // mov rax, rsi

LBB2_5:
    LONG $0xdf348b4a // mov rsi, qword [rdi + 8*r11]
    WORD $0x3949; BYTE $0xf0 // cmp r8, rsi
    LONG $0xc64f0f4c // cmovg r8, rsi
    WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
    LONG $0xf04d0f48 // cmovge rsi, rax
    LONG $0x01c38349 // add r11, 1
    WORD $0x8948; BYTE $0xf0 // mov rax, rsi
    WORD $0x394d; BYTE $0xd9 // cmp r9, r11
    JNE LBB2_5

// Store results: *maxout = rsi, *minout = r8.
LBB2_13:
    WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
    WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8
    RET

// Exactly one 4-element chunk: initialise accumulators and skip the main loop.
LBB2_7:
    LONG $0x5d280f66; BYTE $0x00 // movapd xmm3, oword 0[rbp] /* [rip + .LCPI2_0] */
    LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI2_1] */
    WORD $0xc031 // xor eax, eax
    LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8
    LONG $0xe3280f66 // movapd xmm4, xmm3
    LONG $0x01c0f641 // test r8b, 1
    JNE LBB2_11
    JMP LBB2_12

// LCDATA3: 2 x 0x8000000000000000.  The code XORs both operands with this
// sign-bit mask before pcmpgtq (a signed compare) so the result orders the
// values as unsigned.
DATA LCDATA3<>+0x000(SB)/8, $0x8000000000000000
DATA LCDATA3<>+0x008(SB)/8, $0x8000000000000000
GLOBL LCDATA3<>(SB), 8, $16

// ·_uint64_max_min_sse4: unsigned 64-bit min/max.
//
// Registers: DI = values, SI = length, DX = minout, CX = maxout,
//            BP = &LCDATA3, r9 = length (zero-extended 32-bit),
//            r11 = scalar-tail start index (length & -4),
//            r8 = running minimum, rax/rsi = running maximum.
// Empty input stores 0xFFFFFFFFFFFFFFFF to *minout and 0 to *maxout.
// Frame is $8 (not $0) so that BP is saved/restored by the assembler.
TEXT ·_uint64_max_min_sse4(SB), $8-32

    MOVQ values+0(FP), DI
    MOVQ length+8(FP), SI
    MOVQ minout+16(FP), DX
    MOVQ maxout+24(FP), CX
    LEAQ LCDATA3<>(SB), BP

    WORD $0xf685 // test esi, esi
    JLE LBB3_1
    WORD $0x8941; BYTE $0xf1 // mov r9d, esi
    WORD $0xfe83; BYTE $0x03 // cmp esi, 3
    JA LBB3_6
    // 1..3 elements: scalar loop only.
    LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1
    WORD $0x3145; BYTE $0xdb // xor r11d, r11d
    WORD $0xc031 // xor eax, eax
    JMP LBB3_4

// Empty input: store the identity values unchanged.
LBB3_1:
    LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1
    WORD $0xc031 // xor eax, eax
    JMP LBB3_13

// Vector setup: r8 = number of 4-element chunks, r10 = -(r8 & -2),
// xmm8 = sign-bit mask.
LBB3_6:
    WORD $0x8945; BYTE $0xcb // mov r11d, r9d
    LONG $0xfce38341 // and r11d, -4
    LONG $0xfc438d49 // lea rax, [r11 - 4]
    WORD $0x8949; BYTE $0xc0 // mov r8, rax
    LONG $0x02e8c149 // shr r8, 2
    LONG $0x01c08349 // add r8, 1
    WORD $0x8548; BYTE $0xc0 // test rax, rax
    JE LBB3_7
    WORD $0x894d; BYTE $0xc2 // mov r10, r8
    LONG $0xfee28349 // and r10, -2
    WORD $0xf749; BYTE $0xda // neg r10
    LONG $0xef0f4566; BYTE $0xc9 // pxor xmm9, xmm9
    LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10
    WORD $0xc031 // xor eax, eax
    LONG $0x6f0f4466; WORD $0x0045 // movdqa xmm8, oword 0[rbp] /* [rip + .LCPI3_0] */
    LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11
    LONG $0xef0f4566; BYTE $0xe4 // pxor xmm12, xmm12

// Main loop: 8 elements per iteration; xmm10/xmm11 hold minima,
// xmm9/xmm12 (mirrored in xmm13/xmm6) hold maxima.
LBB3_9:
    LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10
    LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8
    LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax]
    LONG $0x6c6f0ff3; WORD $0x10c7 // movdqu xmm5, oword [rdi + 8*rax + 16]
    LONG $0x6f0f44f3; WORD $0xc76c; BYTE $0x20 // movdqu xmm13, oword [rdi + 8*rax + 32]
    LONG $0xc46f0f66 // movdqa xmm0, xmm4
    LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
    LONG $0x6f0f4166; BYTE $0xc9 // movdqa xmm1, xmm9
    LONG $0xef0f4166; BYTE $0xc8 // pxor xmm1, xmm8
    LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0xdc6f0f66 // movdqa xmm3, xmm4
    LONG $0x380f4166; WORD $0xda15 // blendvpd xmm3, xmm10, xmm0
    LONG $0x746f0ff3; WORD $0x30c7 // movdqu xmm6, oword [rdi + 8*rax + 48]
    LONG $0x6f0f4166; BYTE $0xfb // movdqa xmm7, xmm11
    LONG $0xef0f4166; BYTE $0xf8 // pxor xmm7, xmm8
    LONG $0xc56f0f66 // movdqa xmm0, xmm5
    LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
    LONG $0x6f0f4166; BYTE $0xd4 // movdqa xmm2, xmm12
    LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8
    LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0
    LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7
    LONG $0xfd6f0f66 // movdqa xmm7, xmm5
    LONG $0x380f4166; WORD $0xfb15 // blendvpd xmm7, xmm11, xmm0
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x380f4166; WORD $0xe115 // blendvpd xmm4, xmm9, xmm0
    LONG $0xc26f0f66 // movdqa xmm0, xmm2
    LONG $0x380f4166; WORD $0xec15 // blendvpd xmm5, xmm12, xmm0
    LONG $0xd3280f66 // movapd xmm2, xmm3
    LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8
    LONG $0x6f0f4166; BYTE $0xc5 // movdqa xmm0, xmm13
    LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
    LONG $0xcc280f66 // movapd xmm1, xmm4
    LONG $0x570f4166; BYTE $0xc8 // xorpd xmm1, xmm8
    LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0x6f0f4566; BYTE $0xd5 // movdqa xmm10, xmm13
    LONG $0x380f4466; WORD $0xd315 // blendvpd xmm10, xmm3, xmm0
    LONG $0xdf280f66 // movapd xmm3, xmm7
    LONG $0x570f4166; BYTE $0xd8 // xorpd xmm3, xmm8
    LONG $0xc66f0f66 // movdqa xmm0, xmm6
    LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
    LONG $0xd5280f66 // movapd xmm2, xmm5
    LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8
    LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0
    LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3
    LONG $0x6f0f4466; BYTE $0xde // movdqa xmm11, xmm6
    LONG $0x380f4466; WORD $0xdf15 // blendvpd xmm11, xmm7, xmm0
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x380f4466; WORD $0xec15 // blendvpd xmm13, xmm4, xmm0
    LONG $0xc26f0f66 // movdqa xmm0, xmm2
    LONG $0x15380f66; BYTE $0xf5 // blendvpd xmm6, xmm5, xmm0
    LONG $0x08c08348 // add rax, 8
    LONG $0x280f4566; BYTE $0xcd // movapd xmm9, xmm13
    LONG $0x280f4466; BYTE $0xe6 // movapd xmm12, xmm6
    LONG $0x02c28349 // add r10, 2
    JNE LBB3_9
    LONG $0x01c0f641 // test r8b, 1
    JE LBB3_12

// Odd chunk count: fold in one remaining 4-element chunk.
// On entry xmm13/xmm6 are the maxima and xmm10/xmm11 the minima.
LBB3_11:
    LONG $0x24100f66; BYTE $0xc7 // movupd xmm4, oword [rdi + 8*rax]
    LONG $0x5c100f66; WORD $0x10c7 // movupd xmm3, oword [rdi + 8*rax + 16]
    LONG $0x6d280f66; BYTE $0x00 // movapd xmm5, oword 0[rbp] /* [rip + .LCPI3_0] */
    LONG $0xc6280f66 // movapd xmm0, xmm6
    LONG $0xc5570f66 // xorpd xmm0, xmm5
    LONG $0xcb280f66 // movapd xmm1, xmm3
    LONG $0xcd570f66 // xorpd xmm1, xmm5
    LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
    LONG $0xfb280f66 // movapd xmm7, xmm3
    LONG $0x15380f66; BYTE $0xfe // blendvpd xmm7, xmm6, xmm0
    LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13
    LONG $0xc5570f66 // xorpd xmm0, xmm5
    LONG $0xd4280f66 // movapd xmm2, xmm4
    LONG $0xd5570f66 // xorpd xmm2, xmm5
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0xf4280f66 // movapd xmm6, xmm4
    LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0
    LONG $0x280f4166; BYTE $0xc3 // movapd xmm0, xmm11
    LONG $0xc5570f66 // xorpd xmm0, xmm5
    LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x380f4166; WORD $0xdb15 // blendvpd xmm3, xmm11, xmm0
    LONG $0x570f4166; BYTE $0xea // xorpd xmm5, xmm10
    LONG $0x37380f66; BYTE $0xd5 // pcmpgtq xmm2, xmm5
    LONG $0xc26f0f66 // movdqa xmm0, xmm2
    LONG $0x380f4166; WORD $0xe215 // blendvpd xmm4, xmm10, xmm0
    LONG $0x280f4466; BYTE $0xd4 // movapd xmm10, xmm4
    LONG $0x280f4466; BYTE $0xdb // movapd xmm11, xmm3
    LONG $0x280f4466; BYTE $0xee // movapd xmm13, xmm6
    LONG $0xf7280f66 // movapd xmm6, xmm7

// Horizontal reduction: rax = max over all lanes, r8 = min over all lanes.
LBB3_12:
    LONG $0x4d280f66; BYTE $0x00 // movapd xmm1, oword 0[rbp] /* [rip + .LCPI3_0] */
    LONG $0xd6280f66 // movapd xmm2, xmm6
    LONG $0xd1570f66 // xorpd xmm2, xmm1
    LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13
    LONG $0xc1570f66 // xorpd xmm0, xmm1
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0
    LONG $0xd6700f66; BYTE $0x4e // pshufd xmm2, xmm6, 78
    LONG $0xc6280f66 // movapd xmm0, xmm6
    LONG $0xc1570f66 // xorpd xmm0, xmm1
    LONG $0xda6f0f66 // movdqa xmm3, xmm2
    LONG $0xd9ef0f66 // pxor xmm3, xmm1
    LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3
    LONG $0x15380f66; BYTE $0xd6 // blendvpd xmm2, xmm6, xmm0
    LONG $0x7e0f4866; BYTE $0xd0 // movq rax, xmm2
    LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10
    LONG $0xd1ef0f66 // pxor xmm2, xmm1
    LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11
    LONG $0xc1ef0f66 // pxor xmm0, xmm1
    LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
    LONG $0x380f4566; WORD $0xda15 // blendvpd xmm11, xmm10, xmm0
    LONG $0x700f4166; WORD $0x4ed3 // pshufd xmm2, xmm11, 78
    LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11
    LONG $0xc1ef0f66 // pxor xmm0, xmm1
    LONG $0xcaef0f66 // pxor xmm1, xmm2
    LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
    LONG $0xc16f0f66 // movdqa xmm0, xmm1
    LONG $0x380f4166; WORD $0xd315 // blendvpd xmm2, xmm11, xmm0
    LONG $0x7e0f4966; BYTE $0xd0 // movq r8, xmm2
    WORD $0x394d; BYTE $0xcb // cmp r11, r9
    JE LBB3_13

// Scalar tail over elements [r11, r9), using unsigned comparisons.
LBB3_4:
    WORD $0x8948; BYTE $0xc6 // mov rsi, rax

LBB3_5:
    LONG $0xdf048b4a // mov rax, qword [rdi + 8*r11]
    WORD $0x3949; BYTE $0xc0 // cmp r8, rax
    LONG $0xc0430f4c // cmovae r8, rax
    WORD $0x3948; BYTE $0xc6 // cmp rsi, rax
    LONG $0xc6470f48 // cmova rax, rsi
    LONG $0x01c38349 // add r11, 1
    WORD $0x8948; BYTE $0xc6 // mov rsi, rax
    WORD $0x394d; BYTE $0xd9 // cmp r9, r11
    JNE LBB3_5

// Store results: *maxout = rax, *minout = r8.
LBB3_13:
    WORD $0x8948; BYTE $0x01 // mov qword [rcx], rax
    WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8
    RET

// Exactly one 4-element chunk: initialise accumulators and skip the main loop.
LBB3_7:
    LONG $0x570f4566; BYTE $0xed // xorpd xmm13, xmm13
    LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10
    WORD $0xc031 // xor eax, eax
    LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11
    LONG $0xf6570f66 // xorpd xmm6, xmm6
    LONG $0x01c0f641 // test r8b, 1
    JNE LBB3_11
    JMP LBB3_12