//go:build !purego

#include "textflag.h"

// Result codes returned by every orderOf* routine in this file.
#define UNDEFINED 0
#define ASCENDING 1
#define DESCENDING -1

// shift1x32 is the index vector used with VPERMI2D on sixteen 32-bit lanes:
// lane i selects source lane i+1, and the last lane selects itself (15).
DATA shift1x32<>+0(SB)/4, $1
DATA shift1x32<>+4(SB)/4, $2
DATA shift1x32<>+8(SB)/4, $3
DATA shift1x32<>+12(SB)/4, $4
DATA shift1x32<>+16(SB)/4, $5
DATA shift1x32<>+20(SB)/4, $6
DATA shift1x32<>+24(SB)/4, $7
DATA shift1x32<>+28(SB)/4, $8
DATA shift1x32<>+32(SB)/4, $9
DATA shift1x32<>+36(SB)/4, $10
DATA shift1x32<>+40(SB)/4, $11
DATA shift1x32<>+44(SB)/4, $12
DATA shift1x32<>+48(SB)/4, $13
DATA shift1x32<>+52(SB)/4, $14
DATA shift1x32<>+56(SB)/4, $15
DATA shift1x32<>+60(SB)/4, $15
GLOBL shift1x32<>(SB), RODATA|NOPTR, $64

// shift1x64 is the index vector used with VPERMI2Q on eight 64-bit lanes:
// lane i selects source lane i+1, and the last lane selects itself (7).
// Only the low 4 bytes of each 8-byte lane are written here.
DATA shift1x64<>+0(SB)/4, $1
DATA shift1x64<>+8(SB)/4, $2
DATA shift1x64<>+16(SB)/4, $3
DATA shift1x64<>+24(SB)/4, $4
DATA shift1x64<>+32(SB)/4, $5
DATA shift1x64<>+40(SB)/4, $6
DATA shift1x64<>+48(SB)/4, $7
DATA shift1x64<>+56(SB)/4, $7
GLOBL shift1x64<>(SB), RODATA|NOPTR, $64

// func orderOfInt32(data []int32) int
//
// Returns ASCENDING when data[i] <= data[i+1] holds for every adjacent pair
// (signed comparison), otherwise DESCENDING when data[i] >= data[i+1] holds
// for every adjacent pair, otherwise UNDEFINED. Slices with fewer than two
// elements yield UNDEFINED.
//
// Registers: R8 = base pointer, R9 = len (len-1 once a scan starts),
// SI = cursor of the ascending scan, DI = cursor of the descending scan,
// AX = index at which the vector loops stop.
TEXT ·orderOfInt32(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	// Fewer than two elements: there is no pair to compare.
	CMPQ R9, $2
	JB undefined

	// The vector path needs AVX-512 and at least one full 16-element load.
	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $16
	JB test

	// R9 = len-1 is the number of adjacent pairs. Each vector iteration loads
	// 16 elements and checks 15 pairs, so the loops stop at
	// AX = 15 * ((len-1) / 15). The last load then covers data[AX-15..AX] with
	// AX <= len-1, which keeps every load inside the slice. Deriving AX from
	// len rounded down to a multiple of 16 instead yields AX == len whenever
	// len is a multiple of 240, and the last load would read data[len].
	DECQ R9
	XORQ DX, DX
	MOVQ R9, AX
	MOVQ $15, CX
	IDIVQ CX
	IMULQ $15, AX

	VMOVDQU32 shift1x32<>(SB), Z2
	// K2 = 0 so that KORTESTW below reflects K1 alone.
	KXORW K2, K2, K2
testAscending15:
	VMOVDQU32 (R8)(SI*4), Z0
	VMOVDQU32 Z2, Z1
	// Z1[i] = Z0[i+1] for i < 15, Z1[15] = Z0[15].
	VPERMI2D Z0, Z0, Z1
	// K1[i] = (Z0[i] <= Z1[i]), signed.
	VPCMPD $2, Z1, Z0, K1
	// CF is set only when all 16 mask bits are ones.
	KORTESTW K2, K1
	JNC testDescending15
	ADDQ $15, SI
	CMPQ SI, AX
	JNE testAscending15
	VZEROUPPER
	// Finish the remaining pairs with the scalar loop.
	JMP testAscending
testDescending15:
	VMOVDQU32 (R8)(DI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	// K1[i] = (Z0[i] >= Z1[i]), signed.
	VPCMPD $5, Z1, Z0, K1
	KORTESTW K2, K1
	JNC undefined15
	ADDQ $15, DI
	CMPQ DI, AX
	JNE testDescending15
	VZEROUPPER
	JMP testDescending

test:
	// Scalar-only entry: R9 becomes the index of the last element.
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVL (R8)(SI*4), BX
	MOVL 4(R8)(SI*4), DX
	INCQ SI
	CMPL BX, DX
	JLE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVL (R8)(DI*4), BX
	MOVL 4(R8)(DI*4), DX
	INCQ DI
	CMPL BX, DX
	JGE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined15:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfInt64(data []int64) int
//
// Returns ASCENDING when data[i] <= data[i+1] holds for every adjacent pair
// (signed comparison), otherwise DESCENDING when data[i] >= data[i+1] holds
// for every adjacent pair, otherwise UNDEFINED. Slices with fewer than two
// elements yield UNDEFINED.
//
// Registers: R8 = base pointer, R9 = len (len-1 once a scan starts),
// SI = cursor of the ascending scan, DI = cursor of the descending scan,
// AX = index at which the vector loops stop.
TEXT ·orderOfInt64(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	// Fewer than two elements: there is no pair to compare.
	CMPQ R9, $2
	JB undefined

	// The vector path needs AVX-512 and at least one full 8-element load.
	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $8
	JB test

	// R9 = len-1 is the number of adjacent pairs. Each vector iteration loads
	// 8 elements and checks 7 pairs, so the loops stop at
	// AX = 7 * ((len-1) / 7). The last load then covers data[AX-7..AX] with
	// AX <= len-1, which keeps every load inside the slice. Deriving AX from
	// len rounded down to a multiple of 8 instead yields AX == len whenever
	// len is a multiple of 56, and the last load would read data[len].
	DECQ R9
	XORQ DX, DX
	MOVQ R9, AX
	MOVQ $7, CX
	IDIVQ CX
	IMULQ $7, AX

	VMOVDQU64 shift1x64<>(SB), Z2
	// K2 = 0 so that KORTESTB below reflects K1 alone.
	KXORB K2, K2, K2
testAscending7:
	VMOVDQU64 (R8)(SI*8), Z0
	VMOVDQU64 Z2, Z1
	// Z1[i] = Z0[i+1] for i < 7, Z1[7] = Z0[7].
	VPERMI2Q Z0, Z0, Z1
	// K1[i] = (Z0[i] <= Z1[i]), signed.
	VPCMPQ $2, Z1, Z0, K1
	// CF is set only when all 8 mask bits are ones.
	KORTESTB K2, K1
	JNC testDescending7
	ADDQ $7, SI
	CMPQ SI, AX
	JNE testAscending7
	VZEROUPPER
	// Finish the remaining pairs with the scalar loop.
	JMP testAscending
testDescending7:
	VMOVDQU64 (R8)(DI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	// K1[i] = (Z0[i] >= Z1[i]), signed.
	VPCMPQ $5, Z1, Z0, K1
	KORTESTB K2, K1
	JNC undefined7
	ADDQ $7, DI
	CMPQ DI, AX
	JNE testDescending7
	VZEROUPPER
	JMP testDescending

test:
	// Scalar-only entry: R9 becomes the index of the last element.
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVQ (R8)(SI*8), BX
	MOVQ 8(R8)(SI*8), DX
	INCQ SI
	CMPQ BX, DX
	JLE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVQ (R8)(DI*8), BX
	MOVQ 8(R8)(DI*8), DX
	INCQ DI
	CMPQ BX, DX
	JGE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined7:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfUint32(data []uint32) int
//
// Returns ASCENDING when data[i] <= data[i+1] holds for every adjacent pair
// (unsigned comparison), otherwise DESCENDING when data[i] >= data[i+1] holds
// for every adjacent pair, otherwise UNDEFINED. Slices with fewer than two
// elements yield UNDEFINED.
//
// Registers: R8 = base pointer, R9 = len (len-1 once a scan starts),
// SI = cursor of the ascending scan, DI = cursor of the descending scan,
// AX = index at which the vector loops stop.
TEXT ·orderOfUint32(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	// Fewer than two elements: there is no pair to compare.
	CMPQ R9, $2
	JB undefined

	// The vector path needs AVX-512 and at least one full 16-element load.
	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $16
	JB test

	// R9 = len-1 is the number of adjacent pairs. Each vector iteration loads
	// 16 elements and checks 15 pairs, so the loops stop at
	// AX = 15 * ((len-1) / 15). The last load then covers data[AX-15..AX] with
	// AX <= len-1, which keeps every load inside the slice. Deriving AX from
	// len rounded down to a multiple of 16 instead yields AX == len whenever
	// len is a multiple of 240, and the last load would read data[len].
	DECQ R9
	XORQ DX, DX
	MOVQ R9, AX
	MOVQ $15, CX
	IDIVQ CX
	IMULQ $15, AX

	VMOVDQU32 shift1x32<>(SB), Z2
	// K2 = 0 so that KORTESTW below reflects K1 alone.
	KXORW K2, K2, K2
testAscending15:
	VMOVDQU32 (R8)(SI*4), Z0
	VMOVDQU32 Z2, Z1
	// Z1[i] = Z0[i+1] for i < 15, Z1[15] = Z0[15].
	VPERMI2D Z0, Z0, Z1
	// K1[i] = (Z0[i] <= Z1[i]), unsigned.
	VPCMPUD $2, Z1, Z0, K1
	// CF is set only when all 16 mask bits are ones.
	KORTESTW K2, K1
	JNC testDescending15
	ADDQ $15, SI
	CMPQ SI, AX
	JNE testAscending15
	VZEROUPPER
	// Finish the remaining pairs with the scalar loop.
	JMP testAscending
testDescending15:
	VMOVDQU32 (R8)(DI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	// K1[i] = (Z0[i] >= Z1[i]), unsigned.
	VPCMPUD $5, Z1, Z0, K1
	KORTESTW K2, K1
	JNC undefined15
	ADDQ $15, DI
	CMPQ DI, AX
	JNE testDescending15
	VZEROUPPER
	JMP testDescending

test:
	// Scalar-only entry: R9 becomes the index of the last element.
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVL (R8)(SI*4), BX
	MOVL 4(R8)(SI*4), DX
	INCQ SI
	CMPL BX, DX
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVL (R8)(DI*4), BX
	MOVL 4(R8)(DI*4), DX
	INCQ DI
	CMPL BX, DX
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined15:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfUint64(data []uint64) int
//
// Returns ASCENDING when data[i] <= data[i+1] holds for every adjacent pair
// (unsigned comparison), otherwise DESCENDING when data[i] >= data[i+1] holds
// for every adjacent pair, otherwise UNDEFINED. Slices with fewer than two
// elements yield UNDEFINED.
//
// Registers: R8 = base pointer, R9 = len (len-1 once a scan starts),
// SI = cursor of the ascending scan, DI = cursor of the descending scan,
// AX = index at which the vector loops stop.
TEXT ·orderOfUint64(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	// Fewer than two elements: there is no pair to compare.
	CMPQ R9, $2
	JB undefined

	// The vector path needs AVX-512 and at least one full 8-element load.
	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $8
	JB test

	// R9 = len-1 is the number of adjacent pairs. Each vector iteration loads
	// 8 elements and checks 7 pairs, so the loops stop at
	// AX = 7 * ((len-1) / 7). The last load then covers data[AX-7..AX] with
	// AX <= len-1, which keeps every load inside the slice. Deriving AX from
	// len rounded down to a multiple of 8 instead yields AX == len whenever
	// len is a multiple of 56, and the last load would read data[len].
	DECQ R9
	XORQ DX, DX
	MOVQ R9, AX
	MOVQ $7, CX
	IDIVQ CX
	IMULQ $7, AX

	VMOVDQU64 shift1x64<>(SB), Z2
	// K2 = 0 so that KORTESTB below reflects K1 alone.
	KXORB K2, K2, K2
testAscending7:
	VMOVDQU64 (R8)(SI*8), Z0
	VMOVDQU64 Z2, Z1
	// Z1[i] = Z0[i+1] for i < 7, Z1[7] = Z0[7].
	VPERMI2Q Z0, Z0, Z1
	// K1[i] = (Z0[i] <= Z1[i]), unsigned.
	VPCMPUQ $2, Z1, Z0, K1
	// CF is set only when all 8 mask bits are ones.
	KORTESTB K2, K1
	JNC testDescending7
	ADDQ $7, SI
	CMPQ SI, AX
	JNE testAscending7
	VZEROUPPER
	// Finish the remaining pairs with the scalar loop.
	JMP testAscending
testDescending7:
	VMOVDQU64 (R8)(DI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	// K1[i] = (Z0[i] >= Z1[i]), unsigned.
	VPCMPUQ $5, Z1, Z0, K1
	KORTESTB K2, K1
	JNC undefined7
	ADDQ $7, DI
	CMPQ DI, AX
	JNE testDescending7
	VZEROUPPER
	JMP testDescending

test:
	// Scalar-only entry: R9 becomes the index of the last element.
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVQ (R8)(SI*8), BX
	MOVQ 8(R8)(SI*8), DX
	INCQ SI
	CMPQ BX, DX
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVQ (R8)(DI*8), BX
	MOVQ 8(R8)(DI*8), DX
	INCQ DI
	CMPQ BX, DX
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined7:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfFloat32(data []float32) int
//
// Returns ASCENDING when data[i] <= data[i+1] holds for every adjacent pair
// (single-precision floating-point comparison), otherwise DESCENDING when
// data[i] >= data[i+1] holds for every adjacent pair, otherwise UNDEFINED.
// Slices with fewer than two elements yield UNDEFINED.
//
// NOTE(review): the scalar branches (JBE/JAE after UCOMISS) and the vector
// predicates ($2/$5 on VCMPPS) look like they treat unordered (NaN)
// comparisons differently — confirm whether callers can pass NaN values.
//
// Registers: R8 = base pointer, R9 = len (len-1 once a scan starts),
// SI = cursor of the ascending scan, DI = cursor of the descending scan,
// AX = index at which the vector loops stop.
TEXT ·orderOfFloat32(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	// Fewer than two elements: there is no pair to compare.
	CMPQ R9, $2
	JB undefined

	// The vector path needs AVX-512 and at least one full 16-element load.
	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $16
	JB test

	// R9 = len-1 is the number of adjacent pairs. Each vector iteration loads
	// 16 elements and checks 15 pairs, so the loops stop at
	// AX = 15 * ((len-1) / 15). The last load then covers data[AX-15..AX] with
	// AX <= len-1, which keeps every load inside the slice. Deriving AX from
	// len rounded down to a multiple of 16 instead yields AX == len whenever
	// len is a multiple of 240, and the last load would read data[len].
	DECQ R9
	XORQ DX, DX
	MOVQ R9, AX
	MOVQ $15, CX
	IDIVQ CX
	IMULQ $15, AX

	VMOVDQU32 shift1x32<>(SB), Z2
	// K2 = 0 so that KORTESTW below reflects K1 alone.
	KXORW K2, K2, K2
testAscending15:
	VMOVDQU32 (R8)(SI*4), Z0
	VMOVDQU32 Z2, Z1
	// Z1[i] = Z0[i+1] for i < 15, Z1[15] = Z0[15].
	VPERMI2D Z0, Z0, Z1
	// K1[i] = result of comparing Z0[i] with Z1[i] under predicate 2 (LE).
	VCMPPS $2, Z1, Z0, K1
	// CF is set only when all 16 mask bits are ones.
	KORTESTW K2, K1
	JNC testDescending15
	ADDQ $15, SI
	CMPQ SI, AX
	JNE testAscending15
	VZEROUPPER
	// Finish the remaining pairs with the scalar loop.
	JMP testAscending
testDescending15:
	VMOVDQU32 (R8)(DI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	// K1[i] = result of comparing Z0[i] with Z1[i] under predicate 5 (NLT).
	VCMPPS $5, Z1, Z0, K1
	KORTESTW K2, K1
	JNC undefined15
	ADDQ $15, DI
	CMPQ DI, AX
	JNE testDescending15
	VZEROUPPER
	JMP testDescending

test:
	// Scalar-only entry: R9 becomes the index of the last element.
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVLQZX (R8)(SI*4), BX
	MOVLQZX 4(R8)(SI*4), DX
	INCQ SI
	// Move the raw bits into XMM registers for the floating-point compare.
	MOVQ BX, X0
	MOVQ DX, X1
	UCOMISS X1, X0
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVLQZX (R8)(DI*4), BX
	MOVLQZX 4(R8)(DI*4), DX
	INCQ DI
	MOVQ BX, X0
	MOVQ DX, X1
	UCOMISS X1, X0
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined15:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfFloat64(data []float64) int
//
// Returns ASCENDING when data[i] <= data[i+1] holds for every adjacent pair
// (double-precision floating-point comparison), otherwise DESCENDING when
// data[i] >= data[i+1] holds for every adjacent pair, otherwise UNDEFINED.
// Slices with fewer than two elements yield UNDEFINED.
//
// NOTE(review): the scalar branches (JBE/JAE after UCOMISD) and the vector
// predicates ($2/$5 on VCMPPD) look like they treat unordered (NaN)
// comparisons differently — confirm whether callers can pass NaN values.
//
// Registers: R8 = base pointer, R9 = len (len-1 once a scan starts),
// SI = cursor of the ascending scan, DI = cursor of the descending scan,
// AX = index at which the vector loops stop.
TEXT ·orderOfFloat64(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	// Fewer than two elements: there is no pair to compare.
	CMPQ R9, $2
	JB undefined

	// The vector path needs AVX-512 and at least one full 8-element load.
	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $8
	JB test

	// R9 = len-1 is the number of adjacent pairs. Each vector iteration loads
	// 8 elements and checks 7 pairs, so the loops stop at
	// AX = 7 * ((len-1) / 7). The last load then covers data[AX-7..AX] with
	// AX <= len-1, which keeps every load inside the slice. Deriving AX from
	// len rounded down to a multiple of 8 instead yields AX == len whenever
	// len is a multiple of 56, and the last load would read data[len].
	DECQ R9
	XORQ DX, DX
	MOVQ R9, AX
	MOVQ $7, CX
	IDIVQ CX
	IMULQ $7, AX

	VMOVDQU64 shift1x64<>(SB), Z2
	// K2 = 0 so that KORTESTB below reflects K1 alone.
	KXORB K2, K2, K2
testAscending7:
	VMOVDQU64 (R8)(SI*8), Z0
	VMOVDQU64 Z2, Z1
	// Z1[i] = Z0[i+1] for i < 7, Z1[7] = Z0[7].
	VPERMI2Q Z0, Z0, Z1
	// K1[i] = result of comparing Z0[i] with Z1[i] under predicate 2 (LE).
	VCMPPD $2, Z1, Z0, K1
	// CF is set only when all 8 mask bits are ones.
	KORTESTB K2, K1
	JNC testDescending7
	ADDQ $7, SI
	CMPQ SI, AX
	JNE testAscending7
	VZEROUPPER
	// Finish the remaining pairs with the scalar loop.
	JMP testAscending
testDescending7:
	VMOVDQU64 (R8)(DI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	// K1[i] = result of comparing Z0[i] with Z1[i] under predicate 5 (NLT).
	VCMPPD $5, Z1, Z0, K1
	KORTESTB K2, K1
	JNC undefined7
	ADDQ $7, DI
	CMPQ DI, AX
	JNE testDescending7
	VZEROUPPER
	JMP testDescending

test:
	// Scalar-only entry: R9 becomes the index of the last element.
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVQ (R8)(SI*8), BX
	MOVQ 8(R8)(SI*8), DX
	INCQ SI
	// Move the raw bits into XMM registers for the floating-point compare.
	MOVQ BX, X0
	MOVQ DX, X1
	UCOMISD X1, X0
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVQ (R8)(DI*8), BX
	MOVQ 8(R8)(DI*8), DX
	INCQ DI
	MOVQ BX, X0
	MOVQ DX, X1
	UCOMISD X1, X0
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined7:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET