//go:build !purego

#include "textflag.h"

// Register aliases shared by every routine in this file.
// res_ptr = destination, x_ptr = first source (a), y_ptr = second source (b).
#define res_ptr DI
#define x_ptr SI
#define y_ptr CX

// func gfpCopy(res, a *gfP)
// Copies one 32-byte gfP element (two 16-byte lanes, or one 32-byte AVX2 lane).
// ·supportAVX2 selects between the SSE path and the AVX2 path at run time.
TEXT ·gfpCopy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp_avx2

	// SSE: 2 x 16 bytes = 32 bytes.
	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)

	RET

copygfp_avx2:
	// AVX2: a single unaligned 32-byte move.
	VMOVDQU (x_ptr), Y0
	VMOVDQU Y0, (res_ptr)
	// Clear the upper YMM halves before returning to SSE/Go code
	// to avoid AVX->SSE transition penalties.
	VZEROUPPER
	RET

// func gfp2Copy(res, a *gfP2)
// Copies one 64-byte gfP2 element (4 x 16-byte SSE lanes / 2 x 32-byte AVX2 lanes).
TEXT ·gfp2Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp2_avx2

	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	RET

copygfp2_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)

	VZEROUPPER
	RET

// func gfp4Copy(res, a *gfP4)
// Copies one 128-byte gfP4 element (8 x 16-byte SSE lanes / 4 x 32-byte AVX2 lanes).
TEXT ·gfp4Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp4_avx2

	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3

	MOVOU (16*4)(x_ptr), X4
	MOVOU (16*5)(x_ptr), X5
	MOVOU (16*6)(x_ptr), X6
	MOVOU (16*7)(x_ptr), X7

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)

	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)
	MOVOU X6, (16*6)(res_ptr)
	MOVOU X7, (16*7)(res_ptr)

	RET

copygfp4_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1
	VMOVDQU (32*2)(x_ptr), Y2
	VMOVDQU (32*3)(x_ptr), Y3

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)

	VZEROUPPER
	RET

// func gfp6Copy(res, a *gfP6)
// Copies one 192-byte gfP6 element (12 x 16-byte SSE lanes / 6 x 32-byte AVX2 lanes).
TEXT ·gfp6Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp6_avx2

	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3

	MOVOU (16*4)(x_ptr), X4
	MOVOU (16*5)(x_ptr), X5
	MOVOU (16*6)(x_ptr), X6
	MOVOU (16*7)(x_ptr), X7

	MOVOU (16*8)(x_ptr), X8
	MOVOU (16*9)(x_ptr), X9
	MOVOU (16*10)(x_ptr), X10
	MOVOU (16*11)(x_ptr), X11

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)

	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)
	MOVOU X6, (16*6)(res_ptr)
	MOVOU X7, (16*7)(res_ptr)

	MOVOU X8, (16*8)(res_ptr)
	MOVOU X9, (16*9)(res_ptr)
	MOVOU X10, (16*10)(res_ptr)
	MOVOU X11, (16*11)(res_ptr)

	RET

copygfp6_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1
	VMOVDQU (32*2)(x_ptr), Y2
	VMOVDQU (32*3)(x_ptr), Y3
	VMOVDQU (32*4)(x_ptr), Y4
	VMOVDQU (32*5)(x_ptr), Y5

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)
	VMOVDQU Y4, (32*4)(res_ptr)
	VMOVDQU Y5, (32*5)(res_ptr)

	VZEROUPPER
	RET

// func gfp12Copy(res, a *gfP12)
// Copies one 384-byte gfP12 element. The SSE path uses X0-X7 in three
// 128-byte rounds (reusing the registers); the AVX2 path uses Y0-Y11 once.
TEXT ·gfp12Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp12_avx2

	// Round 1: bytes 0-127.
	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3

	MOVOU (16*4)(x_ptr), X4
	MOVOU (16*5)(x_ptr), X5
	MOVOU (16*6)(x_ptr), X6
	MOVOU (16*7)(x_ptr), X7

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)

	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)
	MOVOU X6, (16*6)(res_ptr)
	MOVOU X7, (16*7)(res_ptr)

	// Round 2: bytes 128-255.
	MOVOU (16*8)(x_ptr), X0
	MOVOU (16*9)(x_ptr), X1
	MOVOU (16*10)(x_ptr), X2
	MOVOU (16*11)(x_ptr), X3

	MOVOU (16*12)(x_ptr), X4
	MOVOU (16*13)(x_ptr), X5
	MOVOU (16*14)(x_ptr), X6
	MOVOU (16*15)(x_ptr), X7

	MOVOU X0, (16*8)(res_ptr)
	MOVOU X1, (16*9)(res_ptr)
	MOVOU X2, (16*10)(res_ptr)
	MOVOU X3, (16*11)(res_ptr)

	MOVOU X4, (16*12)(res_ptr)
	MOVOU X5, (16*13)(res_ptr)
	MOVOU X6, (16*14)(res_ptr)
	MOVOU X7, (16*15)(res_ptr)

	// Round 3: bytes 256-383.
	MOVOU (16*16)(x_ptr), X0
	MOVOU (16*17)(x_ptr), X1
	MOVOU (16*18)(x_ptr), X2
	MOVOU (16*19)(x_ptr), X3

	MOVOU (16*20)(x_ptr), X4
	MOVOU (16*21)(x_ptr), X5
	MOVOU (16*22)(x_ptr), X6
	MOVOU (16*23)(x_ptr), X7

	MOVOU X0, (16*16)(res_ptr)
	MOVOU X1, (16*17)(res_ptr)
	MOVOU X2, (16*18)(res_ptr)
	MOVOU X3, (16*19)(res_ptr)

	MOVOU X4, (16*20)(res_ptr)
	MOVOU X5, (16*21)(res_ptr)
	MOVOU X6, (16*22)(res_ptr)
	MOVOU X7, (16*23)(res_ptr)

	RET

copygfp12_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1
	VMOVDQU (32*2)(x_ptr), Y2
	VMOVDQU (32*3)(x_ptr), Y3

	VMOVDQU (32*4)(x_ptr), Y4
	VMOVDQU (32*5)(x_ptr), Y5
	VMOVDQU (32*6)(x_ptr), Y6
	VMOVDQU (32*7)(x_ptr), Y7

	VMOVDQU (32*8)(x_ptr), Y8
	VMOVDQU (32*9)(x_ptr), Y9
	VMOVDQU (32*10)(x_ptr), Y10
	VMOVDQU (32*11)(x_ptr), Y11

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)

	VMOVDQU Y4, (32*4)(res_ptr)
	VMOVDQU Y5, (32*5)(res_ptr)
	VMOVDQU Y6, (32*6)(res_ptr)
	VMOVDQU Y7, (32*7)(res_ptr)

	VMOVDQU Y8, (32*8)(res_ptr)
	VMOVDQU Y9, (32*9)(res_ptr)
	VMOVDQU Y10, (32*10)(res_ptr)
	VMOVDQU Y11, (32*11)(res_ptr)

	VZEROUPPER
	RET

// func gfP12MovCond(res, a, b *gfP12, cond int)
// Constant-time 384-byte select: res = (cond != 0) ? *a : *b.
// No data-dependent branches or memory accesses; the selection is done
// entirely with masks so the choice does not leak through timing.
//
// Mask construction (both paths):
//   X12/Y12 <- broadcast(cond) ; compare-equal with zero
//   => mask = all-ones when cond == 0, all-zeros when cond != 0
// Per 16/32-byte chunk:
//   res = (a ANDN mask) XOR (b AND mask)
// i.e. cond==0 picks b, cond!=0 picks a.
TEXT ·gfP12MovCond(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr
	MOVQ b+16(FP), y_ptr
	MOVQ cond+24(FP), X12

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  move_avx2

	// Build the selection mask in X12 (see header comment).
	PXOR X13, X13
	PSHUFD $0, X12, X12
	PCMPEQL X13, X12

	// Chunks 0-5: masked copy of bytes 0-95.
	MOVOU X12, X0
	MOVOU (16*0)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*1)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*2)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*3)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*4)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*5)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*0)(y_ptr), X6
	MOVOU (16*1)(y_ptr), X7
	MOVOU (16*2)(y_ptr), X8
	MOVOU (16*3)(y_ptr), X9
	MOVOU (16*4)(y_ptr), X10
	MOVOU (16*5)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	// Combine the two disjoint masked halves (XOR == OR here).
	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)

	// Chunks 6-11: bytes 96-191.
	MOVOU X12, X0
	MOVOU (16*6)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*7)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*8)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*9)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*10)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*11)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*6)(y_ptr), X6
	MOVOU (16*7)(y_ptr), X7
	MOVOU (16*8)(y_ptr), X8
	MOVOU (16*9)(y_ptr), X9
	MOVOU (16*10)(y_ptr), X10
	MOVOU (16*11)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*6)(res_ptr)
	MOVOU X1, (16*7)(res_ptr)
	MOVOU X2, (16*8)(res_ptr)
	MOVOU X3, (16*9)(res_ptr)
	MOVOU X4, (16*10)(res_ptr)
	MOVOU X5, (16*11)(res_ptr)

	// Chunks 12-17: bytes 192-287.
	MOVOU X12, X0
	MOVOU (16*12)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*13)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*14)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*15)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*16)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*17)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*12)(y_ptr), X6
	MOVOU (16*13)(y_ptr), X7
	MOVOU (16*14)(y_ptr), X8
	MOVOU (16*15)(y_ptr), X9
	MOVOU (16*16)(y_ptr), X10
	MOVOU (16*17)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*12)(res_ptr)
	MOVOU X1, (16*13)(res_ptr)
	MOVOU X2, (16*14)(res_ptr)
	MOVOU X3, (16*15)(res_ptr)
	MOVOU X4, (16*16)(res_ptr)
	MOVOU X5, (16*17)(res_ptr)

	// Chunks 18-23: bytes 288-383.
	MOVOU X12, X0
	MOVOU (16*18)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*19)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*20)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*21)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*22)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*23)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*18)(y_ptr), X6
	MOVOU (16*19)(y_ptr), X7
	MOVOU (16*20)(y_ptr), X8
	MOVOU (16*21)(y_ptr), X9
	MOVOU (16*22)(y_ptr), X10
	MOVOU (16*23)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*18)(res_ptr)
	MOVOU X1, (16*19)(res_ptr)
	MOVOU X2, (16*20)(res_ptr)
	MOVOU X3, (16*21)(res_ptr)
	MOVOU X4, (16*22)(res_ptr)
	MOVOU X5, (16*23)(res_ptr)

	RET

// Labels are scoped per TEXT symbol in Go asm, so reusing move_avx2
// in the other MovCond routines below is legal.
move_avx2:
	VPXOR Y13, Y13, Y13
	VPBROADCASTD X12, Y12
	VPCMPEQD Y13, Y12, Y12

	// First 192 bytes.
	VPANDN (32*0)(x_ptr), Y12, Y0
	VPANDN (32*1)(x_ptr), Y12, Y1
	VPANDN (32*2)(x_ptr), Y12, Y2
	VPANDN (32*3)(x_ptr), Y12, Y3
	VPANDN (32*4)(x_ptr), Y12, Y4
	VPANDN (32*5)(x_ptr), Y12, Y5

	VPAND (32*0)(y_ptr), Y12, Y6
	VPAND (32*1)(y_ptr), Y12, Y7
	VPAND (32*2)(y_ptr), Y12, Y8
	VPAND (32*3)(y_ptr), Y12, Y9
	VPAND (32*4)(y_ptr), Y12, Y10
	VPAND (32*5)(y_ptr), Y12, Y11

	VPXOR Y6, Y0, Y0
	VPXOR Y7, Y1, Y1
	VPXOR Y8, Y2, Y2
	VPXOR Y9, Y3, Y3
	VPXOR Y10, Y4, Y4
	VPXOR Y11, Y5, Y5

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)
	VMOVDQU Y4, (32*4)(res_ptr)
	VMOVDQU Y5, (32*5)(res_ptr)

	// Second 192 bytes.
	VPANDN (32*6)(x_ptr), Y12, Y0
	VPANDN (32*7)(x_ptr), Y12, Y1
	VPANDN (32*8)(x_ptr), Y12, Y2
	VPANDN (32*9)(x_ptr), Y12, Y3
	VPANDN (32*10)(x_ptr), Y12, Y4
	VPANDN (32*11)(x_ptr), Y12, Y5

	VPAND (32*6)(y_ptr), Y12, Y6
	VPAND (32*7)(y_ptr), Y12, Y7
	VPAND (32*8)(y_ptr), Y12, Y8
	VPAND (32*9)(y_ptr), Y12, Y9
	VPAND (32*10)(y_ptr), Y12, Y10
	VPAND (32*11)(y_ptr), Y12, Y11

	VPXOR Y6, Y0, Y0
	VPXOR Y7, Y1, Y1
	VPXOR Y8, Y2, Y2
	VPXOR Y9, Y3, Y3
	VPXOR Y10, Y4, Y4
	VPXOR Y11, Y5, Y5

	VMOVDQU Y0, (32*6)(res_ptr)
	VMOVDQU Y1, (32*7)(res_ptr)
	VMOVDQU Y2, (32*8)(res_ptr)
	VMOVDQU Y3, (32*9)(res_ptr)
	VMOVDQU Y4, (32*10)(res_ptr)
	VMOVDQU Y5, (32*11)(res_ptr)

	VZEROUPPER
	RET

// func curvePointMovCond(res, a, b *curvePoint, cond int)
// Constant-time 128-byte select: res = (cond != 0) ? *a : *b.
// Same mask technique as gfP12MovCond; a curvePoint occupies
// 8 x 16-byte chunks (4 x 32-byte AVX2 chunks).
TEXT ·curvePointMovCond(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr
	MOVQ b+16(FP), y_ptr
	MOVQ cond+24(FP), X12

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  move_avx2

	// mask = all-ones iff cond == 0.
	PXOR X13, X13
	PSHUFD $0, X12, X12
	PCMPEQL X13, X12

	// Chunks 0-5.
	MOVOU X12, X0
	MOVOU (16*0)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*1)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*2)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*3)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*4)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*5)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*0)(y_ptr), X6
	MOVOU (16*1)(y_ptr), X7
	MOVOU (16*2)(y_ptr), X8
	MOVOU (16*3)(y_ptr), X9
	MOVOU (16*4)(y_ptr), X10
	MOVOU (16*5)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)

	// Chunks 6-7 (remaining 32 bytes).
	MOVOU X12, X0
	MOVOU (16*6)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*7)(x_ptr), X7
	PANDN X7, X1

	MOVOU (16*6)(y_ptr), X6
	MOVOU (16*7)(y_ptr), X7

	PAND X12, X6
	PAND X12, X7

	PXOR X6, X0
	PXOR X7, X1

	MOVOU X0, (16*6)(res_ptr)
	MOVOU X1, (16*7)(res_ptr)

	RET

move_avx2:
	VPXOR Y13, Y13, Y13
	VPBROADCASTD X12, Y12
	VPCMPEQD Y13, Y12, Y12

	VPANDN (32*0)(x_ptr), Y12, Y0
	VPANDN (32*1)(x_ptr), Y12, Y1
	VPANDN (32*2)(x_ptr), Y12, Y2
	VPANDN (32*3)(x_ptr), Y12, Y3

	VPAND (32*0)(y_ptr), Y12, Y6
	VPAND (32*1)(y_ptr), Y12, Y7
	VPAND (32*2)(y_ptr), Y12, Y8
	VPAND (32*3)(y_ptr), Y12, Y9

	VPXOR Y6, Y0, Y0
	VPXOR Y7, Y1, Y1
	VPXOR Y8, Y2, Y2
	VPXOR Y9, Y3, Y3

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)

	VZEROUPPER
	RET

// func twistPointMovCond(res, a, b *twistPoint, cond int)
// Constant-time 256-byte select: res = (cond != 0) ? *a : *b.
// Same mask technique; a twistPoint occupies 16 x 16-byte chunks
// (8 x 32-byte AVX2 chunks).
TEXT ·twistPointMovCond(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ a+8(FP), x_ptr
	MOVQ b+16(FP), y_ptr
	MOVQ cond+24(FP), X12

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  move_avx2

	// mask = all-ones iff cond == 0.
	PXOR X13, X13
	PSHUFD $0, X12, X12
	PCMPEQL X13, X12

	// Chunks 0-5.
	MOVOU X12, X0
	MOVOU (16*0)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*1)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*2)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*3)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*4)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*5)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*0)(y_ptr), X6
	MOVOU (16*1)(y_ptr), X7
	MOVOU (16*2)(y_ptr), X8
	MOVOU (16*3)(y_ptr), X9
	MOVOU (16*4)(y_ptr), X10
	MOVOU (16*5)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)

	// Chunks 6-11.
	MOVOU X12, X0
	MOVOU (16*6)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*7)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*8)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*9)(x_ptr), X9
	PANDN X9, X3

	MOVOU X12, X4
	MOVOU (16*10)(x_ptr), X10
	PANDN X10, X4

	MOVOU X12, X5
	MOVOU (16*11)(x_ptr), X11
	PANDN X11, X5

	MOVOU (16*6)(y_ptr), X6
	MOVOU (16*7)(y_ptr), X7
	MOVOU (16*8)(y_ptr), X8
	MOVOU (16*9)(y_ptr), X9
	MOVOU (16*10)(y_ptr), X10
	MOVOU (16*11)(y_ptr), X11

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9
	PAND X12, X10
	PAND X12, X11

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3
	PXOR X10, X4
	PXOR X11, X5

	MOVOU X0, (16*6)(res_ptr)
	MOVOU X1, (16*7)(res_ptr)
	MOVOU X2, (16*8)(res_ptr)
	MOVOU X3, (16*9)(res_ptr)
	MOVOU X4, (16*10)(res_ptr)
	MOVOU X5, (16*11)(res_ptr)

	// Chunks 12-15 (remaining 64 bytes).
	MOVOU X12, X0
	MOVOU (16*12)(x_ptr), X6
	PANDN X6, X0

	MOVOU X12, X1
	MOVOU (16*13)(x_ptr), X7
	PANDN X7, X1

	MOVOU X12, X2
	MOVOU (16*14)(x_ptr), X8
	PANDN X8, X2

	MOVOU X12, X3
	MOVOU (16*15)(x_ptr), X9
	PANDN X9, X3

	MOVOU (16*12)(y_ptr), X6
	MOVOU (16*13)(y_ptr), X7
	MOVOU (16*14)(y_ptr), X8
	MOVOU (16*15)(y_ptr), X9

	PAND X12, X6
	PAND X12, X7
	PAND X12, X8
	PAND X12, X9

	PXOR X6, X0
	PXOR X7, X1
	PXOR X8, X2
	PXOR X9, X3

	MOVOU X0, (16*12)(res_ptr)
	MOVOU X1, (16*13)(res_ptr)
	MOVOU X2, (16*14)(res_ptr)
	MOVOU X3, (16*15)(res_ptr)

	RET

move_avx2:
	VPXOR Y13, Y13, Y13
	VPBROADCASTD X12, Y12
	VPCMPEQD Y13, Y12, Y12

	VPANDN (32*0)(x_ptr), Y12, Y0
	VPANDN (32*1)(x_ptr), Y12, Y1
	VPANDN (32*2)(x_ptr), Y12, Y2
	VPANDN (32*3)(x_ptr), Y12, Y3
	VPANDN (32*4)(x_ptr), Y12, Y4
	VPANDN (32*5)(x_ptr), Y12, Y5

	VPAND (32*0)(y_ptr), Y12, Y6
	VPAND (32*1)(y_ptr), Y12, Y7
	VPAND (32*2)(y_ptr), Y12, Y8
	VPAND (32*3)(y_ptr), Y12, Y9
	VPAND (32*4)(y_ptr), Y12, Y10
	VPAND (32*5)(y_ptr), Y12, Y11

	VPXOR Y6, Y0, Y0
	VPXOR Y7, Y1, Y1
	VPXOR Y8, Y2, Y2
	VPXOR Y9, Y3, Y3
	VPXOR Y10, Y4, Y4
	VPXOR Y11, Y5, Y5

	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)
	VMOVDQU Y4, (32*4)(res_ptr)
	VMOVDQU Y5, (32*5)(res_ptr)

	VPANDN (32*6)(x_ptr), Y12, Y0
	VPANDN (32*7)(x_ptr), Y12, Y1

	VPAND (32*6)(y_ptr), Y12, Y6
	VPAND (32*7)(y_ptr), Y12, Y7

	VPXOR Y6, Y0, Y0
	VPXOR Y7, Y1, Y1

	VMOVDQU Y0, (32*6)(res_ptr)
	VMOVDQU Y1, (32*7)(res_ptr)

	VZEROUPPER
	RET