// +build !noasm
// Generated by PeachPy 0.2.0 from aez_amd64.py
// Origin: github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/aez/aez_amd64.s
//
// Go (Plan 9 syntax) assembly for amd64: operands are written "src, dst".
// Every TEXT directive below uses flag 4 (NOSPLIT in Go's textflag.h) and a
// zero-sized local frame; arguments are read through the FP pseudo-register.

// func cpuidAMD64(cpuidParams *uint32)
//
// Executes CPUID. EAX is loaded from cpuidParams[0] and ECX from
// cpuidParams[2]; afterwards EAX, EBX, ECX, EDX are written back to
// cpuidParams[0..3].
TEXT ·cpuidAMD64(SB),4,$0-8
	MOVQ cpuidParams+0(FP), R15
	MOVL 0(R15), AX
	MOVL 8(R15), CX
	CPUID
	MOVL AX, 0(R15)
	MOVL BX, 4(R15)
	MOVL CX, 8(R15)
	MOVL DX, 12(R15)
	RET

// func resetAMD64SSE2()
//
// Zeroes all sixteen XMM registers (X0-X15) by XORing each with itself.
TEXT ·resetAMD64SSE2(SB),4,$0
	PXOR X0, X0
	PXOR X1, X1
	PXOR X2, X2
	PXOR X3, X3
	PXOR X4, X4
	PXOR X5, X5
	PXOR X6, X6
	PXOR X7, X7
	PXOR X8, X8
	PXOR X9, X9
	PXOR X10, X10
	PXOR X11, X11 // was "PXOR X10, X11", which XORed zero into X11 and left it uncleared
	PXOR X12, X12
	PXOR X13, X13
	PXOR X14, X14
	PXOR X15, X15
	RET

// func xorBytes1x16AMD64SSE2(a *uint8, b *uint8, dst *uint8)
//
// dst[0:16] = a[0:16] ^ b[0:16]. All accesses are unaligned (MOVOU).
TEXT ·xorBytes1x16AMD64SSE2(SB),4,$0-24
	MOVQ a+0(FP), AX
	MOVQ b+8(FP), BX
	MOVQ dst+16(FP), CX
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	PXOR X1, X0
	MOVOU X0, 0(CX)
	RET

// func xorBytes4x16AMD64SSE2(a *uint8, b *uint8, c *uint8, d *uint8, dst *uint8)
//
// dst[0:16] = a[0:16] ^ b[0:16] ^ c[0:16] ^ d[0:16]. All accesses are
// unaligned (MOVOU).
TEXT ·xorBytes4x16AMD64SSE2(SB),4,$0-40
	MOVQ a+0(FP), AX
	MOVQ b+8(FP), BX
	MOVQ c+16(FP), CX
	MOVQ d+24(FP), DX
	MOVQ dst+32(FP), DI
	MOVOU 0(AX), X0
	MOVOU 0(BX), X1
	MOVOU 0(CX), X2
	MOVOU 0(DX), X3
	PXOR X1, X0
	PXOR X3, X2
	PXOR X2, X0
	MOVOU X0, 0(DI)
	RET

// func aezAES4AMD64AESNI(j *uint8, i *uint8, l *uint8, k *uint8, src *uint8, dst *uint8)
//
// Computes four AESENC rounds over src ^ j ^ i ^ l and stores the 16-byte
// result to dst. The round keys are, in order, k[16:32], k[0:16], k[32:48]
// and an all-zero block.
// j, i, l and k are read with MOVO, which faults on addresses that are not
// 16-byte aligned; src and dst are accessed unaligned.
TEXT ·aezAES4AMD64AESNI(SB),4,$0-48
	MOVQ j+0(FP), AX
	MOVQ i+8(FP), BX
	MOVQ l+16(FP), CX
	MOVQ k+24(FP), DX
	MOVQ src+32(FP), DI
	MOVQ dst+40(FP), SI
	MOVOU 0(DI), X0
	MOVO 0(AX), X1
	MOVO 0(BX), X2
	MOVO 0(CX), X3
	// X0 = src ^ j ^ i ^ l
	PXOR X1, X0
	PXOR X3, X2
	PXOR X2, X0
	// X4 = zero round key
	PXOR X4, X4
	MOVO 0(DX), X2
	MOVO 16(DX), X1
	MOVO 32(DX), X3
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X4, X0
	MOVOU X0, 0(SI)
	RET

// func aezAES10AMD64AESNI(l *uint8, k *uint8, src *uint8, dst *uint8)
//
// Computes ten AESENC rounds over src ^ l and stores the 16-byte result to
// dst. The round keys cycle through k[0:16], k[16:32], k[32:48] and the
// tenth round uses k[0:16] again.
// k is read with MOVO (16-byte alignment required); l, src and dst are
// accessed unaligned.
TEXT ·aezAES10AMD64AESNI(SB),4,$0-32
	MOVQ l+0(FP), AX
	MOVQ k+8(FP), BX
	MOVQ src+16(FP), CX
	MOVQ dst+24(FP), DX
	MOVOU 0(CX), X0
	MOVOU 0(AX), X1
	PXOR X1, X0
	MOVO 0(BX), X2
	MOVO 16(BX), X3
	MOVO 32(BX), X1
	AESENC X2, X0
	AESENC X3, X0
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X1, X0
	AESENC X2, X0
	MOVOU X0, 0(DX)
	RET

// func aezCorePass1AMD64AESNI(src *uint8, dst *uint8, x *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
//
// Processes sz bytes from src into dst in 32-byte pairs (two 16-byte blocks
// each), handling 256-, 128-, 64- and 32-byte chunks in turn, and XORs every
// second output block of each pair into the 16-byte accumulator at x.
//
// Register roles, as set up below:
//   AX = src cursor, BX = dst cursor, CX = x, DX = l, DI = remaining bytes
//   SI = index (in 16-byte units) of the next l entry used by the tail paths
//   BP = consts (reused as a scratch pointer in the 64-byte path)
//   X0 = accumulator loaded from x
//   X1 = block loaded from i, updated after each 256-byte iteration
//   X2 = k[0:16], X3 = k[16:32], X4 = k[32:48], X5 = zero
//
// The argument slot named sz is loaded directly into DI and used as the
// byte count.
// l and consts are used as memory operands of PXOR/PAND/MOVO, which require
// 16-byte alignment.
TEXT ·aezCorePass1AMD64AESNI(SB),4,$0-64
	MOVQ src+0(FP), AX
	MOVQ dst+8(FP), BX
	MOVQ x+16(FP), CX
	MOVQ l+32(FP), DX
	MOVQ sz+56(FP), DI
	MOVQ $1, SI
	MOVOU 0(CX), X0
	MOVQ i+24(FP), BP
	MOVOU 0(BP), X1
	MOVQ k+40(FP), BP
	MOVOU 0(BP), X2
	MOVOU 16(BP), X3
	MOVOU 32(BP), X4
	MOVQ consts+48(FP), BP
	PXOR X5, X5
	// Skip the wide loop when fewer than 256 bytes remain.
	SUBQ $256, DI
	JCS vector_loop256_end
vector_loop256_begin:
	// Load the second block of each of the eight pairs.
	MOVOU 16(AX), X6
	MOVOU 48(AX), X7
	MOVOU 80(AX), X8
	MOVOU 112(AX), X9
	MOVOU 144(AX), X10
	MOVOU 176(AX), X11
	MOVOU 208(AX), X12
	MOVOU 240(AX), X13
	// XOR in X3 ^ X1 and then the per-pair l entry (l[1..7], then l[0]).
	MOVO X3, X14
	PXOR X1, X14
	PXOR X14, X6
	PXOR X14, X7
	PXOR X14, X8
	PXOR X14, X9
	PXOR X14, X10
	PXOR X14, X11
	PXOR X14, X12
	PXOR X14, X13
	PXOR 16(DX), X6
	PXOR 32(DX), X7
	PXOR 48(DX), X8
	PXOR 64(DX), X9
	PXOR 80(DX), X10
	PXOR 96(DX), X11
	PXOR 112(DX), X12
	PXOR 0(DX), X13
	// Four AESENC rounds with round keys X3, X2, X4, X5 (zero).
	AESENC X3, X6
	AESENC X3, X7
	AESENC X3, X8
	AESENC X3, X9
	AESENC X3, X10
	AESENC X3, X11
	AESENC X3, X12
	AESENC X3, X13
	AESENC X2, X6
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X2, X11
	AESENC X2, X12
	AESENC X2, X13
	AESENC X4, X6
	AESENC X4, X7
	AESENC X4, X8
	AESENC X4, X9
	AESENC X4, X10
	AESENC X4, X11
	AESENC X4, X12
	AESENC X4, X13
	AESENC X5, X6
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	AESENC X5, X11
	AESENC X5, X12
	AESENC X5, X13
	// XOR with the first block of each pair; result is the first output block.
	MOVOU 0(AX), X14
	MOVOU 32(AX), X15
	PXOR X14, X6
	PXOR X15, X7
	MOVOU 64(AX), X14
	MOVOU 96(AX), X15
	PXOR X14, X8
	PXOR X15, X9
	MOVOU 128(AX), X14
	MOVOU 160(AX), X15
	PXOR X14, X10
	PXOR X15, X11
	MOVOU 192(AX), X14
	MOVOU 224(AX), X15
	PXOR X14, X12
	PXOR X15, X13
	MOVOU X6, 0(BX)
	MOVOU X7, 32(BX)
	MOVOU X8, 64(BX)
	MOVOU X9, 96(BX)
	MOVOU X10, 128(BX)
	MOVOU X11, 160(BX)
	MOVOU X12, 192(BX)
	MOVOU X13, 224(BX)
	// Second pass: XOR with X2, then the same four AESENC rounds.
	PXOR X2, X6
	PXOR X2, X7
	PXOR X2, X8
	PXOR X2, X9
	PXOR X2, X10
	PXOR X2, X11
	PXOR X2, X12
	PXOR X2, X13
	AESENC X3, X6
	AESENC X3, X7
	AESENC X3, X8
	AESENC X3, X9
	AESENC X3, X10
	AESENC X3, X11
	AESENC X3, X12
	AESENC X3, X13
	AESENC X2, X6
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X2, X11
	AESENC X2, X12
	AESENC X2, X13
	AESENC X4, X6
	AESENC X4, X7
	AESENC X4, X8
	AESENC X4, X9
	AESENC X4, X10
	AESENC X4, X11
	AESENC X4, X12
	AESENC X4, X13
	AESENC X5, X6
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	AESENC X5, X11
	AESENC X5, X12
	AESENC X5, X13
	// XOR with the second block of each pair; result is the second output block.
	MOVOU 16(AX), X14
	MOVOU 48(AX), X15
	PXOR X14, X6
	PXOR X15, X7
	MOVOU 80(AX), X14
	MOVOU 112(AX), X15
	PXOR X14, X8
	PXOR X15, X9
	MOVOU 144(AX), X14
	MOVOU 176(AX), X15
	PXOR X14, X10
	PXOR X15, X11
	MOVOU 208(AX), X14
	MOVOU 240(AX), X15
	PXOR X14, X12
	PXOR X15, X13
	MOVOU X6, 16(BX)
	MOVOU X7, 48(BX)
	MOVOU X8, 80(BX)
	MOVOU X9, 112(BX)
	MOVOU X10, 144(BX)
	MOVOU X11, 176(BX)
	MOVOU X12, 208(BX)
	MOVOU X13, 240(BX)
	// Fold the eight second output blocks into the accumulator.
	PXOR X6, X0
	PXOR X7, X0
	PXOR X8, X0
	PXOR X9, X0
	PXOR X10, X0
	PXOR X11, X0
	PXOR X12, X0
	PXOR X13, X0
	// Update X1: byte-shuffle by consts[0:16], shift every dword left by one,
	// XOR in the sign bits masked by consts[16:32] and rotated one dword
	// (PSHUFL $147), then shuffle back.
	// NOTE(review): looks like a doubling of the i tweak - confirm against aez_amd64.py.
	MOVO 0(BP), X14
	PSHUFB X14, X1
	MOVO X1, X15
	PSRAL $31, X15
	PAND 16(BP), X15
	PSHUFL $147, X15, X15
	PSLLL $1, X1
	PXOR X15, X1
	PSHUFB X14, X1
	ADDQ $256, AX
	ADDQ $256, BX
	SUBQ $256, DI
	JCC vector_loop256_begin
vector_loop256_end:
	// Undo the loop's bias on DI, then test for a 128-byte chunk.
	ADDQ $256, DI
	SUBQ $128, DI
	JCS process_64bytes
	// 128-byte chunk: four pairs, using l[1..4]. The second input blocks stay
	// in X10-X13 for the final XOR.
	MOVOU 16(AX), X10
	MOVOU 48(AX), X11
	MOVOU 80(AX), X12
	MOVOU 112(AX), X13
	MOVO X10, X6
	MOVO X11, X7
	MOVOU X12, X8
	MOVOU X13, X9
	MOVO X3, X14
	PXOR X1, X14
	PXOR X14, X6
	PXOR X14, X7
	PXOR X14, X8
	PXOR X14, X9
	PXOR 16(DX), X6
	PXOR 32(DX), X7
	PXOR 48(DX), X8
	PXOR 64(DX), X9
	AESENC X3, X6
	AESENC X3, X7
	AESENC X3, X8
	AESENC X3, X9
	AESENC X2, X6
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X4, X6
	AESENC X4, X7
	AESENC X4, X8
	AESENC X4, X9
	AESENC X5, X6
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	MOVOU 0(AX), X14
	MOVOU 32(AX), X15
	PXOR X14, X6
	PXOR X15, X7
	MOVOU 64(AX), X14
	MOVOU 96(AX), X15
	PXOR X14, X8
	PXOR X15, X9
	MOVOU X6, 0(BX)
	MOVOU X7, 32(BX)
	MOVOU X8, 64(BX)
	MOVOU X9, 96(BX)
	PXOR X2, X6
	PXOR X2, X7
	PXOR X2, X8
	PXOR X2, X9
	AESENC X3, X6
	AESENC X3, X7
	AESENC X3, X8
	AESENC X3, X9
	AESENC X2, X6
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X4, X6
	AESENC X4, X7
	AESENC X4, X8
	AESENC X4, X9
	AESENC X5, X6
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	PXOR X10, X6
	PXOR X11, X7
	PXOR X12, X8
	PXOR X13, X9
	MOVOU X6, 16(BX)
	MOVOU X7, 48(BX)
	MOVOU X8, 80(BX)
	MOVOU X9, 112(BX)
	PXOR X6, X0
	PXOR X7, X0
	PXOR X8, X0
	PXOR X9, X0
	ADDQ $128, AX
	ADDQ $128, BX
	// Four l entries consumed.
	ADDQ $4, SI
	SUBQ $128, DI
process_64bytes:
	ADDQ $128, DI
	SUBQ $64, DI
	JCS process_32bytes
	// 64-byte chunk: two pairs, using l[SI] and l[SI+1] (BP = l + 16*SI).
	MOVQ SI, BP
	SHLQ $4, BP
	ADDQ DX, BP
	MOVOU 16(AX), X10
	MOVOU 48(AX), X11
	MOVO X10, X6
	MOVO X11, X7
	PXOR X3, X6
	PXOR X3, X7
	PXOR X1, X6
	PXOR X1, X7
	PXOR 0(BP), X6
	PXOR 16(BP), X7
	AESENC X3, X6
	AESENC X3, X7
	AESENC X2, X6
	AESENC X2, X7
	AESENC X4, X6
	AESENC X4, X7
	AESENC X5, X6
	AESENC X5, X7
	MOVOU 0(AX), X14
	MOVOU 32(AX), X15
	PXOR X14, X6
	PXOR X15, X7
	MOVOU X6, 0(BX)
	MOVOU X7, 32(BX)
	PXOR X2, X6
	PXOR X2, X7
	AESENC X3, X6
	AESENC X3, X7
	AESENC X2, X6
	AESENC X2, X7
	AESENC X4, X6
	AESENC X4, X7
	AESENC X5, X6
	AESENC X5, X7
	PXOR X10, X6
	PXOR X11, X7
	MOVOU X6, 16(BX)
	MOVOU X7, 48(BX)
	PXOR X6, X0
	PXOR X7, X0
	ADDQ $64, AX
	ADDQ $64, BX
	// Two l entries consumed.
	ADDQ $2, SI
	SUBQ $64, DI
process_32bytes:
	ADDQ $64, DI
	SUBQ $32, DI
	JCS out
	// 32-byte chunk: one pair, using l[SI & 7] (DX = l + 16*(SI & 7)).
	ANDQ $7, SI
	SHLQ $4, SI
	ADDQ SI, DX
	MOVOU 16(AX), X10
	MOVO X10, X6
	PXOR X3, X6
	PXOR X1, X6
	PXOR 0(DX), X6
	AESENC X3, X6
	AESENC X2, X6
	AESENC X4, X6
	AESENC X5, X6
	MOVOU 0(AX), X14
	PXOR X14, X6
	MOVOU X6, 0(BX)
	PXOR X2, X6
	AESENC X3, X6
	AESENC X2, X6
	AESENC X4, X6
	AESENC X5, X6
	PXOR X10, X6
	MOVOU X6, 16(BX)
	PXOR X6, X0
out:
	// Write the accumulator back to x.
	MOVOU X0, 0(CX)
	RET

// func aezCorePass2AMD64AESNI(dst *uint8, y *uint8, s *uint8, j *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
//
// Transforms sz bytes at dst in place in 32-byte pairs (two 16-byte blocks
// each), handling 256-, 128-, 64- and 32-byte chunks in turn, and XORs one
// intermediate block per pair into the 16-byte accumulator at y.
//
// Register roles, as set up below:
//   AX = dst cursor, BX = y, CX = j, DX = l, DI = remaining bytes
//   SI = index (in 16-byte units) of the next l entry used by the tail paths
//   BP = consts (reused as a scratch pointer in the 64-byte path)
//   R8 = s, R9 = saved SP
//   X0 = k[0:16], X1 = k[16:32], X2 = k[32:48], X5 = zero
//   X3 = accumulator loaded from y
//   X4 = block loaded from i, updated after each 256-byte iteration
//   X6 = s ^ j[16:32]
//
// The argument slot named sz is loaded directly into DI and used as the
// byte count.
// j, l and consts are used as memory operands of PXOR/PAND/MOVO, which
// require 16-byte alignment.
// The hardware SP is rounded down to a 32-byte boundary and lowered by 256
// bytes to provide aligned scratch space; the scratch slots that were
// written are overwritten with zero before SP is restored from R9.
TEXT ·aezCorePass2AMD64AESNI(SB),4,$0-72
	MOVQ dst+0(FP), AX
	MOVQ y+8(FP), BX
	MOVQ j+24(FP), CX
	MOVQ l+40(FP), DX
	MOVQ sz+64(FP), DI
	MOVQ $1, SI
	MOVQ k+48(FP), BP
	MOVOU 0(BP), X0
	MOVOU 16(BP), X1
	MOVOU 32(BP), X2
	MOVOU 0(BX), X3
	MOVQ i+32(FP), BP
	MOVOU 0(BP), X4
	MOVQ consts+56(FP), BP
	PXOR X5, X5
	MOVQ s+16(FP), R8
	MOVOU 0(R8), X6
	PXOR 16(CX), X6
	// Save SP, align it down to 32 bytes (mask is 2^64 - 32) and reserve 256 bytes.
	MOVQ SP, R9
	ANDQ $18446744073709551584, SP
	SUBQ $256, SP
	// Skip the wide loop when fewer than 256 bytes remain.
	SUBQ $256, DI
	JCS vector_loop256_end
vector_loop256_begin:
	// Eight copies of X6 ^ X4, each XORed with its l entry (l[1..7], then l[0]).
	MOVO X6, X7
	PXOR X4, X7
	MOVO X7, X8
	MOVO X7, X9
	MOVO X7, X10
	MOVO X7, X11
	MOVO X7, X12
	MOVO X7, X13
	MOVO X7, X14
	PXOR 16(DX), X7
	PXOR 32(DX), X8
	PXOR 48(DX), X9
	PXOR 64(DX), X10
	PXOR 80(DX), X11
	PXOR 96(DX), X12
	PXOR 112(DX), X13
	PXOR 0(DX), X14
	// Four AESENC rounds with round keys X1, X0, X2, X5 (zero).
	AESENC X1, X7
	AESENC X1, X8
	AESENC X1, X9
	AESENC X1, X10
	AESENC X1, X11
	AESENC X1, X12
	AESENC X1, X13
	AESENC X1, X14
	AESENC X0, X7
	AESENC X0, X8
	AESENC X0, X9
	AESENC X0, X10
	AESENC X0, X11
	AESENC X0, X12
	AESENC X0, X13
	AESENC X0, X14
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X2, X11
	AESENC X2, X12
	AESENC X2, X13
	AESENC X2, X14
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	AESENC X5, X11
	AESENC X5, X12
	AESENC X5, X13
	AESENC X5, X14
	// First block of each pair ^ AES output: fold into the accumulator X3 and
	// park it in the even scratch slots. X6 is used as a temporary here and
	// is reloaded at the bottom of the loop.
	MOVOU 0(AX), X15
	MOVOU 32(AX), X6
	PXOR X7, X15
	PXOR X8, X6
	PXOR X15, X3
	PXOR X6, X3
	MOVO X15, 0(SP)
	MOVO X6, 32(SP)
	MOVOU 64(AX), X15
	MOVOU 96(AX), X6
	PXOR X9, X15
	PXOR X10, X6
	PXOR X15, X3
	PXOR X6, X3
	MOVO X15, 64(SP)
	MOVO X6, 96(SP)
	MOVOU 128(AX), X15
	MOVOU 160(AX), X6
	PXOR X11, X15
	PXOR X12, X6
	PXOR X15, X3
	PXOR X6, X3
	MOVO X15, 128(SP)
	MOVO X6, 160(SP)
	MOVOU 192(AX), X15
	MOVOU 224(AX), X6
	PXOR X13, X15
	PXOR X14, X6
	PXOR X15, X3
	PXOR X6, X3
	MOVO X15, 192(SP)
	MOVO X6, 224(SP)
	// Second block of each pair ^ AES output: park in the odd scratch slots.
	MOVOU 16(AX), X15
	MOVOU 48(AX), X6
	PXOR X15, X7
	PXOR X6, X8
	MOVO X7, 16(SP)
	MOVO X8, 48(SP)
	MOVOU 80(AX), X15
	MOVOU 112(AX), X6
	PXOR X15, X9
	PXOR X6, X10
	MOVO X9, 80(SP)
	MOVO X10, 112(SP)
	MOVOU 144(AX), X15
	MOVOU 176(AX), X6
	PXOR X15, X11
	PXOR X6, X12
	MOVO X11, 144(SP)
	MOVO X12, 176(SP)
	MOVOU 208(AX), X15
	MOVOU 240(AX), X6
	PXOR X15, X13
	PXOR X6, X14
	MOVO X13, 208(SP)
	MOVO X14, 240(SP)
	// Second pass: XOR with X0, then the same four AESENC rounds.
	PXOR X0, X7
	PXOR X0, X8
	PXOR X0, X9
	PXOR X0, X10
	PXOR X0, X11
	PXOR X0, X12
	PXOR X0, X13
	PXOR X0, X14
	AESENC X1, X7
	AESENC X1, X8
	AESENC X1, X9
	AESENC X1, X10
	AESENC X1, X11
	AESENC X1, X12
	AESENC X1, X13
	AESENC X1, X14
	AESENC X0, X7
	AESENC X0, X8
	AESENC X0, X9
	AESENC X0, X10
	AESENC X0, X11
	AESENC X0, X12
	AESENC X0, X13
	AESENC X0, X14
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X2, X11
	AESENC X2, X12
	AESENC X2, X13
	AESENC X2, X14
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	AESENC X5, X11
	AESENC X5, X12
	AESENC X5, X13
	AESENC X5, X14
	// XOR with the even scratch slots; store as the second block of each pair.
	PXOR 0(SP), X7
	PXOR 32(SP), X8
	PXOR 64(SP), X9
	PXOR 96(SP), X10
	PXOR 128(SP), X11
	PXOR 160(SP), X12
	PXOR 192(SP), X13
	PXOR 224(SP), X14
	MOVOU X7, 16(AX)
	MOVOU X8, 48(AX)
	MOVOU X9, 80(AX)
	MOVOU X10, 112(AX)
	MOVOU X11, 144(AX)
	MOVOU X12, 176(AX)
	MOVOU X13, 208(AX)
	MOVOU X14, 240(AX)
	// Third pass: XOR with j[0:16] ^ X4 and the l entry, four AESENC rounds.
	MOVO 0(CX), X15
	PXOR X4, X15
	PXOR X15, X7
	PXOR X15, X8
	PXOR X15, X9
	PXOR X15, X10
	PXOR X15, X11
	PXOR X15, X12
	PXOR X15, X13
	PXOR X15, X14
	PXOR 16(DX), X7
	PXOR 32(DX), X8
	PXOR 48(DX), X9
	PXOR 64(DX), X10
	PXOR 80(DX), X11
	PXOR 96(DX), X12
	PXOR 112(DX), X13
	PXOR 0(DX), X14
	AESENC X1, X7
	AESENC X1, X8
	AESENC X1, X9
	AESENC X1, X10
	AESENC X1, X11
	AESENC X1, X12
	AESENC X1, X13
	AESENC X1, X14
	AESENC X0, X7
	AESENC X0, X8
	AESENC X0, X9
	AESENC X0, X10
	AESENC X0, X11
	AESENC X0, X12
	AESENC X0, X13
	AESENC X0, X14
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X2, X11
	AESENC X2, X12
	AESENC X2, X13
	AESENC X2, X14
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	AESENC X5, X11
	AESENC X5, X12
	AESENC X5, X13
	AESENC X5, X14
	// XOR with the odd scratch slots; store as the first block of each pair.
	PXOR 16(SP), X7
	PXOR 48(SP), X8
	PXOR 80(SP), X9
	PXOR 112(SP), X10
	PXOR 144(SP), X11
	PXOR 176(SP), X12
	PXOR 208(SP), X13
	PXOR 240(SP), X14
	MOVOU X7, 0(AX)
	MOVOU X8, 32(AX)
	MOVOU X9, 64(AX)
	MOVOU X10, 96(AX)
	MOVOU X11, 128(AX)
	MOVOU X12, 160(AX)
	MOVOU X13, 192(AX)
	MOVOU X14, 224(AX)
	// Update X4: byte-shuffle by consts[0:16], shift every dword left by one,
	// XOR in the sign bits masked by consts[16:32] and rotated one dword
	// (PSHUFL $147), then shuffle back.
	// NOTE(review): looks like a doubling of the i tweak - confirm against aez_amd64.py.
	MOVO 0(BP), X15
	PSHUFB X15, X4
	MOVO X4, X6
	PSRAL $31, X6
	PAND 16(BP), X6
	PSHUFL $147, X6, X6
	PSLLL $1, X4
	PXOR X6, X4
	PSHUFB X15, X4
	// Restore X6 = s ^ j[16:32], which was clobbered as a temporary above.
	MOVOU 0(R8), X6
	PXOR 16(CX), X6
	ADDQ $256, AX
	SUBQ $256, DI
	JCC vector_loop256_begin
	// The loop ran at least once: zero the scratch slots that the exit path
	// at "out" does not clear.
	MOVO X5, 16(SP)
	MOVO X5, 48(SP)
	MOVO X5, 80(SP)
	MOVO X5, 112(SP)
	MOVO X5, 128(SP)
	MOVO X5, 144(SP)
	MOVO X5, 160(SP)
	MOVO X5, 176(SP)
	MOVO X5, 192(SP)
	MOVO X5, 208(SP)
	MOVO X5, 224(SP)
	MOVO X5, 240(SP)
vector_loop256_end:
	// Undo the loop's bias on DI, then test for a 128-byte chunk.
	ADDQ $256, DI
	SUBQ $128, DI
	JCS process_64bytes
	// 128-byte chunk: four pairs, using l[1..4].
	MOVO X6, X7
	PXOR X4, X7
	MOVO X7, X8
	MOVO X7, X9
	MOVO X7, X10
	PXOR 16(DX), X7
	PXOR 32(DX), X8
	PXOR 48(DX), X9
	PXOR 64(DX), X10
	AESENC X1, X7
	AESENC X1, X8
	AESENC X1, X9
	AESENC X1, X10
	AESENC X0, X7
	AESENC X0, X8
	AESENC X0, X9
	AESENC X0, X10
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	MOVOU 0(AX), X11
	MOVOU 32(AX), X13
	MOVOU 64(AX), X12
	MOVOU 96(AX), X14
	PXOR X7, X11
	PXOR X8, X13
	PXOR X9, X12
	PXOR X10, X14
	PXOR X11, X3
	PXOR X13, X3
	PXOR X12, X3
	PXOR X14, X3
	MOVO X11, 0(SP)
	MOVO X13, 32(SP)
	MOVO X12, 64(SP)
	MOVO X14, 96(SP)
	MOVOU 16(AX), X12
	MOVOU 48(AX), X14
	MOVOU 80(AX), X11
	MOVOU 112(AX), X13
	PXOR X12, X7
	PXOR X14, X8
	PXOR X11, X9
	PXOR X13, X10
	// Intermediate store; these four slots are overwritten again below.
	MOVOU X7, 16(AX)
	MOVOU X8, 48(AX)
	MOVOU X9, 80(AX)
	MOVOU X10, 112(AX)
	// Keep the intermediate values in registers for the final XOR.
	MOVO X7, X12
	MOVO X8, X14
	MOVO X9, X11
	MOVO X10, X13
	PXOR X0, X7
	PXOR X0, X8
	PXOR X0, X9
	PXOR X0, X10
	AESENC X1, X7
	AESENC X1, X8
	AESENC X1, X9
	AESENC X1, X10
	AESENC X0, X7
	AESENC X0, X8
	AESENC X0, X9
	AESENC X0, X10
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	PXOR 0(SP), X7
	PXOR 32(SP), X8
	PXOR 64(SP), X9
	PXOR 96(SP), X10
	MOVOU X7, 16(AX)
	MOVOU X8, 48(AX)
	MOVOU X9, 80(AX)
	MOVOU X10, 112(AX)
	PXOR 0(CX), X7
	PXOR 0(CX), X8
	PXOR 0(CX), X9
	PXOR 0(CX), X10
	PXOR X4, X7
	PXOR X4, X8
	PXOR X4, X9
	PXOR X4, X10
	PXOR 16(DX), X7
	PXOR 32(DX), X8
	PXOR 48(DX), X9
	PXOR 64(DX), X10
	AESENC X1, X7
	AESENC X1, X8
	AESENC X1, X9
	AESENC X1, X10
	AESENC X0, X7
	AESENC X0, X8
	AESENC X0, X9
	AESENC X0, X10
	AESENC X2, X7
	AESENC X2, X8
	AESENC X2, X9
	AESENC X2, X10
	AESENC X5, X7
	AESENC X5, X8
	AESENC X5, X9
	AESENC X5, X10
	PXOR X12, X7
	PXOR X14, X8
	PXOR X11, X9
	PXOR X13, X10
	MOVOU X7, 0(AX)
	MOVOU X8, 32(AX)
	MOVOU X9, 64(AX)
	MOVOU X10, 96(AX)
	ADDQ $128, AX
	// Four l entries consumed.
	ADDQ $4, SI
	SUBQ $128, DI
process_64bytes:
	ADDQ $128, DI
	SUBQ $64, DI
	JCS process_32bytes
	// 64-byte chunk: two pairs, using l[SI] and l[SI+1] (BP = l + 16*SI).
	// Everything stays in registers; no scratch slots are used.
	MOVQ SI, BP
	SHLQ $4, BP
	ADDQ DX, BP
	MOVO X6, X7
	PXOR X4, X7
	MOVO X7, X8
	PXOR 0(BP), X7
	PXOR 16(BP), X8
	AESENC X1, X7
	AESENC X1, X8
	AESENC X0, X7
	AESENC X0, X8
	AESENC X2, X7
	AESENC X2, X8
	AESENC X5, X7
	AESENC X5, X8
	MOVOU 0(AX), X11
	MOVOU 16(AX), X12
	MOVOU 32(AX), X13
	MOVOU 48(AX), X14
	PXOR X7, X11
	PXOR X8, X13
	PXOR X11, X3
	PXOR X13, X3
	PXOR X12, X7
	PXOR X14, X8
	MOVO X7, X12
	MOVO X8, X14
	PXOR X0, X7
	PXOR X0, X8
	AESENC X1, X7
	AESENC X1, X8
	AESENC X0, X7
	AESENC X0, X8
	AESENC X2, X7
	AESENC X2, X8
	AESENC X5, X7
	AESENC X5, X8
	PXOR X11, X7
	PXOR X13, X8
	MOVO X7, X11
	MOVO X8, X13
	PXOR 0(CX), X7
	PXOR 0(CX), X8
	PXOR X4, X7
	PXOR X4, X8
	PXOR 0(BP), X7
	PXOR 16(BP), X8
	AESENC X1, X7
	AESENC X1, X8
	AESENC X0, X7
	AESENC X0, X8
	AESENC X2, X7
	AESENC X2, X8
	AESENC X5, X7
	AESENC X5, X8
	PXOR X7, X12
	PXOR X8, X14
	MOVOU X12, 0(AX)
	MOVOU X11, 16(AX)
	MOVOU X14, 32(AX)
	MOVOU X13, 48(AX)
	ADDQ $64, AX
	// Two l entries consumed.
	ADDQ $2, SI
	SUBQ $64, DI
process_32bytes:
	ADDQ $64, DI
	SUBQ $32, DI
	JCS out
	// 32-byte chunk: one pair, using l[SI & 7] (DX = l + 16*(SI & 7)).
	ANDQ $7, SI
	SHLQ $4, SI
	ADDQ SI, DX
	MOVO X6, X7
	PXOR X4, X7
	PXOR 0(DX), X7
	AESENC X1, X7
	AESENC X0, X7
	AESENC X2, X7
	AESENC X5, X7
	MOVOU 0(AX), X11
	MOVOU 16(AX), X12
	PXOR X7, X11
	PXOR X11, X3
	PXOR X12, X7
	MOVO X7, X12
	PXOR X0, X7
	AESENC X1, X7
	AESENC X0, X7
	AESENC X2, X7
	AESENC X5, X7
	PXOR X11, X7
	MOVO X7, X11
	PXOR 0(CX), X7
	PXOR X4, X7
	PXOR 0(DX), X7
	AESENC X1, X7
	AESENC X0, X7
	AESENC X2, X7
	AESENC X5, X7
	PXOR X7, X12
	MOVOU X12, 0(AX)
	MOVOU X11, 16(AX)
out:
	// Write the accumulator back to y, zero the remaining scratch slots and
	// restore the caller's SP.
	MOVOU X3, 0(BX)
	MOVO X5, 0(SP)
	MOVO X5, 32(SP)
	MOVO X5, 64(SP)
	MOVO X5, 96(SP)
	MOVQ R9, SP
	RET