github.com/mad-day/Yawning-crypto@v0.0.0-20190711051033-5a5f8cca32ec/aez/aez_amd64.s (about)

     1  // +build !noasm
     2  // Generated by PeachPy 0.2.0 from aez_amd64.py
     3  
     4  
     5  // func cpuidAMD64(cpuidParams *uint32)
        //
        // Executes CPUID using the four consecutive 32-bit words at cpuidParams
        // as both input and output:
        //   in:  word 0 (offset 0) -> EAX, word 2 (offset 8) -> ECX
        //   out: EAX, EBX, ECX, EDX -> words 0, 1, 2, 3
        // Clobbers AX, BX, CX, DX and DI.
     6  TEXT ·cpuidAMD64(SB),4,$0-8
     7  	MOVQ cpuidParams+0(FP), DI // DI = parameter block; CPUID overwrites AX/BX/CX/DX, so keep the pointer elsewhere
     8  	MOVL 0(DI), AX
     9  	MOVL 8(DI), CX
    10  	CPUID
    11  	MOVL DX, 12(DI)
    12  	MOVL CX, 8(DI)
    13  	MOVL BX, 4(DI)
    14  	MOVL AX, 0(DI)
    15  	RET
    16  
    17  // func resetAMD64SSE2()
        //
        // Zeroes every SSE register X0-X15 by XORing each register with itself.
        // Takes no arguments and touches no memory.
    18  TEXT ·resetAMD64SSE2(SB),4,$0
    19  	PXOR X0, X0
    20  	PXOR X1, X1
    21  	PXOR X2, X2
    22  	PXOR X3, X3
    23  	PXOR X4, X4
    24  	PXOR X5, X5
    25  	PXOR X6, X6
    26  	PXOR X7, X7
    27  	PXOR X8, X8
    28  	PXOR X9, X9
    29  	PXOR X10, X10
    30  	PXOR X11, X11 // source must be X11 itself: XORing in the already-zero X10 would leave X11 unchanged
    31  	PXOR X12, X12
    32  	PXOR X13, X13
    33  	PXOR X14, X14
    34  	PXOR X15, X15
    35  	RET
    36  
    37  // func xorBytes1x16AMD64SSE2(a *uint8, b *uint8, dst *uint8)
        //
        // dst[0:16] = a[0:16] ^ b[0:16]. All three pointers are accessed with
        // unaligned 16-byte moves, so no alignment is required.
    38  TEXT ·xorBytes1x16AMD64SSE2(SB),4,$0-24
    39  	MOVQ b+8(FP), SI
    40  	MOVQ a+0(FP), DI
    41  	MOVQ dst+16(FP), DX
    42  	MOVOU 0(SI), X1 // X1 = b
    43  	MOVOU 0(DI), X0 // X0 = a
    44  	PXOR X0, X1 // X1 = b ^ a
    45  	MOVOU X1, 0(DX)
    46  	RET
    47  
    48  // func xorBytes4x16AMD64SSE2(a *uint8, b *uint8, c *uint8, d *uint8, dst *uint8)
        //
        // dst[0:16] = a[0:16] ^ b[0:16] ^ c[0:16] ^ d[0:16]. Every access is an
        // unaligned 16-byte move, so no alignment is required.
    49  TEXT ·xorBytes4x16AMD64SSE2(SB),4,$0-40
    50  	MOVQ a+0(FP), R8
    51  	MOVQ b+8(FP), R9
    52  	MOVQ c+16(FP), R10
    53  	MOVQ d+24(FP), R11
    54  	MOVQ dst+32(FP), DI
    55  	MOVOU 0(R8), X0
    56  	MOVOU 0(R9), X1
    57  	MOVOU 0(R10), X2
    58  	MOVOU 0(R11), X3
    59  	PXOR X1, X0 // fold b, c and d into the accumulator one at a time
    60  	PXOR X2, X0
    61  	PXOR X3, X0
    62  	MOVOU X0, 0(DI)
    63  	RET
    64  
    65  // func aezAES4AMD64AESNI(j *uint8, i *uint8, l *uint8, k *uint8, src *uint8, dst *uint8)
        //
        // dst[0:16] = four AESENC rounds applied to (src ^ j ^ i ^ l), using the
        // round keys at k+16, k+0, k+32 and finally an all-zero key, in that order.
        // src and dst are accessed unaligned (MOVOU); j, i, l and k are read with
        // MOVO, which requires 16-byte alignment of those pointers.
    66  TEXT ·aezAES4AMD64AESNI(SB),4,$0-48
    67  	MOVQ src+32(FP), SI
    68  	MOVQ j+0(FP), R8
    69  	MOVQ i+8(FP), R9
    70  	MOVQ l+16(FP), R10
    71  	MOVQ k+24(FP), R11
    72  	MOVQ dst+40(FP), DI
    73  	MOVOU 0(SI), X0 // X0 = state, starts as the source block
    74  	MOVO 0(R8), X5
    75  	MOVO 0(R9), X6
    76  	MOVO 0(R10), X7
    77  	PXOR X5, X0 // state ^= j
    78  	PXOR X6, X0 // state ^= i
    79  	PXOR X7, X0 // state ^= l
    80  	MOVO 16(R11), X1 // first round key
    81  	MOVO 0(R11), X2 // second round key
    82  	MOVO 32(R11), X3 // third round key
    83  	PXOR X4, X4 // fourth round key is all zero
    84  	AESENC X1, X0
    85  	AESENC X2, X0
    86  	AESENC X3, X0
    87  	AESENC X4, X0
    88  	MOVOU X0, 0(DI)
    89  	RET
    90  
    91  // func aezAES10AMD64AESNI(l *uint8, k *uint8, src *uint8, dst *uint8)
        //
        // dst[0:16] = ten AESENC rounds applied to (src ^ l). The three 16-byte
        // round keys at k+0, k+16 and k+32 are cycled in that order, so the
        // tenth round uses k+0 again. l, src and dst are accessed unaligned;
        // k is read with MOVO and must be 16-byte aligned.
    92  TEXT ·aezAES10AMD64AESNI(SB),4,$0-32
    93  	MOVQ src+16(FP), SI
    94  	MOVQ l+0(FP), R8
    95  	MOVQ k+8(FP), R9
    96  	MOVQ dst+24(FP), DI
    97  	MOVOU 0(R8), X4
    98  	MOVOU 0(SI), X0
    99  	PXOR X4, X0 // state = src ^ l
   100  	MOVO 0(R9), X1
   101  	MOVO 16(R9), X2
   102  	MOVO 32(R9), X3
   103  	AESENC X1, X0
   104  	AESENC X2, X0
   105  	AESENC X3, X0
   106  	AESENC X1, X0
   107  	AESENC X2, X0
   108  	AESENC X3, X0
   109  	AESENC X1, X0
   110  	AESENC X2, X0
   111  	AESENC X3, X0
   112  	AESENC X1, X0
   113  	MOVOU X0, 0(DI)
   114  	RET
   115  
   116  // func aezCorePass1AMD64AESNI(src *uint8, dst *uint8, x *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
        //
        // Processes sz bytes from src into dst in 32-byte pairs (even block at
        // offset 0, odd block at offset 16) and folds the results into the 16-byte
        // accumulator at x. For each pair:
        //   dst_even = AES4(src_odd ^ tweak ^ l-entry) ^ src_even
        //   dst_odd  = AES4(dst_even ^ key0) ^ src_odd
        //   x       ^= dst_odd
        // where AES4 is four AESENC rounds with the keys at k+16, k+0, k+32 and zero.
        // Work is done in a 256-byte loop followed by 128-, 64- and 32-byte tails.
        //
        // NOTE(review): sz is consumed by value as a byte count (loaded with MOVQ and
        // decremented directly); the `*uint` in the signature comment looks
        // inconsistent with that - confirm against the Go declaration.
        //
        // Register roles:
        //   AX = src cursor, BX = dst cursor, CX = x, DX = base of the l table
        //   DI = bytes remaining, SI = l-table index used by the tails (starts at 1)
        //   R8 = scratch pointer (i, k, consts, then an l-table entry)
        //   X0 = accumulator, X1 = tweak loaded from i
        //   X2/X3/X4 = round keys at k+0/k+16/k+32, X5 = zero
        //
        // l and consts are used as aligned memory operands (PXOR/PAND/MOVO from
        // memory), so they must be 16-byte aligned.
        //
        // R8 is used for scratch instead of BP: the frame size is zero, so the
        // assembler inserts no BP save/restore and a clobbered BP would be returned
        // to the caller, breaking frame-pointer based unwinding.
   117  TEXT ·aezCorePass1AMD64AESNI(SB),4,$0-64
   118  	MOVQ src+0(FP), AX
   119  	MOVQ dst+8(FP), BX
   120  	MOVQ x+16(FP), CX
   121  	MOVQ l+32(FP), DX
   122  	MOVQ sz+56(FP), DI
   123  	MOVQ $1, SI
   124  	MOVOU 0(CX), X0
   125  	MOVQ i+24(FP), R8
   126  	MOVOU 0(R8), X1
   127  	MOVQ k+40(FP), R8
   128  	MOVOU 0(R8), X2
   129  	MOVOU 16(R8), X3
   130  	MOVOU 32(R8), X4
   131  	MOVQ consts+48(FP), R8
   132  	PXOR X5, X5
   133  	SUBQ $256, DI
   134  	JCS vector_loop256_end
   135  vector_loop256_begin:
        		// Load the eight odd blocks and apply tweak ^ key1 plus the per-lane l entry
        		// (lanes use l entries 1..7, then 0).
   136  		MOVOU 16(AX), X6
   137  		MOVOU 48(AX), X7
   138  		MOVOU 80(AX), X8
   139  		MOVOU 112(AX), X9
   140  		MOVOU 144(AX), X10
   141  		MOVOU 176(AX), X11
   142  		MOVOU 208(AX), X12
   143  		MOVOU 240(AX), X13
   144  		MOVO X3, X14
   145  		PXOR X1, X14
   146  		PXOR X14, X6
   147  		PXOR X14, X7
   148  		PXOR X14, X8
   149  		PXOR X14, X9
   150  		PXOR X14, X10
   151  		PXOR X14, X11
   152  		PXOR X14, X12
   153  		PXOR X14, X13
   154  		PXOR 16(DX), X6
   155  		PXOR 32(DX), X7
   156  		PXOR 48(DX), X8
   157  		PXOR 64(DX), X9
   158  		PXOR 80(DX), X10
   159  		PXOR 96(DX), X11
   160  		PXOR 112(DX), X12
   161  		PXOR 0(DX), X13
        		// First AES4 over all eight lanes.
   162  		AESENC X3, X6
   163  		AESENC X3, X7
   164  		AESENC X3, X8
   165  		AESENC X3, X9
   166  		AESENC X3, X10
   167  		AESENC X3, X11
   168  		AESENC X3, X12
   169  		AESENC X3, X13
   170  		AESENC X2, X6
   171  		AESENC X2, X7
   172  		AESENC X2, X8
   173  		AESENC X2, X9
   174  		AESENC X2, X10
   175  		AESENC X2, X11
   176  		AESENC X2, X12
   177  		AESENC X2, X13
   178  		AESENC X4, X6
   179  		AESENC X4, X7
   180  		AESENC X4, X8
   181  		AESENC X4, X9
   182  		AESENC X4, X10
   183  		AESENC X4, X11
   184  		AESENC X4, X12
   185  		AESENC X4, X13
   186  		AESENC X5, X6
   187  		AESENC X5, X7
   188  		AESENC X5, X8
   189  		AESENC X5, X9
   190  		AESENC X5, X10
   191  		AESENC X5, X11
   192  		AESENC X5, X12
   193  		AESENC X5, X13
        		// XOR with the even source blocks and write the even destination blocks.
   194  		MOVOU 0(AX), X14
   195  		MOVOU 32(AX), X15
   196  		PXOR X14, X6
   197  		PXOR X15, X7
   198  		MOVOU 64(AX), X14
   199  		MOVOU 96(AX), X15
   200  		PXOR X14, X8
   201  		PXOR X15, X9
   202  		MOVOU 128(AX), X14
   203  		MOVOU 160(AX), X15
   204  		PXOR X14, X10
   205  		PXOR X15, X11
   206  		MOVOU 192(AX), X14
   207  		MOVOU 224(AX), X15
   208  		PXOR X14, X12
   209  		PXOR X15, X13
   210  		MOVOU X6, 0(BX)
   211  		MOVOU X7, 32(BX)
   212  		MOVOU X8, 64(BX)
   213  		MOVOU X9, 96(BX)
   214  		MOVOU X10, 128(BX)
   215  		MOVOU X11, 160(BX)
   216  		MOVOU X12, 192(BX)
   217  		MOVOU X13, 224(BX)
        		// Second AES4, input is the even output ^ key0.
   218  		PXOR X2, X6
   219  		PXOR X2, X7
   220  		PXOR X2, X8
   221  		PXOR X2, X9
   222  		PXOR X2, X10
   223  		PXOR X2, X11
   224  		PXOR X2, X12
   225  		PXOR X2, X13
   226  		AESENC X3, X6
   227  		AESENC X3, X7
   228  		AESENC X3, X8
   229  		AESENC X3, X9
   230  		AESENC X3, X10
   231  		AESENC X3, X11
   232  		AESENC X3, X12
   233  		AESENC X3, X13
   234  		AESENC X2, X6
   235  		AESENC X2, X7
   236  		AESENC X2, X8
   237  		AESENC X2, X9
   238  		AESENC X2, X10
   239  		AESENC X2, X11
   240  		AESENC X2, X12
   241  		AESENC X2, X13
   242  		AESENC X4, X6
   243  		AESENC X4, X7
   244  		AESENC X4, X8
   245  		AESENC X4, X9
   246  		AESENC X4, X10
   247  		AESENC X4, X11
   248  		AESENC X4, X12
   249  		AESENC X4, X13
   250  		AESENC X5, X6
   251  		AESENC X5, X7
   252  		AESENC X5, X8
   253  		AESENC X5, X9
   254  		AESENC X5, X10
   255  		AESENC X5, X11
   256  		AESENC X5, X12
   257  		AESENC X5, X13
        		// XOR with the odd source blocks and write the odd destination blocks.
   258  		MOVOU 16(AX), X14
   259  		MOVOU 48(AX), X15
   260  		PXOR X14, X6
   261  		PXOR X15, X7
   262  		MOVOU 80(AX), X14
   263  		MOVOU 112(AX), X15
   264  		PXOR X14, X8
   265  		PXOR X15, X9
   266  		MOVOU 144(AX), X14
   267  		MOVOU 176(AX), X15
   268  		PXOR X14, X10
   269  		PXOR X15, X11
   270  		MOVOU 208(AX), X14
   271  		MOVOU 240(AX), X15
   272  		PXOR X14, X12
   273  		PXOR X15, X13
   274  		MOVOU X6, 16(BX)
   275  		MOVOU X7, 48(BX)
   276  		MOVOU X8, 80(BX)
   277  		MOVOU X9, 112(BX)
   278  		MOVOU X10, 144(BX)
   279  		MOVOU X11, 176(BX)
   280  		MOVOU X12, 208(BX)
   281  		MOVOU X13, 240(BX)
        		// Fold every odd output into the accumulator.
   282  		PXOR X6, X0
   283  		PXOR X7, X0
   284  		PXOR X8, X0
   285  		PXOR X9, X0
   286  		PXOR X10, X0
   287  		PXOR X11, X0
   288  		PXOR X12, X0
   289  		PXOR X13, X0
        		// Advance the tweak in X1 using the two 16-byte constants at consts+0
        		// (shuffle mask) and consts+16 (AND mask).
        		// NOTE(review): presumably a GF(2^128) doubling - confirm against the Go caller.
   290  		MOVO 0(R8), X14
   291  		PSHUFB X14, X1
   292  		MOVO X1, X15
   293  		PSRAL $31, X15
   294  		PAND 16(R8), X15
   295  		PSHUFL $147, X15, X15
   296  		PSLLL $1, X1
   297  		PXOR X15, X1
   298  		PSHUFB X14, X1
   299  		ADDQ $256, AX
   300  		ADDQ $256, BX
   301  		SUBQ $256, DI
   302  		JCC vector_loop256_begin
   303  vector_loop256_end:
        	// 128-byte tail: four pairs using l entries 1..4; the odd source blocks are
        	// kept in X10-X13 for the final XOR.
   304  	ADDQ $256, DI
   305  	SUBQ $128, DI
   306  	JCS process_64bytes
   307  	MOVOU 16(AX), X10
   308  	MOVOU 48(AX), X11
   309  	MOVOU 80(AX), X12
   310  	MOVOU 112(AX), X13
   311  	MOVO X10, X6
   312  	MOVO X11, X7
   313  	MOVOU X12, X8
   314  	MOVOU X13, X9
   315  	MOVO X3, X14
   316  	PXOR X1, X14
   317  	PXOR X14, X6
   318  	PXOR X14, X7
   319  	PXOR X14, X8
   320  	PXOR X14, X9
   321  	PXOR 16(DX), X6
   322  	PXOR 32(DX), X7
   323  	PXOR 48(DX), X8
   324  	PXOR 64(DX), X9
   325  	AESENC X3, X6
   326  	AESENC X3, X7
   327  	AESENC X3, X8
   328  	AESENC X3, X9
   329  	AESENC X2, X6
   330  	AESENC X2, X7
   331  	AESENC X2, X8
   332  	AESENC X2, X9
   333  	AESENC X4, X6
   334  	AESENC X4, X7
   335  	AESENC X4, X8
   336  	AESENC X4, X9
   337  	AESENC X5, X6
   338  	AESENC X5, X7
   339  	AESENC X5, X8
   340  	AESENC X5, X9
   341  	MOVOU 0(AX), X14
   342  	MOVOU 32(AX), X15
   343  	PXOR X14, X6
   344  	PXOR X15, X7
   345  	MOVOU 64(AX), X14
   346  	MOVOU 96(AX), X15
   347  	PXOR X14, X8
   348  	PXOR X15, X9
   349  	MOVOU X6, 0(BX)
   350  	MOVOU X7, 32(BX)
   351  	MOVOU X8, 64(BX)
   352  	MOVOU X9, 96(BX)
   353  	PXOR X2, X6
   354  	PXOR X2, X7
   355  	PXOR X2, X8
   356  	PXOR X2, X9
   357  	AESENC X3, X6
   358  	AESENC X3, X7
   359  	AESENC X3, X8
   360  	AESENC X3, X9
   361  	AESENC X2, X6
   362  	AESENC X2, X7
   363  	AESENC X2, X8
   364  	AESENC X2, X9
   365  	AESENC X4, X6
   366  	AESENC X4, X7
   367  	AESENC X4, X8
   368  	AESENC X4, X9
   369  	AESENC X5, X6
   370  	AESENC X5, X7
   371  	AESENC X5, X8
   372  	AESENC X5, X9
   373  	PXOR X10, X6
   374  	PXOR X11, X7
   375  	PXOR X12, X8
   376  	PXOR X13, X9
   377  	MOVOU X6, 16(BX)
   378  	MOVOU X7, 48(BX)
   379  	MOVOU X8, 80(BX)
   380  	MOVOU X9, 112(BX)
   381  	PXOR X6, X0
   382  	PXOR X7, X0
   383  	PXOR X8, X0
   384  	PXOR X9, X0
   385  	ADDQ $128, AX
   386  	ADDQ $128, BX
   387  	ADDQ $4, SI
   388  	SUBQ $128, DI
   389  process_64bytes:
        	// 64-byte tail: two pairs; R8 = l + 16*SI selects the next two l entries.
   390  	ADDQ $128, DI
   391  	SUBQ $64, DI
   392  	JCS process_32bytes
   393  	MOVQ SI, R8
   394  	SHLQ $4, R8
   395  	ADDQ DX, R8
   396  	MOVOU 16(AX), X10
   397  	MOVOU 48(AX), X11
   398  	MOVO X10, X6
   399  	MOVO X11, X7
   400  	PXOR X3, X6
   401  	PXOR X3, X7
   402  	PXOR X1, X6
   403  	PXOR X1, X7
   404  	PXOR 0(R8), X6
   405  	PXOR 16(R8), X7
   406  	AESENC X3, X6
   407  	AESENC X3, X7
   408  	AESENC X2, X6
   409  	AESENC X2, X7
   410  	AESENC X4, X6
   411  	AESENC X4, X7
   412  	AESENC X5, X6
   413  	AESENC X5, X7
   414  	MOVOU 0(AX), X14
   415  	MOVOU 32(AX), X15
   416  	PXOR X14, X6
   417  	PXOR X15, X7
   418  	MOVOU X6, 0(BX)
   419  	MOVOU X7, 32(BX)
   420  	PXOR X2, X6
   421  	PXOR X2, X7
   422  	AESENC X3, X6
   423  	AESENC X3, X7
   424  	AESENC X2, X6
   425  	AESENC X2, X7
   426  	AESENC X4, X6
   427  	AESENC X4, X7
   428  	AESENC X5, X6
   429  	AESENC X5, X7
   430  	PXOR X10, X6
   431  	PXOR X11, X7
   432  	MOVOU X6, 16(BX)
   433  	MOVOU X7, 48(BX)
   434  	PXOR X6, X0
   435  	PXOR X7, X0
   436  	ADDQ $64, AX
   437  	ADDQ $64, BX
   438  	ADDQ $2, SI
   439  	SUBQ $64, DI
   440  process_32bytes:
        	// 32-byte tail: one pair; the l index wraps modulo 8 before it is scaled.
   441  	ADDQ $64, DI
   442  	SUBQ $32, DI
   443  	JCS out
   444  	ANDQ $7, SI
   445  	SHLQ $4, SI
   446  	ADDQ SI, DX
   447  	MOVOU 16(AX), X10
   448  	MOVO X10, X6
   449  	PXOR X3, X6
   450  	PXOR X1, X6
   451  	PXOR 0(DX), X6
   452  	AESENC X3, X6
   453  	AESENC X2, X6
   454  	AESENC X4, X6
   455  	AESENC X5, X6
   456  	MOVOU 0(AX), X14
   457  	PXOR X14, X6
   458  	MOVOU X6, 0(BX)
   459  	PXOR X2, X6
   460  	AESENC X3, X6
   461  	AESENC X2, X6
   462  	AESENC X4, X6
   463  	AESENC X5, X6
   464  	PXOR X10, X6
   465  	MOVOU X6, 16(BX)
   466  	PXOR X6, X0
   467  out:
        	// Write the updated accumulator back to x.
   468  	MOVOU X0, 0(CX)
   469  	RET
   470  
   471  // func aezCorePass2AMD64AESNI(dst *uint8, y *uint8, s *uint8, j *uint8, i *uint8, l *uint8, k *uint8, consts *uint8, sz *uint)
        //
        // Transforms sz bytes of dst in place, in 32-byte pairs (even block E at
        // offset 0, odd block O at offset 16), and folds intermediate values into the
        // 16-byte accumulator at y. With AES4 being four AESENC rounds keyed by
        // k+16, k+0, k+32 and zero, each pair is processed as:
        //   A  = AES4(s ^ j[16:32] ^ tweak ^ l-entry)
        //   T0 = E ^ A          (y ^= T0)
        //   T1 = O ^ A
        //   O' = AES4(T1 ^ key0) ^ T0
        //   E' = AES4(O' ^ j[0:16] ^ tweak ^ l-entry) ^ T1
        // Work is done in a 256-byte loop followed by 128-, 64- and 32-byte tails.
        //
        // NOTE(review): sz is consumed by value as a byte count; the `*uint` in the
        // signature comment looks inconsistent with that - confirm against the Go
        // declaration.
        //
        // Register roles:
        //   AX = dst cursor, BX = y, CX = j, DX = base of the l table, R8 = s
        //   DI = bytes remaining, SI = l-table index used by the tails (starts at 1)
        //   R9 = caller's SP, R10 = scratch pointer (k, i, consts, then an l entry)
        //   X0/X1/X2 = round keys at k+0/k+16/k+32, X3 = y accumulator
        //   X4 = tweak loaded from i, X5 = zero, X6 = s ^ j[16:32]
        //
        // j, l and consts are used as aligned memory operands and must be 16-byte
        // aligned.
        //
        // R10 is used for scratch instead of BP: the frame size is zero, so the
        // assembler inserts no BP save/restore and a clobbered BP would be returned
        // to the caller, breaking frame-pointer based unwinding.
        //
        // NOTE(review): the routine realigns the hardware SP by hand (saved in R9,
        // restored before RET) although the declared frame size is 0. All FP-relative
        // argument loads happen before SP is changed. Tooling that expects a fixed
        // frame may object to this; left unchanged here.
   472  TEXT ·aezCorePass2AMD64AESNI(SB),4,$0-72
   473  	MOVQ dst+0(FP), AX
   474  	MOVQ y+8(FP), BX
   475  	MOVQ j+24(FP), CX
   476  	MOVQ l+40(FP), DX
   477  	MOVQ sz+64(FP), DI
   478  	MOVQ $1, SI
   479  	MOVQ k+48(FP), R10
   480  	MOVOU 0(R10), X0
   481  	MOVOU 16(R10), X1
   482  	MOVOU 32(R10), X2
   483  	MOVOU 0(BX), X3
   484  	MOVQ i+32(FP), R10
   485  	MOVOU 0(R10), X4
   486  	MOVQ consts+56(FP), R10
   487  	PXOR X5, X5
   488  	MOVQ s+16(FP), R8
   489  	MOVOU 0(R8), X6
   490  	PXOR 16(CX), X6
        	// Reserve a 32-byte aligned, 256-byte scratch area below the current SP
        	// (the AND mask clears the low five bits of SP).
   491  	MOVQ SP, R9
   492  	ANDQ $18446744073709551584, SP
   493  	SUBQ $256, SP
   494  	SUBQ $256, DI
   495  	JCS vector_loop256_end
   496  vector_loop256_begin:
        		// A = AES4(s ^ j[16:32] ^ tweak ^ l-entry) for eight lanes (l entries 1..7, then 0).
   497  		MOVO X6, X7
   498  		PXOR X4, X7
   499  		MOVO X7, X8
   500  		MOVO X7, X9
   501  		MOVO X7, X10
   502  		MOVO X7, X11
   503  		MOVO X7, X12
   504  		MOVO X7, X13
   505  		MOVO X7, X14
   506  		PXOR 16(DX), X7
   507  		PXOR 32(DX), X8
   508  		PXOR 48(DX), X9
   509  		PXOR 64(DX), X10
   510  		PXOR 80(DX), X11
   511  		PXOR 96(DX), X12
   512  		PXOR 112(DX), X13
   513  		PXOR 0(DX), X14
   514  		AESENC X1, X7
   515  		AESENC X1, X8
   516  		AESENC X1, X9
   517  		AESENC X1, X10
   518  		AESENC X1, X11
   519  		AESENC X1, X12
   520  		AESENC X1, X13
   521  		AESENC X1, X14
   522  		AESENC X0, X7
   523  		AESENC X0, X8
   524  		AESENC X0, X9
   525  		AESENC X0, X10
   526  		AESENC X0, X11
   527  		AESENC X0, X12
   528  		AESENC X0, X13
   529  		AESENC X0, X14
   530  		AESENC X2, X7
   531  		AESENC X2, X8
   532  		AESENC X2, X9
   533  		AESENC X2, X10
   534  		AESENC X2, X11
   535  		AESENC X2, X12
   536  		AESENC X2, X13
   537  		AESENC X2, X14
   538  		AESENC X5, X7
   539  		AESENC X5, X8
   540  		AESENC X5, X9
   541  		AESENC X5, X10
   542  		AESENC X5, X11
   543  		AESENC X5, X12
   544  		AESENC X5, X13
   545  		AESENC X5, X14
        		// T0 = E ^ A: fold into y and park in the even scratch slots.
        		// X6 is borrowed as a temporary here and reloaded at the end of the iteration.
   546  		MOVOU 0(AX), X15
   547  		MOVOU 32(AX), X6
   548  		PXOR X7, X15
   549  		PXOR X8, X6
   550  		PXOR X15, X3
   551  		PXOR X6, X3
   552  		MOVO X15, 0(SP)
   553  		MOVO X6, 32(SP)
   554  		MOVOU 64(AX), X15
   555  		MOVOU 96(AX), X6
   556  		PXOR X9, X15
   557  		PXOR X10, X6
   558  		PXOR X15, X3
   559  		PXOR X6, X3
   560  		MOVO X15, 64(SP)
   561  		MOVO X6, 96(SP)
   562  		MOVOU 128(AX), X15
   563  		MOVOU 160(AX), X6
   564  		PXOR X11, X15
   565  		PXOR X12, X6
   566  		PXOR X15, X3
   567  		PXOR X6, X3
   568  		MOVO X15, 128(SP)
   569  		MOVO X6, 160(SP)
   570  		MOVOU 192(AX), X15
   571  		MOVOU 224(AX), X6
   572  		PXOR X13, X15
   573  		PXOR X14, X6
   574  		PXOR X15, X3
   575  		PXOR X6, X3
   576  		MOVO X15, 192(SP)
   577  		MOVO X6, 224(SP)
        		// T1 = O ^ A: park in the odd scratch slots.
   578  		MOVOU 16(AX), X15
   579  		MOVOU 48(AX), X6
   580  		PXOR X15, X7
   581  		PXOR X6, X8
   582  		MOVO X7, 16(SP)
   583  		MOVO X8, 48(SP)
   584  		MOVOU 80(AX), X15
   585  		MOVOU 112(AX), X6
   586  		PXOR X15, X9
   587  		PXOR X6, X10
   588  		MOVO X9, 80(SP)
   589  		MOVO X10, 112(SP)
   590  		MOVOU 144(AX), X15
   591  		MOVOU 176(AX), X6
   592  		PXOR X15, X11
   593  		PXOR X6, X12
   594  		MOVO X11, 144(SP)
   595  		MOVO X12, 176(SP)
   596  		MOVOU 208(AX), X15
   597  		MOVOU 240(AX), X6
   598  		PXOR X15, X13
   599  		PXOR X6, X14
   600  		MOVO X13, 208(SP)
   601  		MOVO X14, 240(SP)
        		// O' = AES4(T1 ^ key0) ^ T0, written to the odd destination blocks.
   602  		PXOR X0, X7
   603  		PXOR X0, X8
   604  		PXOR X0, X9
   605  		PXOR X0, X10
   606  		PXOR X0, X11
   607  		PXOR X0, X12
   608  		PXOR X0, X13
   609  		PXOR X0, X14
   610  		AESENC X1, X7
   611  		AESENC X1, X8
   612  		AESENC X1, X9
   613  		AESENC X1, X10
   614  		AESENC X1, X11
   615  		AESENC X1, X12
   616  		AESENC X1, X13
   617  		AESENC X1, X14
   618  		AESENC X0, X7
   619  		AESENC X0, X8
   620  		AESENC X0, X9
   621  		AESENC X0, X10
   622  		AESENC X0, X11
   623  		AESENC X0, X12
   624  		AESENC X0, X13
   625  		AESENC X0, X14
   626  		AESENC X2, X7
   627  		AESENC X2, X8
   628  		AESENC X2, X9
   629  		AESENC X2, X10
   630  		AESENC X2, X11
   631  		AESENC X2, X12
   632  		AESENC X2, X13
   633  		AESENC X2, X14
   634  		AESENC X5, X7
   635  		AESENC X5, X8
   636  		AESENC X5, X9
   637  		AESENC X5, X10
   638  		AESENC X5, X11
   639  		AESENC X5, X12
   640  		AESENC X5, X13
   641  		AESENC X5, X14
   642  		PXOR 0(SP), X7
   643  		PXOR 32(SP), X8
   644  		PXOR 64(SP), X9
   645  		PXOR 96(SP), X10
   646  		PXOR 128(SP), X11
   647  		PXOR 160(SP), X12
   648  		PXOR 192(SP), X13
   649  		PXOR 224(SP), X14
   650  		MOVOU X7, 16(AX)
   651  		MOVOU X8, 48(AX)
   652  		MOVOU X9, 80(AX)
   653  		MOVOU X10, 112(AX)
   654  		MOVOU X11, 144(AX)
   655  		MOVOU X12, 176(AX)
   656  		MOVOU X13, 208(AX)
   657  		MOVOU X14, 240(AX)
        		// E' = AES4(O' ^ j[0:16] ^ tweak ^ l-entry) ^ T1, written to the even blocks.
   658  		MOVO 0(CX), X15
   659  		PXOR X4, X15
   660  		PXOR X15, X7
   661  		PXOR X15, X8
   662  		PXOR X15, X9
   663  		PXOR X15, X10
   664  		PXOR X15, X11
   665  		PXOR X15, X12
   666  		PXOR X15, X13
   667  		PXOR X15, X14
   668  		PXOR 16(DX), X7
   669  		PXOR 32(DX), X8
   670  		PXOR 48(DX), X9
   671  		PXOR 64(DX), X10
   672  		PXOR 80(DX), X11
   673  		PXOR 96(DX), X12
   674  		PXOR 112(DX), X13
   675  		PXOR 0(DX), X14
   676  		AESENC X1, X7
   677  		AESENC X1, X8
   678  		AESENC X1, X9
   679  		AESENC X1, X10
   680  		AESENC X1, X11
   681  		AESENC X1, X12
   682  		AESENC X1, X13
   683  		AESENC X1, X14
   684  		AESENC X0, X7
   685  		AESENC X0, X8
   686  		AESENC X0, X9
   687  		AESENC X0, X10
   688  		AESENC X0, X11
   689  		AESENC X0, X12
   690  		AESENC X0, X13
   691  		AESENC X0, X14
   692  		AESENC X2, X7
   693  		AESENC X2, X8
   694  		AESENC X2, X9
   695  		AESENC X2, X10
   696  		AESENC X2, X11
   697  		AESENC X2, X12
   698  		AESENC X2, X13
   699  		AESENC X2, X14
   700  		AESENC X5, X7
   701  		AESENC X5, X8
   702  		AESENC X5, X9
   703  		AESENC X5, X10
   704  		AESENC X5, X11
   705  		AESENC X5, X12
   706  		AESENC X5, X13
   707  		AESENC X5, X14
   708  		PXOR 16(SP), X7
   709  		PXOR 48(SP), X8
   710  		PXOR 80(SP), X9
   711  		PXOR 112(SP), X10
   712  		PXOR 144(SP), X11
   713  		PXOR 176(SP), X12
   714  		PXOR 208(SP), X13
   715  		PXOR 240(SP), X14
   716  		MOVOU X7, 0(AX)
   717  		MOVOU X8, 32(AX)
   718  		MOVOU X9, 64(AX)
   719  		MOVOU X10, 96(AX)
   720  		MOVOU X11, 128(AX)
   721  		MOVOU X12, 160(AX)
   722  		MOVOU X13, 192(AX)
   723  		MOVOU X14, 224(AX)
        		// Advance the tweak in X4 using the two 16-byte constants at consts+0
        		// (shuffle mask) and consts+16 (AND mask).
        		// NOTE(review): presumably a GF(2^128) doubling - confirm against the Go caller.
   724  		MOVO 0(R10), X15
   725  		PSHUFB X15, X4
   726  		MOVO X4, X6
   727  		PSRAL $31, X6
   728  		PAND 16(R10), X6
   729  		PSHUFL $147, X6, X6
   730  		PSLLL $1, X4
   731  		PXOR X6, X4
   732  		PSHUFB X15, X4
        		// X6 was used as a temporary above; rebuild s ^ j[16:32].
   733  		MOVOU 0(R8), X6
   734  		PXOR 16(CX), X6
   735  		ADDQ $256, AX
   736  		SUBQ $256, DI
   737  		JCC vector_loop256_begin
        		// Loop finished: wipe the scratch slots that only the 256-byte loop uses.
        		// Slots 0/32/64/96 are shared with the 128-byte tail and are wiped at out.
   738  		MOVO X5, 16(SP)
   739  		MOVO X5, 48(SP)
   740  		MOVO X5, 80(SP)
   741  		MOVO X5, 112(SP)
   742  		MOVO X5, 128(SP)
   743  		MOVO X5, 144(SP)
   744  		MOVO X5, 160(SP)
   745  		MOVO X5, 176(SP)
   746  		MOVO X5, 192(SP)
   747  		MOVO X5, 208(SP)
   748  		MOVO X5, 224(SP)
   749  		MOVO X5, 240(SP)
   750  vector_loop256_end:
        	// 128-byte tail: four pairs using l entries 1..4. T0 goes to scratch slots
        	// 0/32/64/96; T1 stays in X12/X14/X11/X13.
   751  	ADDQ $256, DI
   752  	SUBQ $128, DI
   753  	JCS process_64bytes
   754  	MOVO X6, X7
   755  	PXOR X4, X7
   756  	MOVO X7, X8
   757  	MOVO X7, X9
   758  	MOVO X7, X10
   759  	PXOR 16(DX), X7
   760  	PXOR 32(DX), X8
   761  	PXOR 48(DX), X9
   762  	PXOR 64(DX), X10
   763  	AESENC X1, X7
   764  	AESENC X1, X8
   765  	AESENC X1, X9
   766  	AESENC X1, X10
   767  	AESENC X0, X7
   768  	AESENC X0, X8
   769  	AESENC X0, X9
   770  	AESENC X0, X10
   771  	AESENC X2, X7
   772  	AESENC X2, X8
   773  	AESENC X2, X9
   774  	AESENC X2, X10
   775  	AESENC X5, X7
   776  	AESENC X5, X8
   777  	AESENC X5, X9
   778  	AESENC X5, X10
   779  	MOVOU 0(AX), X11
   780  	MOVOU 32(AX), X13
   781  	MOVOU 64(AX), X12
   782  	MOVOU 96(AX), X14
   783  	PXOR X7, X11
   784  	PXOR X8, X13
   785  	PXOR X9, X12
   786  	PXOR X10, X14
   787  	PXOR X11, X3
   788  	PXOR X13, X3
   789  	PXOR X12, X3
   790  	PXOR X14, X3
   791  	MOVO X11, 0(SP)
   792  	MOVO X13, 32(SP)
   793  	MOVO X12, 64(SP)
   794  	MOVO X14, 96(SP)
   795  	MOVOU 16(AX), X12
   796  	MOVOU 48(AX), X14
   797  	MOVOU 80(AX), X11
   798  	MOVOU 112(AX), X13
   799  	PXOR X12, X7
   800  	PXOR X14, X8
   801  	PXOR X11, X9
   802  	PXOR X13, X10
        	// T1 is kept in registers only. The odd destination blocks are written once,
        	// after the second AES4 below; storing T1 there first would be a dead store.
   807  	MOVO X7, X12
   808  	MOVO X8, X14
   809  	MOVO X9, X11
   810  	MOVO X10, X13
   811  	PXOR X0, X7
   812  	PXOR X0, X8
   813  	PXOR X0, X9
   814  	PXOR X0, X10
   815  	AESENC X1, X7
   816  	AESENC X1, X8
   817  	AESENC X1, X9
   818  	AESENC X1, X10
   819  	AESENC X0, X7
   820  	AESENC X0, X8
   821  	AESENC X0, X9
   822  	AESENC X0, X10
   823  	AESENC X2, X7
   824  	AESENC X2, X8
   825  	AESENC X2, X9
   826  	AESENC X2, X10
   827  	AESENC X5, X7
   828  	AESENC X5, X8
   829  	AESENC X5, X9
   830  	AESENC X5, X10
   831  	PXOR 0(SP), X7
   832  	PXOR 32(SP), X8
   833  	PXOR 64(SP), X9
   834  	PXOR 96(SP), X10
   835  	MOVOU X7, 16(AX)
   836  	MOVOU X8, 48(AX)
   837  	MOVOU X9, 80(AX)
   838  	MOVOU X10, 112(AX)
   839  	PXOR 0(CX), X7
   840  	PXOR 0(CX), X8
   841  	PXOR 0(CX), X9
   842  	PXOR 0(CX), X10
   843  	PXOR X4, X7
   844  	PXOR X4, X8
   845  	PXOR X4, X9
   846  	PXOR X4, X10
   847  	PXOR 16(DX), X7
   848  	PXOR 32(DX), X8
   849  	PXOR 48(DX), X9
   850  	PXOR 64(DX), X10
   851  	AESENC X1, X7
   852  	AESENC X1, X8
   853  	AESENC X1, X9
   854  	AESENC X1, X10
   855  	AESENC X0, X7
   856  	AESENC X0, X8
   857  	AESENC X0, X9
   858  	AESENC X0, X10
   859  	AESENC X2, X7
   860  	AESENC X2, X8
   861  	AESENC X2, X9
   862  	AESENC X2, X10
   863  	AESENC X5, X7
   864  	AESENC X5, X8
   865  	AESENC X5, X9
   866  	AESENC X5, X10
   867  	PXOR X12, X7
   868  	PXOR X14, X8
   869  	PXOR X11, X9
   870  	PXOR X13, X10
   871  	MOVOU X7, 0(AX)
   872  	MOVOU X8, 32(AX)
   873  	MOVOU X9, 64(AX)
   874  	MOVOU X10, 96(AX)
   875  	ADDQ $128, AX
   876  	ADDQ $4, SI
   877  	SUBQ $128, DI
   878  process_64bytes:
        	// 64-byte tail: two pairs, entirely in registers; R10 = l + 16*SI selects
        	// the next two l entries.
   879  	ADDQ $128, DI
   880  	SUBQ $64, DI
   881  	JCS process_32bytes
   882  	MOVQ SI, R10
   883  	SHLQ $4, R10
   884  	ADDQ DX, R10
   885  	MOVO X6, X7
   886  	PXOR X4, X7
   887  	MOVO X7, X8
   888  	PXOR 0(R10), X7
   889  	PXOR 16(R10), X8
   890  	AESENC X1, X7
   891  	AESENC X1, X8
   892  	AESENC X0, X7
   893  	AESENC X0, X8
   894  	AESENC X2, X7
   895  	AESENC X2, X8
   896  	AESENC X5, X7
   897  	AESENC X5, X8
   898  	MOVOU 0(AX), X11
   899  	MOVOU 16(AX), X12
   900  	MOVOU 32(AX), X13
   901  	MOVOU 48(AX), X14
   902  	PXOR X7, X11
   903  	PXOR X8, X13
   904  	PXOR X11, X3
   905  	PXOR X13, X3
   906  	PXOR X12, X7
   907  	PXOR X14, X8
   908  	MOVO X7, X12
   909  	MOVO X8, X14
   910  	PXOR X0, X7
   911  	PXOR X0, X8
   912  	AESENC X1, X7
   913  	AESENC X1, X8
   914  	AESENC X0, X7
   915  	AESENC X0, X8
   916  	AESENC X2, X7
   917  	AESENC X2, X8
   918  	AESENC X5, X7
   919  	AESENC X5, X8
   920  	PXOR X11, X7
   921  	PXOR X13, X8
   922  	MOVO X7, X11
   923  	MOVO X8, X13
   924  	PXOR 0(CX), X7
   925  	PXOR 0(CX), X8
   926  	PXOR X4, X7
   927  	PXOR X4, X8
   928  	PXOR 0(R10), X7
   929  	PXOR 16(R10), X8
   930  	AESENC X1, X7
   931  	AESENC X1, X8
   932  	AESENC X0, X7
   933  	AESENC X0, X8
   934  	AESENC X2, X7
   935  	AESENC X2, X8
   936  	AESENC X5, X7
   937  	AESENC X5, X8
   938  	PXOR X7, X12
   939  	PXOR X8, X14
   940  	MOVOU X12, 0(AX)
   941  	MOVOU X11, 16(AX)
   942  	MOVOU X14, 32(AX)
   943  	MOVOU X13, 48(AX)
   944  	ADDQ $64, AX
   945  	ADDQ $2, SI
   946  	SUBQ $64, DI
   947  process_32bytes:
        	// 32-byte tail: one pair; the l index wraps modulo 8 before it is scaled.
   948  	ADDQ $64, DI
   949  	SUBQ $32, DI
   950  	JCS out
   951  	ANDQ $7, SI
   952  	SHLQ $4, SI
   953  	ADDQ SI, DX
   954  	MOVO X6, X7
   955  	PXOR X4, X7
   956  	PXOR 0(DX), X7
   957  	AESENC X1, X7
   958  	AESENC X0, X7
   959  	AESENC X2, X7
   960  	AESENC X5, X7
   961  	MOVOU 0(AX), X11
   962  	MOVOU 16(AX), X12
   963  	PXOR X7, X11
   964  	PXOR X11, X3
   965  	PXOR X12, X7
   966  	MOVO X7, X12
   967  	PXOR X0, X7
   968  	AESENC X1, X7
   969  	AESENC X0, X7
   970  	AESENC X2, X7
   971  	AESENC X5, X7
   972  	PXOR X11, X7
   973  	MOVO X7, X11
   974  	PXOR 0(CX), X7
   975  	PXOR X4, X7
   976  	PXOR 0(DX), X7
   977  	AESENC X1, X7
   978  	AESENC X0, X7
   979  	AESENC X2, X7
   980  	AESENC X5, X7
   981  	PXOR X7, X12
   982  	MOVOU X12, 0(AX)
   983  	MOVOU X11, 16(AX)
   984  out:
        	// Write y back, wipe the scratch slots shared with the 128-byte tail, and
        	// restore the caller's stack pointer.
   985  	MOVOU X3, 0(BX)
   986  	MOVO X5, 0(SP)
   987  	MOVO X5, 32(SP)
   988  	MOVO X5, 64(SP)
   989  	MOVO X5, 96(SP)
   990  	MOVQ R9, SP
   991  	RET