github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/hash_amd64.s (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include "textflag.h"
    16  
    17  // func crc32Int64BatchHash(data *uint64, hashes *uint64, length int)
    18  // Requires: SSE4.2
        //
        // Hashes `length` consecutive 8-byte words starting at data and writes one
        // 8-byte result per word to hashes. Every word is hashed independently of
        // its neighbours: an accumulator is seeded with -1 (all ones), CRC32Q folds
        // the word into it, and the whole 64-bit register is stored.
        //
        // Register roles:
        //   SI      read cursor into data
        //   DI      write cursor into hashes
        //   CX      words still to process
        //   R8-R15  per-word accumulators
        //
        // The routine uses no stack frame and takes 24 bytes of arguments ($0-24).
        //
        // NOTE(review): length == 0 returns without touching memory, but a negative
        // length is not rejected; it reaches tailLoop with CX != 0.
    19  TEXT ·crc32Int64BatchHash(SB), NOSPLIT, $0-24
    20  	MOVQ data+0(FP), SI
    21  	MOVQ hashes+8(FP), DI
    22  	MOVQ length+16(FP), CX
    23  
        // Main loop: eight words per iteration, one accumulator per word.
    24  loop:
        	// Claim eight words up front; a negative result means fewer than eight
        	// were left, so fall through to the one-at-a-time tail.
    25  	SUBQ $8, CX
    26  	JL   tail
    27  
        	// Seed every accumulator with all ones.
    28  	MOVQ $-1, R8
    29  	MOVQ $-1, R9
    30  	MOVQ $-1, R10
    31  	MOVQ $-1, R11
    32  	MOVQ $-1, R12
    33  	MOVQ $-1, R13
    34  	MOVQ $-1, R14
    35  	MOVQ $-1, R15
    36  
        	// Fold word i (at byte offset 8*i) into accumulator i.
    37  	CRC32Q 0x00(SI), R8
    38  	CRC32Q 0x08(SI), R9
    39  	CRC32Q 0x10(SI), R10
    40  	CRC32Q 0x18(SI), R11
    41  	CRC32Q 0x20(SI), R12
    42  	CRC32Q 0x28(SI), R13
    43  	CRC32Q 0x30(SI), R14
    44  	CRC32Q 0x38(SI), R15
    45  
        	// Store the eight results to the matching output slots.
    46  	MOVQ R8, 0x00(DI)
    47  	MOVQ R9, 0x08(DI)
    48  	MOVQ R10, 0x10(DI)
    49  	MOVQ R11, 0x18(DI)
    50  	MOVQ R12, 0x20(DI)
    51  	MOVQ R13, 0x28(DI)
    52  	MOVQ R14, 0x30(DI)
    53  	MOVQ R15, 0x38(DI)
    54  
        	// Advance both cursors by 64 bytes (eight words).
    55  	ADDQ $0x40, SI
    56  	ADDQ $0x40, DI
    57  	JMP  loop
    58  
    59  tail:
        	// Undo the last SUBQ so CX is the real remainder; leave if it is zero.
    60  	ADDQ $8, CX
    61  	JE   done
    62  
        // Tail loop: same seed/fold/store sequence, one word per iteration.
    63  tailLoop:
    64  	MOVQ   $-1, R8
    65  	CRC32Q (SI), R8
    66  	MOVQ   R8, (DI)
    67  
    68  	ADDQ $0x08, SI
    69  	ADDQ $0x08, DI
        	// LOOP decrements CX and branches while it is non-zero.
    70  	LOOP tailLoop
    71  
    72  done:
    73  	RET
    74  
    75  // func crc32Int64CellBatchHash(data *uint64, hashes *uint64, length int)
    76  // Requires: SSE4.2
        //
        // Hashes `length` input cells and writes one 8-byte result per cell to
        // hashes. Input cells are 16 bytes apart, and only the first 8 bytes of each
        // cell are read; the other 8 bytes are skipped. Each cell is hashed
        // independently: an accumulator is seeded with -1 (all ones), CRC32Q folds
        // the 8-byte word into it, and the whole 64-bit register is stored.
        //
        // NOTE(review): what the skipped 8 bytes of a cell hold is not visible in
        // this file — confirm against the caller's cell layout.
        //
        // Register roles:
        //   SI      read cursor into data (16-byte stride)
        //   DI      write cursor into hashes (8-byte stride)
        //   CX      cells still to process
        //   R8-R15  per-cell accumulators
        //
        // NOTE(review): length == 0 returns without touching memory, but a negative
        // length is not rejected; it reaches tailLoop with CX != 0.
    77  TEXT ·crc32Int64CellBatchHash(SB), NOSPLIT, $0-24
    78  	MOVQ data+0(FP), SI
    79  	MOVQ hashes+8(FP), DI
    80  	MOVQ length+16(FP), CX
    81  
        // Main loop: eight cells per iteration, one accumulator per cell.
    82  loop:
        	// Claim eight cells up front; a negative result means fewer than eight
        	// were left, so fall through to the one-at-a-time tail.
    83  	SUBQ $8, CX
    84  	JL   tail
    85  
        	// Seed every accumulator with all ones.
    86  	MOVQ $-1, R8
    87  	MOVQ $-1, R9
    88  	MOVQ $-1, R10
    89  	MOVQ $-1, R11
    90  	MOVQ $-1, R12
    91  	MOVQ $-1, R13
    92  	MOVQ $-1, R14
    93  	MOVQ $-1, R15
    94  
        	// Fold the first 8 bytes of cell i (at byte offset 16*i) into accumulator i.
    95  	CRC32Q 0x00(SI), R8
    96  	CRC32Q 0x10(SI), R9
    97  	CRC32Q 0x20(SI), R10
    98  	CRC32Q 0x30(SI), R11
    99  	CRC32Q 0x40(SI), R12
   100  	CRC32Q 0x50(SI), R13
   101  	CRC32Q 0x60(SI), R14
   102  	CRC32Q 0x70(SI), R15
   103  
        	// Results are packed densely: 8 bytes per cell.
   104  	MOVQ R8, 0x00(DI)
   105  	MOVQ R9, 0x08(DI)
   106  	MOVQ R10, 0x10(DI)
   107  	MOVQ R11, 0x18(DI)
   108  	MOVQ R12, 0x20(DI)
   109  	MOVQ R13, 0x28(DI)
   110  	MOVQ R14, 0x30(DI)
   111  	MOVQ R15, 0x38(DI)
   112  
        	// Input advances 128 bytes (eight cells), output 64 bytes (eight hashes).
   113  	ADDQ $0x80, SI
   114  	ADDQ $0x40, DI
   115  	JMP  loop
   116  
   117  tail:
        	// Undo the last SUBQ so CX is the real remainder; leave if it is zero.
   118  	ADDQ $8, CX
   119  	JE   done
   120  
        // Tail loop: same seed/fold/store sequence, one cell per iteration.
   121  tailLoop:
   122  	MOVQ   $-1, R8
   123  	CRC32Q (SI), R8
   124  	MOVQ   R8, (DI)
   125  
   126  	ADDQ $0x10, SI
   127  	ADDQ $0x08, DI
        	// LOOP decrements CX and branches while it is non-zero.
   128  	LOOP tailLoop
   129  
   130  done:
   131  	RET
   132  
   133  ////////////////////////////////////////////////////////////////
   134  ////////////////////////////////////////////////////////////////
   135  ////////////////////////////////////////////////////////////////
   136  ////////////////////////////////////////////////////////////////
   137  
        // Pi<> is a 128-byte (0x80) table of sixteen 64-bit words. Read in order,
        // the words spell out the leading hexadecimal digits of pi
        // (3.243F6A88 85A308D3 13198A2E ...). The "<>" suffix keeps the symbol
        // local to this file; the GLOBL flags place it in read-only data and mark
        // it as pointer-free. Within this file it is read only by genCryptedPi.
   138  DATA Pi<>+0x00(SB)/8, $0x3243f6a8885a308d
   139  DATA Pi<>+0x08(SB)/8, $0x313198a2e0370734
   140  DATA Pi<>+0x10(SB)/8, $0x4a4093822299f31d
   141  DATA Pi<>+0x18(SB)/8, $0x0082efa98ec4e6c8
   142  DATA Pi<>+0x20(SB)/8, $0x9452821e638d0137
   143  DATA Pi<>+0x28(SB)/8, $0x7be5466cf34e90c6
   144  DATA Pi<>+0x30(SB)/8, $0xcc0ac29b7c97c50d
   145  DATA Pi<>+0x38(SB)/8, $0xd3f84d5b5b547091
   146  DATA Pi<>+0x40(SB)/8, $0x79216d5d98979fb1
   147  DATA Pi<>+0x48(SB)/8, $0xbd1310ba698dfb5a
   148  DATA Pi<>+0x50(SB)/8, $0xc2ffd72dbd01adfb
   149  DATA Pi<>+0x58(SB)/8, $0x7b8e1afed6a267e9
   150  DATA Pi<>+0x60(SB)/8, $0x6ba7c9045f12c7f9
   151  DATA Pi<>+0x68(SB)/8, $0x924a19947b3916cf
   152  DATA Pi<>+0x70(SB)/8, $0x70801f2e2858efc1
   153  DATA Pi<>+0x78(SB)/8, $0x6636920d871574e6
   154  GLOBL Pi<>(SB), (NOPTR+RODATA), $0x80
   155  
        // CryptedPi<> is a 128-byte (0x80) file-local, read-only, pointer-free
        // table. Every aes*BatchGenHashStates routine in this file loads it, 16
        // bytes at a time, into X0-X7 as the starting value of its AES lanes, so
        // changing any word here changes every hash state those routines produce.
        //
        // NOTE(review): the name suggests these are the bytes genCryptedPi
        // produces from Pi<>; nothing in this file checks that — confirm before
        // regenerating either table.
   156  DATA CryptedPi<>+0x00(SB)/8, $0x822233b93c11087c
   157  DATA CryptedPi<>+0x08(SB)/8, $0xd2b32f4adde873da
   158  DATA CryptedPi<>+0x10(SB)/8, $0xae9c2fc7dd17bcdb
   159  DATA CryptedPi<>+0x18(SB)/8, $0x859110441a1569fc
   160  DATA CryptedPi<>+0x20(SB)/8, $0x47087d794fffb5c9
   161  DATA CryptedPi<>+0x28(SB)/8, $0xb7b6c8f565414445
   162  DATA CryptedPi<>+0x30(SB)/8, $0xfd260edabb308f8d
   163  DATA CryptedPi<>+0x38(SB)/8, $0x3ddefc67bc565a13
   164  DATA CryptedPi<>+0x40(SB)/8, $0xe4c1d50223544f10
   165  DATA CryptedPi<>+0x48(SB)/8, $0xaf40e05725c3192b
   166  DATA CryptedPi<>+0x50(SB)/8, $0x281d8ab9a16382e9
   167  DATA CryptedPi<>+0x58(SB)/8, $0xddc10c903b63a6cf
   168  DATA CryptedPi<>+0x60(SB)/8, $0x852d3ad603e8df72
   169  DATA CryptedPi<>+0x68(SB)/8, $0xa6642b57d1011deb
   170  DATA CryptedPi<>+0x70(SB)/8, $0x5063d25a1cb7b6b9
   171  DATA CryptedPi<>+0x78(SB)/8, $0xb2623e6241e8e46e
   172  GLOBL CryptedPi<>(SB), (NOPTR+RODATA), $0x80
   173  
   174  // func aesBytesBatchGenHashStates(data *[]byte, states *[3]uint64, length int)
   175  // Requires: AES, AVX
        //
        // For each of `length` byte slices, derives a 24-byte state from the slice
        // contents with AES rounds seeded from CryptedPi<>, and writes it to the
        // matching entry of states.
        //
        // data is walked as an array of 24-byte slice headers (pointer at +0,
        // length at +8); states is walked in 24-byte steps. Each slice feeds two
        // independent groups of four lanes: X8-X11 (seeded from X0-X3) and X12-X15
        // (seeded from X4-X7). The first group is reduced to 64 bits, XORed with
        // the slice length and stored at state+0; the second is reduced to 128
        // bits and stored at state+8.
        //
        // length <= 0 returns immediately without reading data or writing states.
        // Without that check the bottom-tested loop would process one entry and
        // then wrap CX for length == 0.
        //
        // NOTE(review): the final absorb always reads the 16 bytes that end at
        // ptr+len, so a slice shorter than 16 bytes is read starting before its
        // first byte. Callers presumably guarantee that is safe — confirm.
        //
        // Register roles:
        //   SI  current slice header      DI  current output state
        //   CX  slices remaining          BX  length of the current slice
        //   AX  read cursor in the slice  DX  loop bound, then last-block address
        //   X0-X7   initial lane values (CryptedPi<>), constant for the whole call
        //   X8-X15  working lanes for the current slice
   176  TEXT ·aesBytesBatchGenHashStates(SB), NOSPLIT, $0-24
   177  	MOVQ data+0(FP), SI
   178  	MOVQ states+8(FP), DI
   179  	MOVQ length+16(FP), CX
        
        	// Empty (or negative) batch: nothing to hash.
        	TESTQ CX, CX
        	JLE   exit
   180  
   181  	VMOVDQU CryptedPi<>+0x00(SB), X0
   182  	VMOVDQU CryptedPi<>+0x10(SB), X1
   183  	VMOVDQU CryptedPi<>+0x20(SB), X2
   184  	VMOVDQU CryptedPi<>+0x30(SB), X3
   185  	VMOVDQU CryptedPi<>+0x40(SB), X4
   186  	VMOVDQU CryptedPi<>+0x50(SB), X5
   187  	VMOVDQU CryptedPi<>+0x60(SB), X6
   188  	VMOVDQU CryptedPi<>+0x70(SB), X7
   189  
        // Outer loop: one slice per iteration.
   190  loop:
        	// AX = slice pointer, DX = BX = slice length.
   191  	MOVQ (SI), AX
   192  	MOVQ 8(SI), DX
   193  	MOVQ DX, BX
   194  
        	// DX = ptr + len - 64: the 64-byte loop runs while AX is below this.
   195  	ADDQ AX, DX
   196  	SUBQ $0x40, DX
   197  
        	// Reset the working lanes to their initial values for this slice.
   198  	VMOVDQU X0, X8
   199  	VMOVDQU X1, X9
   200  	VMOVDQU X2, X10
   201  	VMOVDQU X3, X11
   202  	VMOVDQU X4, X12
   203  	VMOVDQU X5, X13
   204  	VMOVDQU X6, X14
   205  	VMOVDQU X7, X15
   206  
        // Bulk absorb: 64 bytes per iteration. Each 16-byte chunk is used as the
        // round-key operand of one AES round on a lane in each group.
   207  innerLoop:
   208  	CMPQ AX, DX
   209  	JGE  tail
   210  
   211  	VAESENC 0x00(AX), X8, X8
   212  	VAESENC 0x00(AX), X12, X12
   213  	VAESENC 0x10(AX), X9, X9
   214  	VAESENC 0x10(AX), X13, X13
   215  	VAESENC 0x20(AX), X10, X10
   216  	VAESENC 0x20(AX), X14, X14
   217  	VAESENC 0x30(AX), X11, X11
   218  	VAESENC 0x30(AX), X15, X15
   219  
   220  	ADDQ $0x40, AX
   221  	JMP  innerLoop
   222  
   223  tail:
        	// DX = ptr + len - 16, the address of the slice's last 16 bytes.
        	// Absorb up to three more whole chunks that start below that address.
   224  	ADDQ $0x30, DX
   225  	CMPQ AX, DX
   226  	JGE  done
   227  
   228  	VAESENC (AX), X8, X8
   229  	VAESENC (AX), X12, X12
   230  
   231  	ADDQ $0x10, AX
   232  	CMPQ AX, DX
   233  	JGE  done
   234  
   235  	VAESENC (AX), X9, X9
   236  	VAESENC (AX), X13, X13
   237  
   238  	ADDQ $0x10, AX
   239  	CMPQ AX, DX
   240  	JGE  done
   241  
   242  	VAESENC (AX), X10, X10
   243  	VAESENC (AX), X14, X14
   244  
   245  done:
        	// Always absorb the last 16 bytes of the slice. They may overlap bytes
        	// already absorbed above when len is not a multiple of 16.
   246  	VAESENC (DX), X11, X11
   247  	VAESENC (DX), X15, X15
   248  
        	// Fold lanes X8-X11 into X11.
   249  	VAESENC X9, X8, X8
   250  	VAESENC X10, X11, X11
   251  	VAESENC X8, X11, X11
   252  
        	// Three extra rounds on X11, each keyed by X11 itself.
   253  	VAESENC X11, X11, X11
   254  	VAESENC X11, X11, X11
   255  	VAESENC X11, X11, X11
   256  
        	// Fold lanes X12-X15 into X12.
   257  	VAESENC X14, X13, X13
   258  	VAESENC X15, X12, X12
   259  	VAESENC X13, X12, X12
   260  
        	// Shuffle 0x4e swaps the two 64-bit halves; the XOR leaves high^low in
        	// the low half of X11. Mix in the slice length before storing.
   261  	VPSHUFD $0x4e, X11, X8
   262  	VPXOR   X8, X11, X11
   263  	VMOVQ   X11, R8
   264  	XORQ    BX, R8
   265  
        	// state+0: 8 bytes from the first group; state+8: 16 bytes from the second.
   266  	MOVQ    R8, (DI)
   267  	VMOVDQU X12, 8(DI)
   268  
        	// Next slice header and next state are both 24 bytes on.
   269  	ADDQ $24, SI
   270  	ADDQ $24, DI
   271  	DECQ CX
   272  	JNZ  loop
   273  
        exit:
   274  	RET
   275  
   276  // func aesInt192BatchGenHashStates(data *[3]uint64, states *[3]uint64, length int)
   277  // Requires: AES, AVX
        //
        // For each of `length` 24-byte keys, derives a 24-byte state with AES
        // rounds seeded from CryptedPi<> and writes it to the matching entry of
        // states. Each key is read as two overlapping 16-byte chunks (bytes 0-15
        // and bytes 8-23). state+0 receives 8 bytes reduced from lanes X0/X1/X3;
        // state+8 receives 16 bytes reduced from lanes X4/X5/X6.
        //
        // length <= 0 returns immediately without reading data or writing states.
        // Without that check LOOP would run the body once and then wrap CX for
        // length == 0.
        //
        // Register roles: SI = current key, DI = current state, CX = keys
        // remaining, X0-X7 = constants for the whole call, X8-X11 = per-key
        // scratch.
   278  TEXT ·aesInt192BatchGenHashStates(SB), NOSPLIT, $0-24
   279  	MOVQ data+0(FP), SI
   280  	MOVQ states+8(FP), DI
   281  	MOVQ length+16(FP), CX
        
        	// Empty (or negative) batch: nothing to hash.
        	TESTQ CX, CX
        	JLE   done
   282  
   283  	VMOVDQU CryptedPi<>+0x00(SB), X0
   284  	VMOVDQU CryptedPi<>+0x10(SB), X1
   285  	VMOVDQU CryptedPi<>+0x20(SB), X2
   286  	VMOVDQU CryptedPi<>+0x30(SB), X3
   287  	VMOVDQU CryptedPi<>+0x40(SB), X4
   288  	VMOVDQU CryptedPi<>+0x50(SB), X5
   289  	VMOVDQU CryptedPi<>+0x60(SB), X6
   290  	VMOVDQU CryptedPi<>+0x70(SB), X7
        	// Key-independent work hoisted out of the loop: pre-mix the two
        	// constants that are applied to every key.
   291  	VAESENC X2, X3, X3
   292  	VAESENC X7, X6, X6
   293  
   294  loop:
        	// Absorb both key chunks into each lane group (X8/X9 and X10/X11).
   295  	VAESENC 0x00(SI), X0, X8
   296  	VAESENC 0x00(SI), X4, X10
   297  	VAESENC 0x08(SI), X1, X9
   298  	VAESENC 0x08(SI), X5, X11
        	// First group: fold X8 into X9, apply the constant, two self-keyed rounds.
   299  	VAESENC X8, X9, X9
   300  	VAESENC X3, X9, X9
   301  	VAESENC X9, X9, X9
   302  	VAESENC X9, X9, X9
        	// Shuffle 0x4e swaps the 64-bit halves; XOR leaves high^low in the low half.
   303  	VPSHUFD $0x4e, X9, X8
   304  	VPXOR   X8, X9, X9
        	// Second group: fold X11 into X10 and apply the constant.
   305  	VAESENC X11, X10, X10
   306  	VAESENC X6, X10, X10
        	// state+0: low 8 bytes of X9; state+8: all 16 bytes of X10.
   307  	VMOVQ   X9, 0x00(DI)
   308  	VMOVDQU X10, 0x08(DI)
   309  
        	// Keys and states are both 24 bytes apart.
   310  	ADDQ $0x18, SI
   311  	ADDQ $0x18, DI
        	// LOOP decrements CX and branches while it is non-zero.
   312  	LOOP loop
   313  
   314  done:
   315  	RET
   316  
   317  // func aesInt256BatchGenHashStates(data *[4]uint64, states *[3]uint64, length int)
   318  // Requires: AES, AVX
        //
        // For each of `length` 32-byte keys, derives a 24-byte state with AES
        // rounds seeded from CryptedPi<> and writes it to the matching entry of
        // states. Each key is read as two 16-byte chunks (bytes 0-15 and bytes
        // 16-31). state+0 receives 8 bytes reduced from lanes X0/X1/X3; state+8
        // receives 16 bytes reduced from lanes X4/X5/X6.
        //
        // length <= 0 returns immediately without reading data or writing states.
        // Without that check LOOP would run the body once and then wrap CX for
        // length == 0.
        //
        // Register roles: SI = current key, DI = current state, CX = keys
        // remaining, X0-X7 = constants for the whole call, X8-X11 = per-key
        // scratch.
   319  TEXT ·aesInt256BatchGenHashStates(SB), NOSPLIT, $0-24
   320  	MOVQ data+0(FP), SI
   321  	MOVQ states+8(FP), DI
   322  	MOVQ length+16(FP), CX
        
        	// Empty (or negative) batch: nothing to hash.
        	TESTQ CX, CX
        	JLE   done
   323  
   324  	VMOVDQU CryptedPi<>+0x00(SB), X0
   325  	VMOVDQU CryptedPi<>+0x10(SB), X1
   326  	VMOVDQU CryptedPi<>+0x20(SB), X2
   327  	VMOVDQU CryptedPi<>+0x30(SB), X3
   328  	VMOVDQU CryptedPi<>+0x40(SB), X4
   329  	VMOVDQU CryptedPi<>+0x50(SB), X5
   330  	VMOVDQU CryptedPi<>+0x60(SB), X6
   331  	VMOVDQU CryptedPi<>+0x70(SB), X7
        	// Key-independent work hoisted out of the loop: pre-mix the two
        	// constants that are applied to every key.
   332  	VAESENC X2, X3, X3
   333  	VAESENC X7, X6, X6
   334  
   335  loop:
        	// Absorb both key chunks into each lane group (X8/X9 and X10/X11).
   336  	VAESENC 0x00(SI), X0, X8
   337  	VAESENC 0x00(SI), X4, X10
   338  	VAESENC 0x10(SI), X1, X9
   339  	VAESENC 0x10(SI), X5, X11
        	// First group: fold X8 into X9, apply the constant, two self-keyed rounds.
   340  	VAESENC X8, X9, X9
   341  	VAESENC X3, X9, X9
   342  	VAESENC X9, X9, X9
   343  	VAESENC X9, X9, X9
        	// Shuffle 0x4e swaps the 64-bit halves; XOR leaves high^low in the low half.
   344  	VPSHUFD $0x4e, X9, X8
   345  	VPXOR   X8, X9, X9
        	// Second group: fold X11 into X10 and apply the constant.
   346  	VAESENC X11, X10, X10
   347  	VAESENC X6, X10, X10
        	// state+0: low 8 bytes of X9; state+8: all 16 bytes of X10.
   348  	VMOVQ   X9, 0x00(DI)
   349  	VMOVDQU X10, 0x08(DI)
   350  
        	// Keys are 32 bytes apart, states 24 bytes apart.
   351  	ADDQ $0x20, SI
   352  	ADDQ $0x18, DI
        	// LOOP decrements CX and branches while it is non-zero.
   353  	LOOP loop
   354  
   355  done:
   356  	RET
   357  
   358  // func aesInt320BatchGenHashStates(data *[5]uint64, states *[3]uint64, length int)
   359  // Requires: AES, AVX
        //
        // For each of `length` 40-byte keys, derives a 24-byte state with AES
        // rounds seeded from CryptedPi<> and writes it to the matching entry of
        // states. Each key is read as three 16-byte chunks: bytes 0-15, bytes
        // 16-31, and bytes 24-39 (the last overlaps the second by 8 bytes).
        // state+0 receives 8 bytes reduced from lanes X0-X3; state+8 receives 16
        // bytes reduced from lanes X4-X7.
        //
        // length <= 0 returns immediately without reading data or writing states.
        // Without that check LOOP would run the body once and then wrap CX for
        // length == 0.
        //
        // Register roles: SI = current key, DI = current state, CX = keys
        // remaining, X0-X7 = constants for the whole call, X8-X13 = per-key
        // scratch.
   360  TEXT ·aesInt320BatchGenHashStates(SB), NOSPLIT, $0-24
   361  	MOVQ data+0(FP), SI
   362  	MOVQ states+8(FP), DI
   363  	MOVQ length+16(FP), CX
        
        	// Empty (or negative) batch: nothing to hash.
        	TESTQ CX, CX
        	JLE   done
   364  
   365  	VMOVDQU CryptedPi<>+0x00(SB), X0
   366  	VMOVDQU CryptedPi<>+0x10(SB), X1
   367  	VMOVDQU CryptedPi<>+0x20(SB), X2
   368  	VMOVDQU CryptedPi<>+0x30(SB), X3
   369  	VMOVDQU CryptedPi<>+0x40(SB), X4
   370  	VMOVDQU CryptedPi<>+0x50(SB), X5
   371  	VMOVDQU CryptedPi<>+0x60(SB), X6
   372  	VMOVDQU CryptedPi<>+0x70(SB), X7
   373  
   374  loop:
        	// Absorb the three key chunks into each lane group
        	// (X8/X9/X10 and X11/X12/X13).
   375  	VAESENC 0x00(SI), X0, X8
   376  	VAESENC 0x00(SI), X4, X11
   377  	VAESENC 0x10(SI), X1, X9
   378  	VAESENC 0x10(SI), X5, X12
   379  	VAESENC 0x18(SI), X3, X10
   380  	VAESENC 0x18(SI), X6, X13
        	// First group: fold X10 and X9 (after the X2 constant) into X8, then
        	// two self-keyed rounds.
   381  	VAESENC X10, X8, X8
   382  	VAESENC X2, X9, X9
   383  	VAESENC X9, X8, X8
   384  	VAESENC X8, X8, X8
   385  	VAESENC X8, X8, X8
        	// Shuffle 0x4e swaps the 64-bit halves; XOR leaves high^low in the low half.
   386  	VPSHUFD $0x4e, X8, X9
   387  	VPXOR   X9, X8, X8
        	// Second group: fold X12 and X13 (after the X7 constant) into X11.
   388  	VAESENC X12, X11, X11
   389  	VAESENC X7, X13, X13
   390  	VAESENC X13, X11, X11
        	// state+0: low 8 bytes of X8; state+8: all 16 bytes of X11.
   391  	VMOVQ   X8, 0x00(DI)
   392  	VMOVDQU X11, 0x08(DI)
   393  
        	// Keys are 40 bytes apart, states 24 bytes apart.
   394  	ADDQ $0x28, SI
   395  	ADDQ $0x18, DI
        	// LOOP decrements CX and branches while it is non-zero.
   396  	LOOP loop
   397  
   398  done:
   399  	RET
   400  
        // genCryptedPi transforms the 128-byte Pi<> table and writes the 128-byte
        // result to the buffer addressed by its single 8-byte argument (dst).
        //
        // Pi<> is loaded into X0-X7 as eight independent 16-byte lanes. Each lane
        // then goes through two identical passes of 13 VAESENC rounds followed by
        // one VAESENCLAST round; in every round the lane's current value is used
        // both as the AES state and as the round key. The eight lanes are stored
        // to dst+0x00 .. dst+0x70.
        //
        // Uses VEX-encoded AES instructions, so the CPU must support both AES and
        // AVX. No stack frame is used ($0-8).
        //
        // NOTE(review): unlike the other routines in this file the symbol name
        // has no leading "·", so it is not in this package's namespace and a Go
        // declaration `func genCryptedPi(...)` would not link to it. Presumably
        // this is a one-off generator for CryptedPi<> that nothing calls — confirm
        // before relying on it.
   401  TEXT genCryptedPi(SB), NOSPLIT, $0-8
   402  	MOVQ dst+0(FP), DI
   403  
        	// Load the eight 16-byte lanes from Pi<>.
   404  	VMOVDQU Pi<>+0x00(SB), X0
   405  	VMOVDQU Pi<>+0x10(SB), X1
   406  	VMOVDQU Pi<>+0x20(SB), X2
   407  	VMOVDQU Pi<>+0x30(SB), X3
   408  	VMOVDQU Pi<>+0x40(SB), X4
   409  	VMOVDQU Pi<>+0x50(SB), X5
   410  	VMOVDQU Pi<>+0x60(SB), X6
   411  	VMOVDQU Pi<>+0x70(SB), X7
   412  
        	// Pass 1: 13 self-keyed VAESENC rounds on every lane.
   413  	VAESENC X0, X0, X0
   414  	VAESENC X1, X1, X1
   415  	VAESENC X2, X2, X2
   416  	VAESENC X3, X3, X3
   417  	VAESENC X4, X4, X4
   418  	VAESENC X5, X5, X5
   419  	VAESENC X6, X6, X6
   420  	VAESENC X7, X7, X7
   421  
   422  	VAESENC X0, X0, X0
   423  	VAESENC X1, X1, X1
   424  	VAESENC X2, X2, X2
   425  	VAESENC X3, X3, X3
   426  	VAESENC X4, X4, X4
   427  	VAESENC X5, X5, X5
   428  	VAESENC X6, X6, X6
   429  	VAESENC X7, X7, X7
   430  
   431  	VAESENC X0, X0, X0
   432  	VAESENC X1, X1, X1
   433  	VAESENC X2, X2, X2
   434  	VAESENC X3, X3, X3
   435  	VAESENC X4, X4, X4
   436  	VAESENC X5, X5, X5
   437  	VAESENC X6, X6, X6
   438  	VAESENC X7, X7, X7
   439  
   440  	VAESENC X0, X0, X0
   441  	VAESENC X1, X1, X1
   442  	VAESENC X2, X2, X2
   443  	VAESENC X3, X3, X3
   444  	VAESENC X4, X4, X4
   445  	VAESENC X5, X5, X5
   446  	VAESENC X6, X6, X6
   447  	VAESENC X7, X7, X7
   448  
   449  	VAESENC X0, X0, X0
   450  	VAESENC X1, X1, X1
   451  	VAESENC X2, X2, X2
   452  	VAESENC X3, X3, X3
   453  	VAESENC X4, X4, X4
   454  	VAESENC X5, X5, X5
   455  	VAESENC X6, X6, X6
   456  	VAESENC X7, X7, X7
   457  
   458  	VAESENC X0, X0, X0
   459  	VAESENC X1, X1, X1
   460  	VAESENC X2, X2, X2
   461  	VAESENC X3, X3, X3
   462  	VAESENC X4, X4, X4
   463  	VAESENC X5, X5, X5
   464  	VAESENC X6, X6, X6
   465  	VAESENC X7, X7, X7
   466  
   467  	VAESENC X0, X0, X0
   468  	VAESENC X1, X1, X1
   469  	VAESENC X2, X2, X2
   470  	VAESENC X3, X3, X3
   471  	VAESENC X4, X4, X4
   472  	VAESENC X5, X5, X5
   473  	VAESENC X6, X6, X6
   474  	VAESENC X7, X7, X7
   475  
   476  	VAESENC X0, X0, X0
   477  	VAESENC X1, X1, X1
   478  	VAESENC X2, X2, X2
   479  	VAESENC X3, X3, X3
   480  	VAESENC X4, X4, X4
   481  	VAESENC X5, X5, X5
   482  	VAESENC X6, X6, X6
   483  	VAESENC X7, X7, X7
   484  
   485  	VAESENC X0, X0, X0
   486  	VAESENC X1, X1, X1
   487  	VAESENC X2, X2, X2
   488  	VAESENC X3, X3, X3
   489  	VAESENC X4, X4, X4
   490  	VAESENC X5, X5, X5
   491  	VAESENC X6, X6, X6
   492  	VAESENC X7, X7, X7
   493  
   494  	VAESENC X0, X0, X0
   495  	VAESENC X1, X1, X1
   496  	VAESENC X2, X2, X2
   497  	VAESENC X3, X3, X3
   498  	VAESENC X4, X4, X4
   499  	VAESENC X5, X5, X5
   500  	VAESENC X6, X6, X6
   501  	VAESENC X7, X7, X7
   502  
   503  	VAESENC X0, X0, X0
   504  	VAESENC X1, X1, X1
   505  	VAESENC X2, X2, X2
   506  	VAESENC X3, X3, X3
   507  	VAESENC X4, X4, X4
   508  	VAESENC X5, X5, X5
   509  	VAESENC X6, X6, X6
   510  	VAESENC X7, X7, X7
   511  
   512  	VAESENC X0, X0, X0
   513  	VAESENC X1, X1, X1
   514  	VAESENC X2, X2, X2
   515  	VAESENC X3, X3, X3
   516  	VAESENC X4, X4, X4
   517  	VAESENC X5, X5, X5
   518  	VAESENC X6, X6, X6
   519  	VAESENC X7, X7, X7
   520  
   521  	VAESENC X0, X0, X0
   522  	VAESENC X1, X1, X1
   523  	VAESENC X2, X2, X2
   524  	VAESENC X3, X3, X3
   525  	VAESENC X4, X4, X4
   526  	VAESENC X5, X5, X5
   527  	VAESENC X6, X6, X6
   528  	VAESENC X7, X7, X7
   529  
        	// Pass 1, final round: VAESENCLAST on every lane.
   530  	VAESENCLAST X0, X0, X0
   531  	VAESENCLAST X1, X1, X1
   532  	VAESENCLAST X2, X2, X2
   533  	VAESENCLAST X3, X3, X3
   534  	VAESENCLAST X4, X4, X4
   535  	VAESENCLAST X5, X5, X5
   536  	VAESENCLAST X6, X6, X6
   537  	VAESENCLAST X7, X7, X7
   538  
        	// Pass 2: another 13 self-keyed VAESENC rounds on every lane.
   539  	VAESENC X0, X0, X0
   540  	VAESENC X1, X1, X1
   541  	VAESENC X2, X2, X2
   542  	VAESENC X3, X3, X3
   543  	VAESENC X4, X4, X4
   544  	VAESENC X5, X5, X5
   545  	VAESENC X6, X6, X6
   546  	VAESENC X7, X7, X7
   547  
   548  	VAESENC X0, X0, X0
   549  	VAESENC X1, X1, X1
   550  	VAESENC X2, X2, X2
   551  	VAESENC X3, X3, X3
   552  	VAESENC X4, X4, X4
   553  	VAESENC X5, X5, X5
   554  	VAESENC X6, X6, X6
   555  	VAESENC X7, X7, X7
   556  
   557  	VAESENC X0, X0, X0
   558  	VAESENC X1, X1, X1
   559  	VAESENC X2, X2, X2
   560  	VAESENC X3, X3, X3
   561  	VAESENC X4, X4, X4
   562  	VAESENC X5, X5, X5
   563  	VAESENC X6, X6, X6
   564  	VAESENC X7, X7, X7
   565  
   566  	VAESENC X0, X0, X0
   567  	VAESENC X1, X1, X1
   568  	VAESENC X2, X2, X2
   569  	VAESENC X3, X3, X3
   570  	VAESENC X4, X4, X4
   571  	VAESENC X5, X5, X5
   572  	VAESENC X6, X6, X6
   573  	VAESENC X7, X7, X7
   574  
   575  	VAESENC X0, X0, X0
   576  	VAESENC X1, X1, X1
   577  	VAESENC X2, X2, X2
   578  	VAESENC X3, X3, X3
   579  	VAESENC X4, X4, X4
   580  	VAESENC X5, X5, X5
   581  	VAESENC X6, X6, X6
   582  	VAESENC X7, X7, X7
   583  
   584  	VAESENC X0, X0, X0
   585  	VAESENC X1, X1, X1
   586  	VAESENC X2, X2, X2
   587  	VAESENC X3, X3, X3
   588  	VAESENC X4, X4, X4
   589  	VAESENC X5, X5, X5
   590  	VAESENC X6, X6, X6
   591  	VAESENC X7, X7, X7
   592  
   593  	VAESENC X0, X0, X0
   594  	VAESENC X1, X1, X1
   595  	VAESENC X2, X2, X2
   596  	VAESENC X3, X3, X3
   597  	VAESENC X4, X4, X4
   598  	VAESENC X5, X5, X5
   599  	VAESENC X6, X6, X6
   600  	VAESENC X7, X7, X7
   601  
   602  	VAESENC X0, X0, X0
   603  	VAESENC X1, X1, X1
   604  	VAESENC X2, X2, X2
   605  	VAESENC X3, X3, X3
   606  	VAESENC X4, X4, X4
   607  	VAESENC X5, X5, X5
   608  	VAESENC X6, X6, X6
   609  	VAESENC X7, X7, X7
   610  
   611  	VAESENC X0, X0, X0
   612  	VAESENC X1, X1, X1
   613  	VAESENC X2, X2, X2
   614  	VAESENC X3, X3, X3
   615  	VAESENC X4, X4, X4
   616  	VAESENC X5, X5, X5
   617  	VAESENC X6, X6, X6
   618  	VAESENC X7, X7, X7
   619  
   620  	VAESENC X0, X0, X0
   621  	VAESENC X1, X1, X1
   622  	VAESENC X2, X2, X2
   623  	VAESENC X3, X3, X3
   624  	VAESENC X4, X4, X4
   625  	VAESENC X5, X5, X5
   626  	VAESENC X6, X6, X6
   627  	VAESENC X7, X7, X7
   628  
   629  	VAESENC X0, X0, X0
   630  	VAESENC X1, X1, X1
   631  	VAESENC X2, X2, X2
   632  	VAESENC X3, X3, X3
   633  	VAESENC X4, X4, X4
   634  	VAESENC X5, X5, X5
   635  	VAESENC X6, X6, X6
   636  	VAESENC X7, X7, X7
   637  
   638  	VAESENC X0, X0, X0
   639  	VAESENC X1, X1, X1
   640  	VAESENC X2, X2, X2
   641  	VAESENC X3, X3, X3
   642  	VAESENC X4, X4, X4
   643  	VAESENC X5, X5, X5
   644  	VAESENC X6, X6, X6
   645  	VAESENC X7, X7, X7
   646  
   647  	VAESENC X0, X0, X0
   648  	VAESENC X1, X1, X1
   649  	VAESENC X2, X2, X2
   650  	VAESENC X3, X3, X3
   651  	VAESENC X4, X4, X4
   652  	VAESENC X5, X5, X5
   653  	VAESENC X6, X6, X6
   654  	VAESENC X7, X7, X7
   655  
        	// Pass 2, final round: VAESENCLAST on every lane.
   656  	VAESENCLAST X0, X0, X0
   657  	VAESENCLAST X1, X1, X1
   658  	VAESENCLAST X2, X2, X2
   659  	VAESENCLAST X3, X3, X3
   660  	VAESENCLAST X4, X4, X4
   661  	VAESENCLAST X5, X5, X5
   662  	VAESENCLAST X6, X6, X6
   663  	VAESENCLAST X7, X7, X7
   664  
        	// Write the eight lanes to dst, 16 bytes each.
   665  	VMOVDQU X0, 0x00(DI)
   666  	VMOVDQU X1, 0x10(DI)
   667  	VMOVDQU X2, 0x20(DI)
   668  	VMOVDQU X3, 0x30(DI)
   669  	VMOVDQU X4, 0x40(DI)
   670  	VMOVDQU X5, 0x50(DI)
   671  	VMOVDQU X6, 0x60(DI)
   672  	VMOVDQU X7, 0x70(DI)
   673  
   674  	RET