github.com/bytedance/gopkg@v0.0.0-20240514070511-01b2cbcf35e1/util/xxhash3/sse2_amd64.s (about)

     1  // Copyright 2021 ByteDance Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Code generated by command: go run gen.go -sse2 -out ./sse2.s. DO NOT EDIT.
    16  
    17  #include "textflag.h"
    18  
    19  // prime_sse<>: 16 read-only bytes holding the 32-bit constant 0x9e3779b1 in each
    20  // of the four dword slots, emitted below as two identical little-endian quadwords.
    21  DATA prime_sse<>+0(SB)/8, $0x9e3779b19e3779b1
    22  DATA prime_sse<>+8(SB)/8, $0x9e3779b19e3779b1
    23  GLOBL prime_sse<>(SB), RODATA|NOPTR, $16
    24  
    25  // func accumSSE2(acc *[8]uint64, xinput *byte, xsecret *byte, len uint64)
    26  // Requires: SSE2. Every memory access is an unaligned MOVOU, so acc, xinput and xsecret need no particular alignment.
    27  TEXT ·accumSSE2(SB), NOSPLIT, $0-32 // Folds len bytes at xinput into the eight 64-bit lanes at acc, keyed by the bytes at xsecret (offsets 0..191 are read). No stack frame; 32 bytes of arguments.
    28  	MOVQ  acc+0(FP), AX // AX = acc; lanes are loaded here and written back at return
    29  	MOVQ  xinput+8(FP), CX // CX = input cursor
    30  	MOVQ  xsecret+16(FP), DX // DX = secret base, never modified
    31  	MOVQ  xsecret+16(FP), BX // BX = secret cursor, advanced 8 bytes per stripe in accumStripe
    32  	MOVQ  len+24(FP), SI // SI = bytes still to consume (unsigned)
    33  	MOVOU (AX), X1 // X1..X4 = acc[0:2], acc[2:4], acc[4:6], acc[6:8]
    34  	MOVOU 16(AX), X2
    35  	MOVOU 32(AX), X3
    36  	MOVOU 48(AX), X4
    37  	MOVOU prime_sse<>+0(SB), X0 // X0 = multiplier used by the scramble step; free for scratch once the block loop is left
    38  
    39  accumBlock: // while more than 1024 bytes remain: consume 16 stripes of 64 bytes, then scramble the lanes
    40  	CMPQ    SI, $0x00000400
    41  	JLS     accumStripe // unsigned <= : len is a uint64, so a signed branch would misread huge values
    42  	MOVOU   (CX), X5 // stripe 0 (input +0, secret +0): X5 = 16 input bytes
    43  	MOVOU   (DX), X6 // X6 = 16 secret bytes
    44  	PXOR    X5, X6 // X6 = input ^ secret
    45  	PSHUFD  $0x31, X6, X7 // X7 dword slots 0 and 2 = dwords 1 and 3 of X6 (the high half of each 64-bit lane)
    46  	PMULULQ X6, X7 // X7 = lo32 * hi32 of each 64-bit lane of X6 (unsigned 64-bit products)
    47  	PSHUFD  $0x4e, X5, X5 // swap the two 64-bit halves of the input
    48  	PADDQ   X5, X1 // lanes += swapped input
    49  	PADDQ   X7, X1 // lanes += product
    50  	MOVOU   16(CX), X5
    51  	MOVOU   16(DX), X6
    52  	PXOR    X5, X6
    53  	PSHUFD  $0x31, X6, X7
    54  	PMULULQ X6, X7
    55  	PSHUFD  $0x4e, X5, X5
    56  	PADDQ   X5, X2
    57  	PADDQ   X7, X2
    58  	MOVOU   32(CX), X5
    59  	MOVOU   32(DX), X6
    60  	PXOR    X5, X6
    61  	PSHUFD  $0x31, X6, X7
    62  	PMULULQ X6, X7
    63  	PSHUFD  $0x4e, X5, X5
    64  	PADDQ   X5, X3
    65  	PADDQ   X7, X3
    66  	MOVOU   48(CX), X5
    67  	MOVOU   48(DX), X6
    68  	PXOR    X5, X6
    69  	PSHUFD  $0x31, X6, X7
    70  	PMULULQ X6, X7
    71  	PSHUFD  $0x4e, X5, X5
    72  	PADDQ   X5, X4
    73  	PADDQ   X7, X4
    74  	MOVOU   64(CX), X5 // stripe 1: input +64, secret +8
    75  	MOVOU   8(DX), X6
    76  	PXOR    X5, X6
    77  	PSHUFD  $0x31, X6, X7
    78  	PMULULQ X6, X7
    79  	PSHUFD  $0x4e, X5, X5
    80  	PADDQ   X5, X1
    81  	PADDQ   X7, X1
    82  	MOVOU   80(CX), X5
    83  	MOVOU   24(DX), X6
    84  	PXOR    X5, X6
    85  	PSHUFD  $0x31, X6, X7
    86  	PMULULQ X6, X7
    87  	PSHUFD  $0x4e, X5, X5
    88  	PADDQ   X5, X2
    89  	PADDQ   X7, X2
    90  	MOVOU   96(CX), X5
    91  	MOVOU   40(DX), X6
    92  	PXOR    X5, X6
    93  	PSHUFD  $0x31, X6, X7
    94  	PMULULQ X6, X7
    95  	PSHUFD  $0x4e, X5, X5
    96  	PADDQ   X5, X3
    97  	PADDQ   X7, X3
    98  	MOVOU   112(CX), X5
    99  	MOVOU   56(DX), X6
   100  	PXOR    X5, X6
   101  	PSHUFD  $0x31, X6, X7
   102  	PMULULQ X6, X7
   103  	PSHUFD  $0x4e, X5, X5
   104  	PADDQ   X5, X4
   105  	PADDQ   X7, X4
   106  	MOVOU   128(CX), X5 // stripe 2: input +128, secret +16
   107  	MOVOU   16(DX), X6
   108  	PXOR    X5, X6
   109  	PSHUFD  $0x31, X6, X7
   110  	PMULULQ X6, X7
   111  	PSHUFD  $0x4e, X5, X5
   112  	PADDQ   X5, X1
   113  	PADDQ   X7, X1
   114  	MOVOU   144(CX), X5
   115  	MOVOU   32(DX), X6
   116  	PXOR    X5, X6
   117  	PSHUFD  $0x31, X6, X7
   118  	PMULULQ X6, X7
   119  	PSHUFD  $0x4e, X5, X5
   120  	PADDQ   X5, X2
   121  	PADDQ   X7, X2
   122  	MOVOU   160(CX), X5
   123  	MOVOU   48(DX), X6
   124  	PXOR    X5, X6
   125  	PSHUFD  $0x31, X6, X7
   126  	PMULULQ X6, X7
   127  	PSHUFD  $0x4e, X5, X5
   128  	PADDQ   X5, X3
   129  	PADDQ   X7, X3
   130  	MOVOU   176(CX), X5
   131  	MOVOU   64(DX), X6
   132  	PXOR    X5, X6
   133  	PSHUFD  $0x31, X6, X7
   134  	PMULULQ X6, X7
   135  	PSHUFD  $0x4e, X5, X5
   136  	PADDQ   X5, X4
   137  	PADDQ   X7, X4
   138  	MOVOU   192(CX), X5 // stripe 3: input +192, secret +24
   139  	MOVOU   24(DX), X6
   140  	PXOR    X5, X6
   141  	PSHUFD  $0x31, X6, X7
   142  	PMULULQ X6, X7
   143  	PSHUFD  $0x4e, X5, X5
   144  	PADDQ   X5, X1
   145  	PADDQ   X7, X1
   146  	MOVOU   208(CX), X5
   147  	MOVOU   40(DX), X6
   148  	PXOR    X5, X6
   149  	PSHUFD  $0x31, X6, X7
   150  	PMULULQ X6, X7
   151  	PSHUFD  $0x4e, X5, X5
   152  	PADDQ   X5, X2
   153  	PADDQ   X7, X2
   154  	MOVOU   224(CX), X5
   155  	MOVOU   56(DX), X6
   156  	PXOR    X5, X6
   157  	PSHUFD  $0x31, X6, X7
   158  	PMULULQ X6, X7
   159  	PSHUFD  $0x4e, X5, X5
   160  	PADDQ   X5, X3
   161  	PADDQ   X7, X3
   162  	MOVOU   240(CX), X5
   163  	MOVOU   72(DX), X6
   164  	PXOR    X5, X6
   165  	PSHUFD  $0x31, X6, X7
   166  	PMULULQ X6, X7
   167  	PSHUFD  $0x4e, X5, X5
   168  	PADDQ   X5, X4
   169  	PADDQ   X7, X4
   170  	MOVOU   256(CX), X5 // stripe 4: input +256, secret +32
   171  	MOVOU   32(DX), X6
   172  	PXOR    X5, X6
   173  	PSHUFD  $0x31, X6, X7
   174  	PMULULQ X6, X7
   175  	PSHUFD  $0x4e, X5, X5
   176  	PADDQ   X5, X1
   177  	PADDQ   X7, X1
   178  	MOVOU   272(CX), X5
   179  	MOVOU   48(DX), X6
   180  	PXOR    X5, X6
   181  	PSHUFD  $0x31, X6, X7
   182  	PMULULQ X6, X7
   183  	PSHUFD  $0x4e, X5, X5
   184  	PADDQ   X5, X2
   185  	PADDQ   X7, X2
   186  	MOVOU   288(CX), X5
   187  	MOVOU   64(DX), X6
   188  	PXOR    X5, X6
   189  	PSHUFD  $0x31, X6, X7
   190  	PMULULQ X6, X7
   191  	PSHUFD  $0x4e, X5, X5
   192  	PADDQ   X5, X3
   193  	PADDQ   X7, X3
   194  	MOVOU   304(CX), X5
   195  	MOVOU   80(DX), X6
   196  	PXOR    X5, X6
   197  	PSHUFD  $0x31, X6, X7
   198  	PMULULQ X6, X7
   199  	PSHUFD  $0x4e, X5, X5
   200  	PADDQ   X5, X4
   201  	PADDQ   X7, X4
   202  	MOVOU   320(CX), X5 // stripe 5: input +320, secret +40
   203  	MOVOU   40(DX), X6
   204  	PXOR    X5, X6
   205  	PSHUFD  $0x31, X6, X7
   206  	PMULULQ X6, X7
   207  	PSHUFD  $0x4e, X5, X5
   208  	PADDQ   X5, X1
   209  	PADDQ   X7, X1
   210  	MOVOU   336(CX), X5
   211  	MOVOU   56(DX), X6
   212  	PXOR    X5, X6
   213  	PSHUFD  $0x31, X6, X7
   214  	PMULULQ X6, X7
   215  	PSHUFD  $0x4e, X5, X5
   216  	PADDQ   X5, X2
   217  	PADDQ   X7, X2
   218  	MOVOU   352(CX), X5
   219  	MOVOU   72(DX), X6
   220  	PXOR    X5, X6
   221  	PSHUFD  $0x31, X6, X7
   222  	PMULULQ X6, X7
   223  	PSHUFD  $0x4e, X5, X5
   224  	PADDQ   X5, X3
   225  	PADDQ   X7, X3
   226  	MOVOU   368(CX), X5
   227  	MOVOU   88(DX), X6
   228  	PXOR    X5, X6
   229  	PSHUFD  $0x31, X6, X7
   230  	PMULULQ X6, X7
   231  	PSHUFD  $0x4e, X5, X5
   232  	PADDQ   X5, X4
   233  	PADDQ   X7, X4
   234  	MOVOU   384(CX), X5 // stripe 6: input +384, secret +48
   235  	MOVOU   48(DX), X6
   236  	PXOR    X5, X6
   237  	PSHUFD  $0x31, X6, X7
   238  	PMULULQ X6, X7
   239  	PSHUFD  $0x4e, X5, X5
   240  	PADDQ   X5, X1
   241  	PADDQ   X7, X1
   242  	MOVOU   400(CX), X5
   243  	MOVOU   64(DX), X6
   244  	PXOR    X5, X6
   245  	PSHUFD  $0x31, X6, X7
   246  	PMULULQ X6, X7
   247  	PSHUFD  $0x4e, X5, X5
   248  	PADDQ   X5, X2
   249  	PADDQ   X7, X2
   250  	MOVOU   416(CX), X5
   251  	MOVOU   80(DX), X6
   252  	PXOR    X5, X6
   253  	PSHUFD  $0x31, X6, X7
   254  	PMULULQ X6, X7
   255  	PSHUFD  $0x4e, X5, X5
   256  	PADDQ   X5, X3
   257  	PADDQ   X7, X3
   258  	MOVOU   432(CX), X5
   259  	MOVOU   96(DX), X6
   260  	PXOR    X5, X6
   261  	PSHUFD  $0x31, X6, X7
   262  	PMULULQ X6, X7
   263  	PSHUFD  $0x4e, X5, X5
   264  	PADDQ   X5, X4
   265  	PADDQ   X7, X4
   266  	MOVOU   448(CX), X5 // stripe 7: input +448, secret +56
   267  	MOVOU   56(DX), X6
   268  	PXOR    X5, X6
   269  	PSHUFD  $0x31, X6, X7
   270  	PMULULQ X6, X7
   271  	PSHUFD  $0x4e, X5, X5
   272  	PADDQ   X5, X1
   273  	PADDQ   X7, X1
   274  	MOVOU   464(CX), X5
   275  	MOVOU   72(DX), X6
   276  	PXOR    X5, X6
   277  	PSHUFD  $0x31, X6, X7
   278  	PMULULQ X6, X7
   279  	PSHUFD  $0x4e, X5, X5
   280  	PADDQ   X5, X2
   281  	PADDQ   X7, X2
   282  	MOVOU   480(CX), X5
   283  	MOVOU   88(DX), X6
   284  	PXOR    X5, X6
   285  	PSHUFD  $0x31, X6, X7
   286  	PMULULQ X6, X7
   287  	PSHUFD  $0x4e, X5, X5
   288  	PADDQ   X5, X3
   289  	PADDQ   X7, X3
   290  	MOVOU   496(CX), X5
   291  	MOVOU   104(DX), X6
   292  	PXOR    X5, X6
   293  	PSHUFD  $0x31, X6, X7
   294  	PMULULQ X6, X7
   295  	PSHUFD  $0x4e, X5, X5
   296  	PADDQ   X5, X4
   297  	PADDQ   X7, X4
   298  	MOVOU   512(CX), X5 // stripe 8: input +512, secret +64
   299  	MOVOU   64(DX), X6
   300  	PXOR    X5, X6
   301  	PSHUFD  $0x31, X6, X7
   302  	PMULULQ X6, X7
   303  	PSHUFD  $0x4e, X5, X5
   304  	PADDQ   X5, X1
   305  	PADDQ   X7, X1
   306  	MOVOU   528(CX), X5
   307  	MOVOU   80(DX), X6
   308  	PXOR    X5, X6
   309  	PSHUFD  $0x31, X6, X7
   310  	PMULULQ X6, X7
   311  	PSHUFD  $0x4e, X5, X5
   312  	PADDQ   X5, X2
   313  	PADDQ   X7, X2
   314  	MOVOU   544(CX), X5
   315  	MOVOU   96(DX), X6
   316  	PXOR    X5, X6
   317  	PSHUFD  $0x31, X6, X7
   318  	PMULULQ X6, X7
   319  	PSHUFD  $0x4e, X5, X5
   320  	PADDQ   X5, X3
   321  	PADDQ   X7, X3
   322  	MOVOU   560(CX), X5
   323  	MOVOU   112(DX), X6
   324  	PXOR    X5, X6
   325  	PSHUFD  $0x31, X6, X7
   326  	PMULULQ X6, X7
   327  	PSHUFD  $0x4e, X5, X5
   328  	PADDQ   X5, X4
   329  	PADDQ   X7, X4
   330  	MOVOU   576(CX), X5 // stripe 9: input +576, secret +72
   331  	MOVOU   72(DX), X6
   332  	PXOR    X5, X6
   333  	PSHUFD  $0x31, X6, X7
   334  	PMULULQ X6, X7
   335  	PSHUFD  $0x4e, X5, X5
   336  	PADDQ   X5, X1
   337  	PADDQ   X7, X1
   338  	MOVOU   592(CX), X5
   339  	MOVOU   88(DX), X6
   340  	PXOR    X5, X6
   341  	PSHUFD  $0x31, X6, X7
   342  	PMULULQ X6, X7
   343  	PSHUFD  $0x4e, X5, X5
   344  	PADDQ   X5, X2
   345  	PADDQ   X7, X2
   346  	MOVOU   608(CX), X5
   347  	MOVOU   104(DX), X6
   348  	PXOR    X5, X6
   349  	PSHUFD  $0x31, X6, X7
   350  	PMULULQ X6, X7
   351  	PSHUFD  $0x4e, X5, X5
   352  	PADDQ   X5, X3
   353  	PADDQ   X7, X3
   354  	MOVOU   624(CX), X5
   355  	MOVOU   120(DX), X6
   356  	PXOR    X5, X6
   357  	PSHUFD  $0x31, X6, X7
   358  	PMULULQ X6, X7
   359  	PSHUFD  $0x4e, X5, X5
   360  	PADDQ   X5, X4
   361  	PADDQ   X7, X4
   362  	MOVOU   640(CX), X5 // stripe 10: input +640, secret +80
   363  	MOVOU   80(DX), X6
   364  	PXOR    X5, X6
   365  	PSHUFD  $0x31, X6, X7
   366  	PMULULQ X6, X7
   367  	PSHUFD  $0x4e, X5, X5
   368  	PADDQ   X5, X1
   369  	PADDQ   X7, X1
   370  	MOVOU   656(CX), X5
   371  	MOVOU   96(DX), X6
   372  	PXOR    X5, X6
   373  	PSHUFD  $0x31, X6, X7
   374  	PMULULQ X6, X7
   375  	PSHUFD  $0x4e, X5, X5
   376  	PADDQ   X5, X2
   377  	PADDQ   X7, X2
   378  	MOVOU   672(CX), X5
   379  	MOVOU   112(DX), X6
   380  	PXOR    X5, X6
   381  	PSHUFD  $0x31, X6, X7
   382  	PMULULQ X6, X7
   383  	PSHUFD  $0x4e, X5, X5
   384  	PADDQ   X5, X3
   385  	PADDQ   X7, X3
   386  	MOVOU   688(CX), X5
   387  	MOVOU   128(DX), X6
   388  	PXOR    X5, X6
   389  	PSHUFD  $0x31, X6, X7
   390  	PMULULQ X6, X7
   391  	PSHUFD  $0x4e, X5, X5
   392  	PADDQ   X5, X4
   393  	PADDQ   X7, X4
   394  	MOVOU   704(CX), X5 // stripe 11: input +704, secret +88
   395  	MOVOU   88(DX), X6
   396  	PXOR    X5, X6
   397  	PSHUFD  $0x31, X6, X7
   398  	PMULULQ X6, X7
   399  	PSHUFD  $0x4e, X5, X5
   400  	PADDQ   X5, X1
   401  	PADDQ   X7, X1
   402  	MOVOU   720(CX), X5
   403  	MOVOU   104(DX), X6
   404  	PXOR    X5, X6
   405  	PSHUFD  $0x31, X6, X7
   406  	PMULULQ X6, X7
   407  	PSHUFD  $0x4e, X5, X5
   408  	PADDQ   X5, X2
   409  	PADDQ   X7, X2
   410  	MOVOU   736(CX), X5
   411  	MOVOU   120(DX), X6
   412  	PXOR    X5, X6
   413  	PSHUFD  $0x31, X6, X7
   414  	PMULULQ X6, X7
   415  	PSHUFD  $0x4e, X5, X5
   416  	PADDQ   X5, X3
   417  	PADDQ   X7, X3
   418  	MOVOU   752(CX), X5
   419  	MOVOU   136(DX), X6
   420  	PXOR    X5, X6
   421  	PSHUFD  $0x31, X6, X7
   422  	PMULULQ X6, X7
   423  	PSHUFD  $0x4e, X5, X5
   424  	PADDQ   X5, X4
   425  	PADDQ   X7, X4
   426  	MOVOU   768(CX), X5 // stripe 12: input +768, secret +96
   427  	MOVOU   96(DX), X6
   428  	PXOR    X5, X6
   429  	PSHUFD  $0x31, X6, X7
   430  	PMULULQ X6, X7
   431  	PSHUFD  $0x4e, X5, X5
   432  	PADDQ   X5, X1
   433  	PADDQ   X7, X1
   434  	MOVOU   784(CX), X5
   435  	MOVOU   112(DX), X6
   436  	PXOR    X5, X6
   437  	PSHUFD  $0x31, X6, X7
   438  	PMULULQ X6, X7
   439  	PSHUFD  $0x4e, X5, X5
   440  	PADDQ   X5, X2
   441  	PADDQ   X7, X2
   442  	MOVOU   800(CX), X5
   443  	MOVOU   128(DX), X6
   444  	PXOR    X5, X6
   445  	PSHUFD  $0x31, X6, X7
   446  	PMULULQ X6, X7
   447  	PSHUFD  $0x4e, X5, X5
   448  	PADDQ   X5, X3
   449  	PADDQ   X7, X3
   450  	MOVOU   816(CX), X5
   451  	MOVOU   144(DX), X6
   452  	PXOR    X5, X6
   453  	PSHUFD  $0x31, X6, X7
   454  	PMULULQ X6, X7
   455  	PSHUFD  $0x4e, X5, X5
   456  	PADDQ   X5, X4
   457  	PADDQ   X7, X4
   458  	MOVOU   832(CX), X5 // stripe 13: input +832, secret +104
   459  	MOVOU   104(DX), X6
   460  	PXOR    X5, X6
   461  	PSHUFD  $0x31, X6, X7
   462  	PMULULQ X6, X7
   463  	PSHUFD  $0x4e, X5, X5
   464  	PADDQ   X5, X1
   465  	PADDQ   X7, X1
   466  	MOVOU   848(CX), X5
   467  	MOVOU   120(DX), X6
   468  	PXOR    X5, X6
   469  	PSHUFD  $0x31, X6, X7
   470  	PMULULQ X6, X7
   471  	PSHUFD  $0x4e, X5, X5
   472  	PADDQ   X5, X2
   473  	PADDQ   X7, X2
   474  	MOVOU   864(CX), X5
   475  	MOVOU   136(DX), X6
   476  	PXOR    X5, X6
   477  	PSHUFD  $0x31, X6, X7
   478  	PMULULQ X6, X7
   479  	PSHUFD  $0x4e, X5, X5
   480  	PADDQ   X5, X3
   481  	PADDQ   X7, X3
   482  	MOVOU   880(CX), X5
   483  	MOVOU   152(DX), X6
   484  	PXOR    X5, X6
   485  	PSHUFD  $0x31, X6, X7
   486  	PMULULQ X6, X7
   487  	PSHUFD  $0x4e, X5, X5
   488  	PADDQ   X5, X4
   489  	PADDQ   X7, X4
   490  	MOVOU   896(CX), X5 // stripe 14: input +896, secret +112
   491  	MOVOU   112(DX), X6
   492  	PXOR    X5, X6
   493  	PSHUFD  $0x31, X6, X7
   494  	PMULULQ X6, X7
   495  	PSHUFD  $0x4e, X5, X5
   496  	PADDQ   X5, X1
   497  	PADDQ   X7, X1
   498  	MOVOU   912(CX), X5
   499  	MOVOU   128(DX), X6
   500  	PXOR    X5, X6
   501  	PSHUFD  $0x31, X6, X7
   502  	PMULULQ X6, X7
   503  	PSHUFD  $0x4e, X5, X5
   504  	PADDQ   X5, X2
   505  	PADDQ   X7, X2
   506  	MOVOU   928(CX), X5
   507  	MOVOU   144(DX), X6
   508  	PXOR    X5, X6
   509  	PSHUFD  $0x31, X6, X7
   510  	PMULULQ X6, X7
   511  	PSHUFD  $0x4e, X5, X5
   512  	PADDQ   X5, X3
   513  	PADDQ   X7, X3
   514  	MOVOU   944(CX), X5
   515  	MOVOU   160(DX), X6
   516  	PXOR    X5, X6
   517  	PSHUFD  $0x31, X6, X7
   518  	PMULULQ X6, X7
   519  	PSHUFD  $0x4e, X5, X5
   520  	PADDQ   X5, X4
   521  	PADDQ   X7, X4
   522  	MOVOU   960(CX), X5 // stripe 15: input +960, secret +120
   523  	MOVOU   120(DX), X6
   524  	PXOR    X5, X6
   525  	PSHUFD  $0x31, X6, X7
   526  	PMULULQ X6, X7
   527  	PSHUFD  $0x4e, X5, X5
   528  	PADDQ   X5, X1
   529  	PADDQ   X7, X1
   530  	MOVOU   976(CX), X5
   531  	MOVOU   136(DX), X6
   532  	PXOR    X5, X6
   533  	PSHUFD  $0x31, X6, X7
   534  	PMULULQ X6, X7
   535  	PSHUFD  $0x4e, X5, X5
   536  	PADDQ   X5, X2
   537  	PADDQ   X7, X2
   538  	MOVOU   992(CX), X5
   539  	MOVOU   152(DX), X6
   540  	PXOR    X5, X6
   541  	PSHUFD  $0x31, X6, X7
   542  	PMULULQ X6, X7
   543  	PSHUFD  $0x4e, X5, X5
   544  	PADDQ   X5, X3
   545  	PADDQ   X7, X3
   546  	MOVOU   1008(CX), X5
   547  	MOVOU   168(DX), X6
   548  	PXOR    X5, X6
   549  	PSHUFD  $0x31, X6, X7
   550  	PMULULQ X6, X7
   551  	PSHUFD  $0x4e, X5, X5
   552  	PADDQ   X5, X4
   553  	PADDQ   X7, X4
   554  	ADDQ    $0x00000400, CX // advance the input cursor past the 1024-byte block
   555  	SUBQ    $0x00000400, SI
   556  	MOVOU   128(DX), X5 // scramble X1: unaligned load of secret[128:144]; a PXOR memory operand would fault unless xsecret were 16-byte aligned
   557  	PXOR    X1, X5 // X5 = lanes ^ secret
   558  	PSRLQ   $0x2f, X1 // X1 = lanes >> 47
   559  	PXOR    X5, X1 // X1 = lanes ^ (lanes >> 47) ^ secret
   560  	PSHUFD  $0xf5, X1, X5 // X5 = high dword of each 64-bit lane copied into its low dword
   561  	PMULULQ X0, X5 // X5 = hi32 * multiplier
   562  	PSLLQ   $0x20, X5 // shift that partial product into the upper 32 bits
   563  	PMULULQ X0, X1 // X1 = lo32 * multiplier
   564  	PADDQ   X5, X1 // X1 = 64-bit lane * multiplier, truncated to 64 bits
   565  	MOVOU   144(DX), X5 // scramble X2 with secret[144:160]
   566  	PXOR    X2, X5
   567  	PSRLQ   $0x2f, X2
   568  	PXOR    X5, X2
   569  	PSHUFD  $0xf5, X2, X5
   570  	PMULULQ X0, X5
   571  	PSLLQ   $0x20, X5
   572  	PMULULQ X0, X2
   573  	PADDQ   X5, X2
   574  	MOVOU   160(DX), X5 // scramble X3 with secret[160:176]
   575  	PXOR    X3, X5
   576  	PSRLQ   $0x2f, X3
   577  	PXOR    X5, X3
   578  	PSHUFD  $0xf5, X3, X5
   579  	PMULULQ X0, X5
   580  	PSLLQ   $0x20, X5
   581  	PMULULQ X0, X3
   582  	PADDQ   X5, X3
   583  	MOVOU   176(DX), X5 // scramble X4 with secret[176:192]
   584  	PXOR    X4, X5
   585  	PSRLQ   $0x2f, X4
   586  	PXOR    X5, X4
   587  	PSHUFD  $0xf5, X4, X5
   588  	PMULULQ X0, X5
   589  	PSLLQ   $0x20, X5
   590  	PMULULQ X0, X4
   591  	PADDQ   X5, X4
   592  	JMP     accumBlock
   593  
   594  accumStripe: // while more than 64 bytes remain: consume one 64-byte stripe; control never returns to accumBlock, so X0 is reused as scratch
   595  	CMPQ    SI, $0x40
   596  	JLS     accumLastStripe // unsigned <= : 1..64 bytes (or none) are left for the final stripe
   597  	MOVOU   (CX), X0 // X0 = 16 input bytes
   598  	MOVOU   (BX), X5 // X5 = 16 secret bytes at the moving secret cursor
   599  	PXOR    X0, X5 // X5 = input ^ secret
   600  	PSHUFD  $0x31, X5, X6 // X6 dword slots 0 and 2 = high dwords of X5's 64-bit lanes
   601  	PMULULQ X5, X6 // X6 = lo32 * hi32 per 64-bit lane
   602  	PSHUFD  $0x4e, X0, X0 // swap the input's 64-bit halves
   603  	PADDQ   X0, X1
   604  	PADDQ   X6, X1
   605  	MOVOU   16(CX), X0
   606  	MOVOU   16(BX), X5
   607  	PXOR    X0, X5
   608  	PSHUFD  $0x31, X5, X6
   609  	PMULULQ X5, X6
   610  	PSHUFD  $0x4e, X0, X0
   611  	PADDQ   X0, X2
   612  	PADDQ   X6, X2
   613  	MOVOU   32(CX), X0
   614  	MOVOU   32(BX), X5
   615  	PXOR    X0, X5
   616  	PSHUFD  $0x31, X5, X6
   617  	PMULULQ X5, X6
   618  	PSHUFD  $0x4e, X0, X0
   619  	PADDQ   X0, X3
   620  	PADDQ   X6, X3
   621  	MOVOU   48(CX), X0
   622  	MOVOU   48(BX), X5
   623  	PXOR    X0, X5
   624  	PSHUFD  $0x31, X5, X6
   625  	PMULULQ X5, X6
   626  	PSHUFD  $0x4e, X0, X0
   627  	PADDQ   X0, X4
   628  	PADDQ   X6, X4
   629  	ADDQ    $0x00000040, CX // next 64 input bytes
   630  	SUBQ    $0x00000040, SI
   631  	ADDQ    $0x00000008, BX // the secret window slides 8 bytes per stripe
   632  	JMP     accumStripe
   633  
   634  accumLastStripe: // final stripe: the last 64 bytes of the input, keyed with secret[121:185]
   635  	CMPQ    SI, $0x00
   636  	JE      return // nothing left (only reachable when len was 0)
   637  	SUBQ    $0x40, CX // CX = cursor + remaining - 64, so the stripe ends exactly at the end of the input
   638  	ADDQ    SI, CX // NOTE(review): starts before xinput when the whole call had len < 64; presumably callers only pass longer inputs, confirm
   639  	MOVOU   (CX), X0
   640  	MOVOU   121(DX), X5
   641  	PXOR    X0, X5
   642  	PSHUFD  $0x31, X5, X6
   643  	PMULULQ X5, X6
   644  	PSHUFD  $0x4e, X0, X0
   645  	PADDQ   X0, X1
   646  	PADDQ   X6, X1
   647  	MOVOU   16(CX), X0
   648  	MOVOU   137(DX), X5
   649  	PXOR    X0, X5
   650  	PSHUFD  $0x31, X5, X6
   651  	PMULULQ X5, X6
   652  	PSHUFD  $0x4e, X0, X0
   653  	PADDQ   X0, X2
   654  	PADDQ   X6, X2
   655  	MOVOU   32(CX), X0
   656  	MOVOU   153(DX), X5
   657  	PXOR    X0, X5
   658  	PSHUFD  $0x31, X5, X6
   659  	PMULULQ X5, X6
   660  	PSHUFD  $0x4e, X0, X0
   661  	PADDQ   X0, X3
   662  	PADDQ   X6, X3
   663  	MOVOU   48(CX), X0
   664  	MOVOU   169(DX), X5
   665  	PXOR    X0, X5
   666  	PSHUFD  $0x31, X5, X6
   667  	PMULULQ X5, X6
   668  	PSHUFD  $0x4e, X0, X0
   669  	PADDQ   X0, X4
   670  	PADDQ   X6, X4
   671  
   672  return: // write the four accumulator registers back to acc
   673  	MOVOU X1, (AX)
   674  	MOVOU X2, 16(AX)
   675  	MOVOU X3, 32(AX)
   676  	MOVOU X4, 48(AX)
   677  	RET