github.com/arr-ai/hash@v0.8.0/asm_arm64.s (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "funcdata.h"
     7  #include "textflag.h"
     8  
     9  // func aeshash32(p unsafe.Pointer, h uintptr) uintptr
        //
        // aeshash32 hashes the 4 bytes at p together with seed h using AES round
        // instructions keyed by the first 16 bytes of ·aeskeysched, and stores the
        // low 64 bits of the resulting state to ret.
        // Uses R0-R3, V0 and V2 as scratch.
    10  TEXT ·aeshash32(SB),NOSPLIT|NOFRAME,$0-24
    11  	MOVD	p+0(FP), R0
    12  	MOVD	h+8(FP), R1
    13  	MOVD	$ret+16(FP), R2
    14  	MOVD	$·aeskeysched+0(SB), R3
    15  
        	// Build the state in V0: 32-bit lane 0 = seed (only the low 32 bits
        	// of R1 fit in an S lane), 32-bit lane 1 = the 4 data bytes, the
        	// remaining lanes stay zero. V2 holds the 16-byte key.
    16  	VEOR	V0.B16, V0.B16, V0.B16
    17  	VLD1	(R3), [V2.B16]
    18  	VLD1	(R0), V0.S[1]
    19  	VMOV	R1, V0.S[0]
    20  
        	// Three AESE rounds with the same key; AESMC follows the first two only.
    21  	AESE	V2.B16, V0.B16
    22  	AESMC	V0.B16, V0.B16
    23  	AESE	V2.B16, V0.B16
    24  	AESMC	V0.B16, V0.B16
    25  	AESE	V2.B16, V0.B16
    26  
        	// Result = low 64 bits of V0, written to the return slot.
    27  	VST1	[V0.D1], (R2)
    28  	RET
    29  
    30  // func aeshash64(p unsafe.Pointer, h uintptr) uintptr
        //
        // aeshash64 hashes the 8 bytes at p together with seed h using AES round
        // instructions keyed by the first 16 bytes of ·aeskeysched, and stores the
        // low 64 bits of the resulting state to ret.
        // Uses R0-R3, V0 and V2 as scratch.
    31  TEXT ·aeshash64(SB),NOSPLIT|NOFRAME,$0-24
    32  	MOVD	p+0(FP), R0
    33  	MOVD	h+8(FP), R1
    34  	MOVD	$ret+16(FP), R2
    35  	MOVD	$·aeskeysched+0(SB), R3
    36  
        	// Build the state in V0: 64-bit lane 0 = seed, 64-bit lane 1 = the
        	// 8 data bytes. V2 holds the 16-byte key.
    37  	VEOR	V0.B16, V0.B16, V0.B16
    38  	VLD1	(R3), [V2.B16]
    39  	VLD1	(R0), V0.D[1]
    40  	VMOV	R1, V0.D[0]
    41  
        	// Three AESE rounds with the same key; AESMC follows the first two only.
    42  	AESE	V2.B16, V0.B16
    43  	AESMC	V0.B16, V0.B16
    44  	AESE	V2.B16, V0.B16
    45  	AESMC	V0.B16, V0.B16
    46  	AESE	V2.B16, V0.B16
    47  
        	// Result = low 64 bits of V0, written to the return slot.
    48  	VST1	[V0.D1], (R2)
    49  	RET
    50  
    51  // func aeshash(p unsafe.Pointer, h, size uintptr) uintptr
        //
        // aeshash hashes the byte range starting at p with seed h. It only loads
        // its arguments into the registers aeshashbody expects (R0 data, R1 length,
        // R2 result address, R3 seed) and branches there; aeshashbody stores the
        // result through R2 and returns directly to the caller.
    52  TEXT ·aeshash(SB),NOSPLIT|NOFRAME,$0-32
    53  	MOVD	p+0(FP), R0
    54  	MOVD	s+16(FP), R1
        	// h is an 8-byte uintptr: load all 64 bits. aeshashbody inserts R3
        	// into a 64-bit vector lane, so a 32-bit load would silently drop
        	// the upper half of the seed.
    55  	MOVD	h+8(FP), R3
    56  	MOVD	$ret+24(FP), R2
    57  	B	aeshashbody<>(SB)
    58  
    59  // func aeshashstr(p unsafe.Pointer, h uintptr) uintptr
        //
        // aeshashstr hashes the string whose header (data pointer, length) is at p,
        // with seed h. Like aeshash it only sets up R0-R3 and branches to
        // aeshashbody, so the same bytes and seed take the same path in both.
    60  TEXT ·aeshashstr(SB),NOSPLIT|NOFRAME,$0-24
    61  	MOVD	p+0(FP), R10 // string pointer
    62  	LDP	(R10), (R0, R1) // string data / length
        	// Full 64-bit seed load, matching aeshash.
    63  	MOVD	h+8(FP), R3
    64  	MOVD	$ret+16(FP), R2 // return address
    65  	B	aeshashbody<>(SB)
    66  
    67  // R0: data
    68  // R1: length
    69  // R2: address to put return value
    70  // R3: seed data
        //
        // aeshashbody is the variable-length AES hash shared by aeshash and
        // aeshashstr. It combines the seed and the length into a per-call seed,
        // scrambles up to eight 16-byte blocks of the key schedule with it, mixes in
        // the data with three AES rounds per lane, XOR-folds the lanes together and
        // stores the low 64 bits of the result through R2.
        //
        // The key schedule is the package-local ·aeskeysched, the same symbol
        // aeshash32 and aeshash64 use. Up to 128 bytes of it are read (16 + 64 + 48).
        // NOTE(review): assumes ·aeskeysched is at least 128 bytes long; confirm
        // against its Go declaration.
        //
        // Clobbers R0, R1, R4, R10, V0-V15, V30 and the condition flags.
    71  TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
        	// V30 = per-call seed: low 64 bits = seed (R3), high 64 bits = length (R1).
    72  	VEOR	V30.B16, V30.B16, V30.B16
    73  	VMOV	R3, V30.D[0]
    74  	VMOV	R1, V30.D[1] // load length into seed
    75  
        	// V0 = first 16 key bytes scrambled with the per-call seed.
        	// R4 is left pointing at the next key block for the longer cases.
    76  	MOVD	$·aeskeysched+0(SB), R4
    77  	VLD1.P	16(R4), [V0.B16]
    78  	AESE	V30.B16, V0.B16
    79  	AESMC	V0.B16, V0.B16
        	// Dispatch on length (unsigned compares): <16, ==16, <=32, <=64, <=128, >128.
    80  	CMP	$16, R1
    81  	BLO	aes0to15
    82  	BEQ	aes16
    83  	CMP	$32, R1
    84  	BLS	aes17to32
    85  	CMP	$64, R1
    86  	BLS	aes33to64
    87  	CMP	$128, R1
    88  	BLS	aes65to128
    89  	B	aes129plus
    90  
        // 0..15 bytes: assemble the input into a zeroed V2 piece by piece, one
        // load per set bit of the length (8, 4, 2, 1 bytes), advancing R0 as we go.
    91  aes0to15:
    92  	CBZ	R1, aes0
    93  	VEOR	V2.B16, V2.B16, V2.B16
    94  	TBZ	$3, R1, less_than_8
    95  	VLD1.P	8(R0), V2.D[0]
    96  
    97  less_than_8:
    98  	TBZ	$2, R1, less_than_4
    99  	VLD1.P	4(R0), V2.S[2]
   100  
   101  less_than_4:
   102  	TBZ	$1, R1, less_than_2
   103  	VLD1.P	2(R0), V2.H[6]
   104  
   105  less_than_2:
   106  	TBZ	$0, R1, done
   107  	VLD1	(R0), V2.B[14]
        // Single-lane finish (also used for exactly 16 bytes): three AESE rounds on
        // the data in V2, keyed by the scrambled seed V0.
   108  done:
   109  	AESE	V0.B16, V2.B16
   110  	AESMC	V2.B16, V2.B16
   111  	AESE	V0.B16, V2.B16
   112  	AESMC	V2.B16, V2.B16
   113  	AESE	V0.B16, V2.B16
   114  
   115  	VST1	[V2.D1], (R2)
   116  	RET
        // Empty input: the result is the low 64 bits of the scrambled seed itself.
   117  aes0:
   118  	VST1	[V0.D1], (R2)
   119  	RET
        // Exactly 16 bytes: load them whole and share the single-lane finish.
   120  aes16:
   121  	VLD1	(R0), [V2.B16]
   122  	B	done
   123  
        // 17..32 bytes: two lanes. V2 = first 16 bytes, V3 = last 16 bytes; the two
        // loads overlap when the length is below 32.
   124  aes17to32:
   125  	// make second seed
   126  	VLD1	(R4), [V1.B16]
   127  	AESE	V30.B16, V1.B16
   128  	AESMC	V1.B16, V1.B16
   129  	SUB	$16, R1, R10
   130  	VLD1.P	(R0)(R10), [V2.B16]
   131  	VLD1	(R0), [V3.B16]
   132  
   133  	AESE	V0.B16, V2.B16
   134  	AESMC	V2.B16, V2.B16
   135  	AESE	V1.B16, V3.B16
   136  	AESMC	V3.B16, V3.B16
   137  
   138  	AESE	V0.B16, V2.B16
   139  	AESMC	V2.B16, V2.B16
   140  	AESE	V1.B16, V3.B16
   141  	AESMC	V3.B16, V3.B16
   142  
   143  	AESE	V0.B16, V2.B16
   144  	AESE	V1.B16, V3.B16
   145  
        	// Fold the two lanes together.
   146  	VEOR	V3.B16, V2.B16, V2.B16
   147  	VST1	[V2.D1], (R2)
   148  	RET
   149  
        // 33..64 bytes: four lanes. V4,V5 = first 32 bytes, V6,V7 = last 32 bytes
        // (overlapping when the length is below 64); seeds are V0..V3.
   150  aes33to64:
   151  	VLD1	(R4), [V1.B16, V2.B16, V3.B16]
   152  	AESE	V30.B16, V1.B16
   153  	AESMC	V1.B16, V1.B16
   154  	AESE	V30.B16, V2.B16
   155  	AESMC	V2.B16, V2.B16
   156  	AESE	V30.B16, V3.B16
   157  	AESMC	V3.B16, V3.B16
   158  	SUB	$32, R1, R10
   159  
   160  	VLD1.P	(R0)(R10), [V4.B16, V5.B16]
   161  	VLD1	(R0), [V6.B16, V7.B16]
   162  
   163  	AESE	V0.B16, V4.B16
   164  	AESMC	V4.B16, V4.B16
   165  	AESE	V1.B16, V5.B16
   166  	AESMC	V5.B16, V5.B16
   167  	AESE	V2.B16, V6.B16
   168  	AESMC	V6.B16, V6.B16
   169  	AESE	V3.B16, V7.B16
   170  	AESMC	V7.B16, V7.B16
   171  
   172  	AESE	V0.B16, V4.B16
   173  	AESMC	V4.B16, V4.B16
   174  	AESE	V1.B16, V5.B16
   175  	AESMC	V5.B16, V5.B16
   176  	AESE	V2.B16, V6.B16
   177  	AESMC	V6.B16, V6.B16
   178  	AESE	V3.B16, V7.B16
   179  	AESMC	V7.B16, V7.B16
   180  
   181  	AESE	V0.B16, V4.B16
   182  	AESE	V1.B16, V5.B16
   183  	AESE	V2.B16, V6.B16
   184  	AESE	V3.B16, V7.B16
   185  
        	// Fold the four lanes into V4.
   186  	VEOR	V6.B16, V4.B16, V4.B16
   187  	VEOR	V7.B16, V5.B16, V5.B16
   188  	VEOR	V5.B16, V4.B16, V4.B16
   189  
   190  	VST1	[V4.D1], (R2)
   191  	RET
   192  
        // 65..128 bytes: eight lanes. V8..V11 = first 64 bytes, V12..V15 = last 64
        // bytes (overlapping when the length is below 128); seeds are V0..V7.
   193  aes65to128:
   194  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   195  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   196  	AESE	V30.B16, V1.B16
   197  	AESMC	V1.B16, V1.B16
   198  	AESE	V30.B16, V2.B16
   199  	AESMC	V2.B16, V2.B16
   200  	AESE	V30.B16, V3.B16
   201  	AESMC	V3.B16, V3.B16
   202  	AESE	V30.B16, V4.B16
   203  	AESMC	V4.B16, V4.B16
   204  	AESE	V30.B16, V5.B16
   205  	AESMC	V5.B16, V5.B16
   206  	AESE	V30.B16, V6.B16
   207  	AESMC	V6.B16, V6.B16
   208  	AESE	V30.B16, V7.B16
   209  	AESMC	V7.B16, V7.B16
   210  
   211  	SUB	$64, R1, R10
   212  	VLD1.P	(R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   213  	VLD1	(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   214  	AESE	V0.B16,	 V8.B16
   215  	AESMC	V8.B16,  V8.B16
   216  	AESE	V1.B16,	 V9.B16
   217  	AESMC	V9.B16,  V9.B16
   218  	AESE	V2.B16, V10.B16
   219  	AESMC	V10.B16,  V10.B16
   220  	AESE	V3.B16, V11.B16
   221  	AESMC	V11.B16,  V11.B16
   222  	AESE	V4.B16, V12.B16
   223  	AESMC	V12.B16,  V12.B16
   224  	AESE	V5.B16, V13.B16
   225  	AESMC	V13.B16,  V13.B16
   226  	AESE	V6.B16, V14.B16
   227  	AESMC	V14.B16,  V14.B16
   228  	AESE	V7.B16, V15.B16
   229  	AESMC	V15.B16,  V15.B16
   230  
   231  	AESE	V0.B16,	 V8.B16
   232  	AESMC	V8.B16,  V8.B16
   233  	AESE	V1.B16,	 V9.B16
   234  	AESMC	V9.B16,  V9.B16
   235  	AESE	V2.B16, V10.B16
   236  	AESMC	V10.B16,  V10.B16
   237  	AESE	V3.B16, V11.B16
   238  	AESMC	V11.B16,  V11.B16
   239  	AESE	V4.B16, V12.B16
   240  	AESMC	V12.B16,  V12.B16
   241  	AESE	V5.B16, V13.B16
   242  	AESMC	V13.B16,  V13.B16
   243  	AESE	V6.B16, V14.B16
   244  	AESMC	V14.B16,  V14.B16
   245  	AESE	V7.B16, V15.B16
   246  	AESMC	V15.B16,  V15.B16
   247  
   248  	AESE	V0.B16,	 V8.B16
   249  	AESE	V1.B16,	 V9.B16
   250  	AESE	V2.B16, V10.B16
   251  	AESE	V3.B16, V11.B16
   252  	AESE	V4.B16, V12.B16
   253  	AESE	V5.B16, V13.B16
   254  	AESE	V6.B16, V14.B16
   255  	AESE	V7.B16, V15.B16
   256  
        	// Fold the eight lanes into V8.
   257  	VEOR	V12.B16, V8.B16, V8.B16
   258  	VEOR	V13.B16, V9.B16, V9.B16
   259  	VEOR	V14.B16, V10.B16, V10.B16
   260  	VEOR	V15.B16, V11.B16, V11.B16
   261  	VEOR	V10.B16, V8.B16, V8.B16
   262  	VEOR	V11.B16, V9.B16, V9.B16
   263  	VEOR	V9.B16, V8.B16, V8.B16
   264  
   265  	VST1	[V8.D1], (R2)
   266  	RET
   267  
        // More than 128 bytes. Unlike the shorter cases, the running state lives in
        // the seed registers V0..V7 and the data blocks in V8..V15 act as the round
        // input. The last 128 bytes are mixed in first; the loop then consumes the
        // input from the front in 128-byte blocks.
   268  aes129plus:
   269  	PRFM (R0), PLDL1KEEP
   270  	VLD1.P	64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
   271  	VLD1	(R4), [V5.B16, V6.B16, V7.B16]
   272  	AESE	V30.B16, V1.B16
   273  	AESMC	V1.B16, V1.B16
   274  	AESE	V30.B16, V2.B16
   275  	AESMC	V2.B16, V2.B16
   276  	AESE	V30.B16, V3.B16
   277  	AESMC	V3.B16, V3.B16
   278  	AESE	V30.B16, V4.B16
   279  	AESMC	V4.B16, V4.B16
   280  	AESE	V30.B16, V5.B16
   281  	AESMC	V5.B16, V5.B16
   282  	AESE	V30.B16, V6.B16
   283  	AESMC	V6.B16, V6.B16
   284  	AESE	V30.B16, V7.B16
   285  	AESMC	V7.B16, V7.B16
        	// R10 = data + length - 128: load the final 128 bytes into V8..V15.
   286  	ADD	R0, R1, R10
   287  	SUB	$128, R10, R10
   288  	VLD1.P	64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
   289  	VLD1	(R10), [V12.B16, V13.B16, V14.B16, V15.B16]
        	// R1 = (length - 1) / 128 = number of loop iterations (at least 1 here).
   290  	SUB	$1, R1, R1
   291  	LSR	$7, R1, R1
   292  
   293  aesloop:
        	// Mix in the block currently held in V8..V15.
   294  	AESE	V8.B16,	 V0.B16
   295  	AESMC	V0.B16,  V0.B16
   296  	AESE	V9.B16,	 V1.B16
   297  	AESMC	V1.B16,  V1.B16
   298  	AESE	V10.B16, V2.B16
   299  	AESMC	V2.B16,  V2.B16
   300  	AESE	V11.B16, V3.B16
   301  	AESMC	V3.B16,  V3.B16
   302  	AESE	V12.B16, V4.B16
   303  	AESMC	V4.B16,  V4.B16
   304  	AESE	V13.B16, V5.B16
   305  	AESMC	V5.B16,  V5.B16
   306  	AESE	V14.B16, V6.B16
   307  	AESMC	V6.B16,  V6.B16
   308  	AESE	V15.B16, V7.B16
   309  	AESMC	V7.B16,  V7.B16
   310  
        	// Load the next 128 bytes from the front (two 64-byte halves) and
        	// mix each half in as soon as it is loaded.
   311  	VLD1.P	64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
   312  	AESE	V8.B16,	 V0.B16
   313  	AESMC	V0.B16,  V0.B16
   314  	AESE	V9.B16,	 V1.B16
   315  	AESMC	V1.B16,  V1.B16
   316  	AESE	V10.B16, V2.B16
   317  	AESMC	V2.B16,  V2.B16
   318  	AESE	V11.B16, V3.B16
   319  	AESMC	V3.B16,  V3.B16
   320  
   321  	VLD1.P	64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
   322  	AESE	V12.B16, V4.B16
   323  	AESMC	V4.B16,  V4.B16
   324  	AESE	V13.B16, V5.B16
   325  	AESMC	V5.B16,  V5.B16
   326  	AESE	V14.B16, V6.B16
   327  	AESMC	V6.B16,  V6.B16
   328  	AESE	V15.B16, V7.B16
   329  	AESMC	V7.B16,  V7.B16
   330  	SUB	$1, R1, R1
   331  	CBNZ	R1, aesloop
   332  
        	// Final three rounds, reusing the last block loaded by the loop.
   333  	AESE	V8.B16,	 V0.B16
   334  	AESMC	V0.B16,  V0.B16
   335  	AESE	V9.B16,	 V1.B16
   336  	AESMC	V1.B16,  V1.B16
   337  	AESE	V10.B16, V2.B16
   338  	AESMC	V2.B16,  V2.B16
   339  	AESE	V11.B16, V3.B16
   340  	AESMC	V3.B16,  V3.B16
   341  	AESE	V12.B16, V4.B16
   342  	AESMC	V4.B16,  V4.B16
   343  	AESE	V13.B16, V5.B16
   344  	AESMC	V5.B16,  V5.B16
   345  	AESE	V14.B16, V6.B16
   346  	AESMC	V6.B16,  V6.B16
   347  	AESE	V15.B16, V7.B16
   348  	AESMC	V7.B16,  V7.B16
   349  
   350  	AESE	V8.B16,	 V0.B16
   351  	AESMC	V0.B16,  V0.B16
   352  	AESE	V9.B16,	 V1.B16
   353  	AESMC	V1.B16,  V1.B16
   354  	AESE	V10.B16, V2.B16
   355  	AESMC	V2.B16,  V2.B16
   356  	AESE	V11.B16, V3.B16
   357  	AESMC	V3.B16,  V3.B16
   358  	AESE	V12.B16, V4.B16
   359  	AESMC	V4.B16,  V4.B16
   360  	AESE	V13.B16, V5.B16
   361  	AESMC	V5.B16,  V5.B16
   362  	AESE	V14.B16, V6.B16
   363  	AESMC	V6.B16,  V6.B16
   364  	AESE	V15.B16, V7.B16
   365  	AESMC	V7.B16,  V7.B16
   366  
   367  	AESE	V8.B16,	 V0.B16
   368  	AESE	V9.B16,	 V1.B16
   369  	AESE	V10.B16, V2.B16
   370  	AESE	V11.B16, V3.B16
   371  	AESE	V12.B16, V4.B16
   372  	AESE	V13.B16, V5.B16
   373  	AESE	V14.B16, V6.B16
   374  	AESE	V15.B16, V7.B16
   375  
        	// Fold the eight state lanes into V0.
   376  	VEOR	V0.B16, V1.B16, V0.B16
   377  	VEOR	V2.B16, V3.B16, V2.B16
   378  	VEOR	V4.B16, V5.B16, V4.B16
   379  	VEOR	V6.B16, V7.B16, V6.B16
   380  	VEOR	V0.B16, V2.B16, V0.B16
   381  	VEOR	V4.B16, V6.B16, V4.B16
   382  	VEOR	V4.B16, V0.B16, V0.B16
   383  
   384  	VST1	[V0.D1], (R2)
   385  	RET