github.com/emmansun/gmsm@v0.29.1/cipher/xts_arm64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  #define B0 V0
     6  #define T1 V1
     7  #define T2 V2
     8  
     9  #define POLY V3
    10  #define ZERO V4
    11  
    12  #define TW R0
    13  #define GB R1
    14  #define I R2
    15  
    16  #define doubleTweak(B0, ZERO, POLY, I, T1, T2) \
    17  	VMOV	B0.D[1], I                    \
    18  	ASR	$63, I                            \
    19  	VMOV	I, T1.D2                      \
    20  	VAND	POLY.B16, T1.B16, T1.B16      \
    21  	\
    22  	VUSHR	$63, B0.D2, T2.D2             \
    23  	VEXT	$8, T2.B16, ZERO.B16, T2.B16  \
    24  	VSLI	$1, B0.D2, T2.D2              \
    25  	VEOR	T1.B16, T2.B16, B0.B16
    26  
    27  #define gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) \
    28  	VREV64 B0.B16, B0.B16                 \
    29  	VEXT	$8, B0.B16, B0.B16, B0.B16    \
    30  	\
    31  	VMOV	B0.D[0], I                    \
    32  	LSL $63, I                            \
    33  	ASR $63, I                            \
    34  	VMOV	I, T1.D2                      \
    35  	VAND	POLY.B16, T1.B16, T1.B16      \
    36  	\
    37  	VSHL $63, B0.D2, T2.D2                \
    38  	VEXT	$8, ZERO.B16, T2.B16, T2.B16  \
    39  	VSRI	$1, B0.D2, T2.D2              \
    40  	VEOR	T1.B16, T2.B16, B0.B16        \
    41  	\
    42  	VEXT	$8, B0.B16, B0.B16, B0.B16    \
    43  	VREV64 B0.B16, B0.B16
    44  
    45  // func mul2(tweak *[blockSize]byte, isGB bool)
    46  TEXT ·mul2(SB),NOSPLIT,$0
    47  	MOVD tweak+0(FP), TW
    48  	MOVB isGB+8(FP), GB
    49  
    50  	VLD1 (TW), [B0.B16]
    51  
    52  	VEOR	POLY.B16, POLY.B16, POLY.B16
    53  	VEOR	ZERO.B16, ZERO.B16, ZERO.B16
    54  
    55  	CMP $1, GB
    56  	BEQ gb_alg
    57  
    58  	MOVD	$0x87, I
    59  	VMOV	I, POLY.D[0]
    60  
    61  	doubleTweak(B0, ZERO, POLY, I, T1, T2)
    62  
    63  	VST1 [B0.B16], (TW)
    64  	RET
    65  
    66  gb_alg:
    67  	MOVD	$0xE1, I
    68  	LSL	$56, I
    69  	VMOV	I, POLY.D[1]
    70  
    71  	gbDoubleTweak(B0, ZERO, POLY, I, T1, T2)
    72  
    73  	VST1 [B0.B16], (TW)
    74  	RET
    75  
    76  // func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
    77  TEXT ·doubleTweaks(SB),NOSPLIT,$0
    78  	MOVD tweak+0(FP), TW
    79  	MOVD tweaks+8(FP), R3
    80  	MOVD tweaks_len+16(FP), R4
    81  	MOVB isGB+32(FP), GB
    82  
    83  	LSR $4, R4
    84  	EOR R5, R5
    85  
    86  	VEOR	POLY.B16, POLY.B16, POLY.B16
    87  	VEOR	ZERO.B16, ZERO.B16, ZERO.B16
    88  
    89  	VLD1 (TW), [B0.B16]
    90  
    91  	CMP $1, GB
    92  	BEQ dt_gb_alg
    93  
    94  	MOVD	$0x87, I
    95  	VMOV	I, POLY.D[0]
    96  
    97  loop:
    98  	VST1.P [B0.B16], 16(R3)
    99  
   100  	doubleTweak(B0, ZERO, POLY, I, T1, T2)
   101  
   102  	ADD $1, R5
   103  	CMP R4, R5
   104  	BNE loop
   105  
   106  	VST1 [B0.B16], (TW)
   107  	RET
   108  
   109  dt_gb_alg:
   110  	MOVD	$0xE1, I
   111  	LSL	$56, I
   112  	VMOV	I, POLY.D[1]
   113  
   114  gb_loop:
   115  	VST1.P [B0.B16], 16(R3)
   116  
   117  	gbDoubleTweak(B0, ZERO, POLY, I, T1, T2)
   118  
   119  	ADD $1, R5
   120  	CMP R4, R5
   121  	BNE gb_loop
   122  
   123  	VST1 [B0.B16], (TW)	
   124  	RET