github.com/emmansun/gmsm@v0.29.1/zuc/eia_asm_arm64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  // bit_reverse_table: two 16-byte nibble lookup tables used with VTBL to reverse the bit order of a byte.
     5  DATA bit_reverse_table<>+0x00(SB)/8, $0x0e060a020c040800 // entries 0x0-0x7: 4-bit reversal of the index (0->0, 1->8, 2->4, ...)
     6  DATA bit_reverse_table<>+0x08(SB)/8, $0x0f070b030d050901 // entries 0x8-0xf: 4-bit reversal of the index (8->1, 9->9, ...)
     7  DATA bit_reverse_table<>+0x10(SB)/8, $0xe060a020c0408000 // entries 0x0-0x7 again, with the reversed nibble shifted into bits 7:4
     8  DATA bit_reverse_table<>+0x18(SB)/8, $0xf070b030d0509010 // entries 0x8-0xf again, with the reversed nibble shifted into bits 7:4
     9  GLOBL bit_reverse_table<>(SB), RODATA, $32 // file-local, read-only, 32 bytes (loaded as two 16-byte vectors)
    10  // shuf_mask_dw: two 16-byte VTBL index vectors that spread 32-bit words into the low half of 64-bit lanes; 0xff indexes are out of range and yield zero bytes.
    11  DATA shuf_mask_dw<>+0x00(SB)/8, $0xffffffff03020100 // mask 0, low lane:  source bytes 0-3, upper 4 bytes zero
    12  DATA shuf_mask_dw<>+0x08(SB)/8, $0xffffffff07060504 // mask 0, high lane: source bytes 4-7, upper 4 bytes zero
    13  DATA shuf_mask_dw<>+0x10(SB)/8, $0xffffffff0b0a0908 // mask 1, low lane:  source bytes 8-11, upper 4 bytes zero
    14  DATA shuf_mask_dw<>+0x18(SB)/8, $0xffffffff0f0e0d0c // mask 1, high lane: source bytes 12-15, upper 4 bytes zero
    15  GLOBL shuf_mask_dw<>(SB), RODATA, $32 // file-local, read-only, 32 bytes (loaded as two 16-byte vectors)
    16  // General-purpose register aliases holding the arguments of eia3Round16B.
    17  #define AX R2 // t: pointer to the 32-bit tag word
    18  #define BX R3 // pointer to the key stream words
    19  #define CX R4 // p: pointer to the 16 message bytes
    20  #define DX R5 // tagSize (loaded by eia3Round16B but not read afterwards in the visible code)
    21  // Vector register aliases.
    22  #define XTMP1 V1 // scratch
    23  #define XTMP2 V2 // scratch
    24  #define XTMP3 V3 // scratch
    25  #define XTMP4 V4 // scratch
    26  #define XTMP5 V5 // scratch
    27  #define XTMP6 V6 // scratch
    28  #define XDATA V7 // the 16 message bytes as loaded from memory
    29  #define XDIGEST V8 // XOR of the four carry-less products
    30  #define KS_L V9 // key stream words arranged for data words 0 and 1
    31  #define KS_M1 V10 // key stream words arranged for data words 2 and 3
    32  #define KS_M2 V11 // not referenced in the visible code
    33  #define KS_H V12 // not referenced in the visible code
    34  #define BIT_REV_TAB_L V20 // first 16 bytes of bit_reverse_table (reversed nibble in bits 3:0)
    35  #define BIT_REV_TAB_H V21 // second 16 bytes of bit_reverse_table (reversed nibble in bits 7:4)
    36  #define BIT_REV_AND_TAB V22 // 0x0F in every byte, used to isolate nibbles
    37  #define SHUF_MASK_DW0_DW1 V23 // first 16 bytes of shuf_mask_dw (selects data words 0 and 1)
    38  #define SHUF_MASK_DW2_DW3 V24 // second 16 bytes of shuf_mask_dw (selects data words 2 and 3)
    39  // LOAD_GLOBAL_DATA loads bit_reverse_table into V20/V21, a 0x0F-per-byte mask into V22 and shuf_mask_dw into V23/V24; it clobbers R0.
    40  #define LOAD_GLOBAL_DATA() \
    41  	MOVD $bit_reverse_table<>(SB), R0                         \
    42  	VLD1 (R0), [BIT_REV_TAB_L.B16, BIT_REV_TAB_H.B16]         \
    43  	MOVW $0x0F0F0F0F, R0                                      \
    44  	VDUP R0, BIT_REV_AND_TAB.S4                               \
    45  	MOVD $shuf_mask_dw<>(SB), R0                              \
    46  	VLD1 (R0), [SHUF_MASK_DW0_DW1.B16, SHUF_MASK_DW2_DW3.B16]
    47  // eia3Round16B absorbs the 16 bytes at p into the 32-bit tag at t using the 32 bytes of key stream at keyStream, then copies key stream bytes 16-31 to the front.
    48  // func eia3Round16B(t *uint32, keyStream *uint32, p *byte, tagSize int)
    49  TEXT ·eia3Round16B(SB),NOSPLIT,$0-32
    50  	MOVD t+0(FP), AX
    51  	MOVD keyStream+8(FP), BX // argument name matches the Go prototype so go vet's asmdecl check accepts it
    52  	MOVD p+16(FP), CX
    53  	MOVD tagSize+24(FP), DX // DX is not read again in this routine
    54  
    55  	LOAD_GLOBAL_DATA() // fills V20-V24, clobbers R0
    56  
    57  	// Reverse the bit order within each of the 16 data bytes
    58  	VLD1 (CX), [XDATA.B16]
    59  	VAND BIT_REV_AND_TAB.B16, XDATA.B16, XTMP3.B16 // XTMP3 - low nibble of every byte
    60  	VUSHR $4, XDATA.S4, XTMP1.S4 // 32-bit lane shift: bits cross byte boundaries...
    61  	VAND BIT_REV_AND_TAB.B16, XTMP1.B16, XTMP1.B16 // ...so mask again; XTMP1 - high nibble of every byte
    62  
    63  	VTBL XTMP3.B16, [BIT_REV_TAB_H.B16], XTMP3.B16 // reversed low nibble, placed in bits 7:4
    64  	VTBL XTMP1.B16, [BIT_REV_TAB_L.B16], XTMP1.B16 // reversed high nibble, placed in bits 3:0
    65  	VEOR XTMP1.B16, XTMP3.B16, XTMP3.B16 // XTMP3 - bit reverse data bytes
    66  
    67  	// ZUC authentication part, 4x32 data bits
    68  	// setup KS
    69  	VLD1 (BX), [XTMP1.B16, XTMP2.B16]
    70  	VST1 [XTMP2.B16], (BX) // Copy last 16 bytes of KS to the front
    71  	// TODO: find a cheaper way to build the lane layouts below
    72  	VDUP XTMP1.S[1], KS_L.S4
    73  	VMOV XTMP1.S[0], KS_L.S[1]
    74  	VMOV XTMP1.S[2], KS_L.S[2] // KS bits [63:32 31:0 95:64 63:32]
    75  	VDUP XTMP1.S[3], KS_M1.S4
    76  	VMOV XTMP1.S[2], KS_M1.S[1]
    77  	VMOV XTMP2.S[0], KS_M1.S[2] // KS bits [127:96 95:64 159:128 127:96]
    78  
    79  	// setup DATA
    80  	VTBL SHUF_MASK_DW0_DW1.B16, [XTMP3.B16], XTMP1.B16 // XTMP1 - Data bits [31:0 0s 63:32 0s]
    81  	VTBL SHUF_MASK_DW2_DW3.B16, [XTMP3.B16], XTMP2.B16 // XTMP2 - Data bits [95:64 0s 127:96 0s]
    82  
    83  	// clmul
    84  	// Multiply every 32-bit data word by its 64-bit key stream window,
    85  	// then xor the results from 4 32-bit words together
    86  	VPMULL KS_L.D1, XTMP1.D1, XTMP3.Q1 // data word 0
    87  	VPMULL2 KS_L.D2, XTMP1.D2, XTMP4.Q1 // data word 1
    88  	VPMULL KS_M1.D1, XTMP2.D1, XTMP5.Q1 // data word 2
    89  	VPMULL2 KS_M1.D2, XTMP2.D2, XTMP6.Q1 // data word 3
    90  
    91  	VEOR XTMP3.B16, XTMP4.B16, XTMP3.B16
    92  	VEOR XTMP5.B16, XTMP6.B16, XTMP5.B16
    93  	VEOR XTMP3.B16, XTMP5.B16, XDIGEST.B16
    94  
    95  	VMOV XDIGEST.S[1], R10 // bits [63:32] of the combined product are the tag update
    96  	MOVW (AX), R11
    97  	EORW R10, R11 // *t ^= update
    98  	MOVW R11, (AX)
    99  
   100  	RET