github.com/emmansun/gmsm@v0.29.1/cipher/xts_amd64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  // bswapMask: PSHUFB control that reverses the order of all 16 bytes.
     6  DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
     7  DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
     8  GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
     9  // gcmPoly: 0x87 in the lowest byte; reduction constant for doubleTweak.
    10  DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000087
    11  DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000
    12  GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
    13  // gbGcmPoly: 0xe1 in the highest byte; reduction constant for gbDoubleTweak.
    14  DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000
    15  DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000
    16  GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16
    17  
    18  
    19  #define POLY X0  // reduction constant, loaded from gcmPoly or gbGcmPoly
    20  #define BSWAP X1 // byte-reversal shuffle mask (bswapMask); GB paths only
    21  #define B0 X2    // the 128-bit tweak being doubled
    22  #define T0 X3    // scratch, clobbered by the doubling macros
    23  #define T1 X4    // scratch, clobbered by the doubling macros
    24  
    25  #define doubleTweak(TWEAK, RPOLY, MASK, CARRY) \
    26  	\ // TWEAK = (TWEAK << 1) ^ (RPOLY if bit 127 was set); clobbers MASK, CARRY
    27  	MOVOU TWEAK, CARRY        \
    28  	PSRLL $31, CARRY          \ // keep only the top bit of every 32-bit lane
    29  	PSLLDQ $4, CARRY          \ // and move it to bit 0 of the next lane up
    30  	PSHUFD $0xff, TWEAK, MASK \
    31  	PSRAL $31, MASK           \ // all-ones when bit 127 of TWEAK is set
    32  	PAND RPOLY, MASK          \
    33  	PSLLL $1, TWEAK           \
    34  	PXOR CARRY, TWEAK         \
    35  	PXOR MASK, TWEAK
    36  
    37  #define gbDoubleTweak(TWEAK, SWAP, RPOLY, CARRY, MASK) \
    38  	\ // byte-reverse TWEAK, shift the 128-bit value right by one bit, XOR in
    39  	\ // RPOLY if a set bit fell off, byte-reverse back; clobbers CARRY, MASK
    40  	PSHUFB SWAP, TWEAK        \
    41  	PSHUFD $0, TWEAK, MASK    \ // copy the lowest 32-bit lane into every lane
    42  	PSLLL $31, MASK           \
    43  	PSRAL $31, MASK           \ // all-ones when bit 0 was set
    44  	PAND RPOLY, MASK          \
    45  	MOVOU TWEAK, CARRY        \
    46  	PSLLQ $63, CARRY          \ // bit 0 of each 64-bit half -> bit 63
    47  	PSRLDQ $8, CARRY          \ // high half's carry lands in the low half
    48  	PSRLQ $1, TWEAK           \
    49  	POR CARRY, TWEAK          \
    50  	PXOR MASK, TWEAK          \
    51  	PSHUFB SWAP, TWEAK
    52  
    53  // mul2 replaces the 16-byte value at tweak with its double, in place.
    54  // When isGB is 1 the byte-reversed variant (gbDoubleTweak with gbGcmPoly)
    55  // is applied; for any other value doubleTweak with gcmPoly is used.
    56  //
    57  // func mul2(tweak *[blockSize]byte, isGB bool)
    58  TEXT ·mul2(SB),NOSPLIT,$0
    59  	MOVQ tweak+0(FP), SI
    60  	MOVB isGB+8(FP), CX
    61  	MOVOU (SI), B0
    62  
    63  	CMPB CX, $1
    64  	JNE mul2_plain
    65  
    66  	// isGB == 1
    67  	MOVOU gbGcmPoly<>(SB), POLY
    68  	MOVOU bswapMask<>(SB), BSWAP
    69  	gbDoubleTweak(B0, BSWAP, POLY, T0, T1)
    70  	MOVOU B0, (SI)
    71  	RET
    72  
    73  mul2_plain:
    74  	// isGB != 1
    75  	MOVOU gcmPoly<>(SB), POLY
    76  	doubleTweak(B0, POLY, T0, T1)
    77  	MOVOU B0, (SI)
    78  	RET
    79  
    80  // doubleTweaks stores the current tweak into each whole 16-byte slot of
    81  // tweaks, doubling it after every store (doubleTweak, or gbDoubleTweak when
    82  // isGB is 1), then writes the next unused value back to *tweak. Bytes past
    83  // the last whole slot are not written; with no whole slot nothing is stored.
    84  //
    85  // func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
    86  TEXT ·doubleTweaks(SB),NOSPLIT,$0
    87  	MOVQ tweak+0(FP), DI
    88  	MOVQ tweaks+8(FP), AX
    89  	MOVQ tweaks_len+16(FP), BX
    90  	MOVB isGB+32(FP), CX
    91  
    92  	// BX = number of whole slots. Both loops below store before they test the
    93  	// count, so zero must be rejected here to avoid writing past tweaks.
    94  	SHRQ $4, BX
    95  	JZ dt_done // SHRQ leaves ZF set when the resulting count is zero
    96  
    97  	MOVOU (0*16)(DI), B0
    98  	XORQ DX, DX // DX = slots filled so far
    99  	CMPB CX, $1
   100  	JE dt_gb_alg
   101  
   102  	MOVOU gcmPoly<>(SB), POLY
   103  loop:
   104  	MOVOU B0, (0*16)(AX)
   105  	LEAQ 16(AX), AX
   106  	doubleTweak(B0, POLY, T0, T1)
   107  	ADDQ $1, DX
   108  	CMPQ DX, BX
   109  	JB loop
   110  	MOVOU B0, (0*16)(DI)
   111  dt_done:
   112  	RET
   113  
   114  dt_gb_alg:
   115  	MOVOU bswapMask<>(SB), BSWAP
   116  	MOVOU gbGcmPoly<>(SB), POLY
   117  gb_loop:
   118  	MOVOU B0, (0*16)(AX)
   119  	LEAQ 16(AX), AX
   120  	gbDoubleTweak(B0, BSWAP, POLY, T0, T1)
   121  	ADDQ $1, DX
   122  	CMPQ DX, BX
   123  	JB gb_loop
   124  	MOVOU B0, (0*16)(DI)
   125  	RET