//go:build !purego

// Origin: github.com/emmansun/gmsm@v0.29.1/cipher/xts_amd64.s
//
// Go assembler (Plan 9 syntax, operands are "src, dst") for amd64 with SSE.
// Implements doubling of a 16-byte XTS tweak in two flavours:
//   - default: the tweak is treated as a little-endian 128-bit value, shifted
//     left by one bit and reduced with the constant 0x87;
//   - GB:      the tweak is byte-reversed, shifted right by one bit, reduced
//     with 0xe1 in the top byte, and byte-reversed back.
//     NOTE(review): presumably the GB/T 17964 XTS variant; confirm with callers.
// Arguments are read from the caller's frame through the FP pseudo-register.

#include "textflag.h"

// PSHUFB mask that reverses the order of all 16 bytes.
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607

// Reduction constant for the default (left-shift) doubling.
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000087
DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000

// Reduction constant for the GB (right-shift) doubling.
DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000
DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000

GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16

// Register roles shared by both functions.
#define POLY X0  // reduction constant
#define BSWAP X1 // byte-reversal mask (GB path only)
#define B0 X2    // current tweak
#define T0 X3    // scratch
#define T1 X4    // scratch

// doubleTweak: B0 = (B0 << 1) over 128 bits, XORed with POLY when bit 127 of
// the input was set. Clobbers T0 and T1.
#define doubleTweak(B0, POLY, T0, T1) \
	\ // T0 = top dword of B0 broadcast to all four lanes
	PSHUFD $0xff, B0, T0 \
	MOVOU B0, T1 \
	\ // T0 = all ones when bit 127 was set, then masked down to POLY
	PSRAL $31, T0 \
	PAND POLY, T0 \
	\ // T1 = bit 31 of every lane, carried into bit 0 of the next higher lane
	PSRLL $31, T1 \
	PSLLDQ $4, T1 \
	\ // shift each lane left by one, then merge reduction and carries
	PSLLL $1, B0 \
	PXOR T0, B0 \
	PXOR T1, B0

// gbDoubleTweak: byte-reverse B0, shift it right by one bit over 128 bits,
// XOR with POLY when bit 0 of the reversed value was set, byte-reverse back.
// Clobbers T0 and T1.
#define gbDoubleTweak(B0, BSWAP, POLY, T0, T1) \
	PSHUFB BSWAP, B0 \
	\ // T0 = copy used for the cross-qword carry, T1 = low dword broadcast
	MOVOU B0, T0 \
	PSHUFD $0, B0, T1 \
	\ // shift both qwords right; move bit 0 of the high qword into bit 63 of the low one
	PSRLQ $1, B0 \
	PSLLQ $63, T0 \
	PSRLDQ $8, T0 \
	POR T0, B0 \
	\ // T1 = all ones when the shifted-out bit 0 was set, then masked down to POLY
	PSLLL $31, T1 \
	PSRAL $31, T1 \
	PAND POLY, T1 \
	PXOR T1, B0 \
	PSHUFB BSWAP, B0

// func mul2(tweak *[blockSize]byte, isGB bool)
//
// Doubles the 16-byte tweak in place.
// In:    tweak = pointer to the 16 bytes, isGB selects the GB variant.
// Out:   *tweak overwritten with the doubled value.
// Clobb: AX, DI, X0-X4, flags.
TEXT ·mul2(SB),NOSPLIT,$0
	MOVQ tweak+0(FP), DI
	MOVB isGB+8(FP), AX

	MOVOU (0*16)(DI), B0

	CMPB AX, $1
	JE gb_alg

	MOVOU gcmPoly<>(SB), POLY

	doubleTweak(B0, POLY, T0, T1)

	MOVOU B0, (0*16)(DI)
	RET

gb_alg:
	MOVOU bswapMask<>(SB), BSWAP
	MOVOU gbGcmPoly<>(SB), POLY

	gbDoubleTweak(B0, BSWAP, POLY, T0, T1)

	MOVOU B0, (0*16)(DI)
	RET

// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// Fills tweaks with len(tweaks)/16 consecutive tweak values, starting with the
// current *tweak and doubling after every stored block. On return *tweak holds
// the value that follows the last one stored. A trailing partial block in
// tweaks is left untouched.
// When len(tweaks) < 16 nothing is written and *tweak is left unchanged.
// Clobb: AX, BX, CX, DX, DI, X0-X4, flags.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVQ tweak+0(FP), DI
	MOVQ tweaks+8(FP), AX
	MOVQ tweaks_len+16(FP), BX
	MOVB isGB+32(FP), CX

	SHRQ $4, BX // BX = number of whole 16-byte blocks

	// The loops below store before they test the counter, so an empty
	// request must be rejected here to avoid writing past the slice.
	TESTQ BX, BX
	JZ dt_done

	MOVOU (0*16)(DI), B0
	XORQ DX, DX // DX = blocks written so far

	CMPB CX, $1
	JE dt_gb_alg

	MOVOU gcmPoly<>(SB), POLY

loop:
	MOVOU B0, (0*16)(AX)
	LEAQ 16(AX), AX

	doubleTweak(B0, POLY, T0, T1)

	ADDQ $1, DX
	CMPQ DX, BX
	JB loop

	MOVOU B0, (0*16)(DI)
	RET

dt_gb_alg:
	MOVOU bswapMask<>(SB), BSWAP
	MOVOU gbGcmPoly<>(SB), POLY

gb_loop:
	MOVOU B0, (0*16)(AX)
	LEAQ 16(AX), AX

	gbDoubleTweak(B0, BSWAP, POLY, T0, T1)

	ADDQ $1, DX
	CMPQ DX, BX
	JB gb_loop

	MOVOU B0, (0*16)(DI)
	RET

dt_done:
	RET