// Origin: github.com/emmansun/gmsm@v0.29.1/cipher/xts_arm64.s

//go:build !purego

#include "textflag.h"

// Vector registers.
#define B0 V0   // the 16-byte tweak being doubled
#define T1 V1   // scratch: conditional reduction constant
#define T2 V2   // scratch: cross-lane carry bit / shifted value

#define POLY V3 // reduction constant (set up by the caller of the macros)
#define ZERO V4 // all-zero vector

// General purpose registers.
#define TW R0   // pointer to the tweak
#define GB R1   // isGB flag
#define I R2    // scratch

// doubleTweak treats B0 as one 128-bit value made of two 64-bit lanes
// (D[0] low, D[1] high), shifts it left by one bit and, when the bit shifted
// out of the top was set, XORs POLY into the result.
//
// Steps:
//   1. Broadcast the sign of the high lane into T1 and mask it with POLY, so
//      T1 is POLY when the top bit was set and zero otherwise.
//   2. Take bit 63 of each lane, move the low lane's bit into the high lane
//      (the low lane receives zero from ZERO), and insert B0<<1 above it.
//   3. XOR the two partial results into B0.
//
// Clobbers I, T1 and T2. ZERO must be all zero on entry.
// (No comments inside the macro body: they would swallow the line
// continuations.)
#define doubleTweak(B0, ZERO, POLY, I, T1, T2) \
	VMOV B0.D[1], I                        \
	ASR $63, I                             \
	VMOV I, T1.D2                          \
	VAND POLY.B16, T1.B16, T1.B16          \
	\
	VUSHR $63, B0.D2, T2.D2                \
	VEXT $8, T2.B16, ZERO.B16, T2.B16      \
	VSLI $1, B0.D2, T2.D2                  \
	VEOR T1.B16, T2.B16, B0.B16

// gbDoubleTweak is the mirrored variant used when isGB is set. It reverses
// the 16 bytes of B0 (VREV64 within each lane, then VEXT to swap the lanes),
// shifts the resulting 128-bit value right by one bit, XORs POLY into it when
// the bit shifted out of the bottom was set, and reverses the bytes back.
//
// Clobbers I, T1 and T2. ZERO must be all zero on entry.
#define gbDoubleTweak(B0, ZERO, POLY, I, T1, T2) \
	VREV64 B0.B16, B0.B16                    \
	VEXT $8, B0.B16, B0.B16, B0.B16          \
	\
	VMOV B0.D[0], I                          \
	LSL $63, I                               \
	ASR $63, I                               \
	VMOV I, T1.D2                            \
	VAND POLY.B16, T1.B16, T1.B16            \
	\
	VSHL $63, B0.D2, T2.D2                   \
	VEXT $8, ZERO.B16, T2.B16, T2.B16        \
	VSRI $1, B0.D2, T2.D2                    \
	VEOR T1.B16, T2.B16, B0.B16              \
	\
	VEXT $8, B0.B16, B0.B16, B0.B16          \
	VREV64 B0.B16, B0.B16

// func mul2(tweak *[blockSize]byte, isGB bool)
//
// mul2 loads the 16 bytes at tweak, doubles them once and stores the result
// back in place. isGB selects gbDoubleTweak (constant 0xE1 in the top byte of
// the high lane); otherwise doubleTweak is used (constant 0x87 in the low
// lane).
TEXT ·mul2(SB),NOSPLIT,$0
	MOVD tweak+0(FP), TW
	MOVB isGB+8(FP), GB

	VLD1 (TW), [B0.B16]

	VEOR POLY.B16, POLY.B16, POLY.B16
	VEOR ZERO.B16, ZERO.B16, ZERO.B16

	CMP $1, GB
	BEQ gb_alg

	// POLY = 0x87 in D[0], zero in D[1].
	MOVD $0x87, I
	VMOV I, POLY.D[0]

	doubleTweak(B0, ZERO, POLY, I, T1, T2)

	VST1 [B0.B16], (TW)
	RET

gb_alg:
	// POLY = 0xE1 << 56 in D[1], zero in D[0].
	MOVD $0xE1, I
	LSL $56, I
	VMOV I, POLY.D[1]

	gbDoubleTweak(B0, ZERO, POLY, I, T1, T2)

	VST1 [B0.B16], (TW)
	RET

// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// doubleTweaks fills tweaks with consecutive tweak values: for each complete
// 16-byte slot in tweaks it stores the current tweak into the slot and then
// doubles the tweak. After the last slot the advanced tweak is written back
// to *tweak. Bytes of tweaks beyond the last complete 16-byte slot are not
// touched.
//
// If tweaks holds fewer than 16 bytes the function returns immediately
// without reading or writing memory.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVD tweak+0(FP), TW
	MOVD tweaks+8(FP), R3
	MOVD tweaks_len+16(FP), R4
	MOVB isGB+32(FP), GB

	// R4 = number of complete 16-byte slots.
	LSR $4, R4
	// Both loops below test their counter at the bottom, so with a zero
	// count they would store past the end of tweaks and keep going until
	// the counter wrapped. Bail out before touching memory.
	CBZ R4, dt_done

	// R5 = slots written so far.
	EOR R5, R5

	VEOR POLY.B16, POLY.B16, POLY.B16
	VEOR ZERO.B16, ZERO.B16, ZERO.B16

	VLD1 (TW), [B0.B16]

	CMP $1, GB
	BEQ dt_gb_alg

	// POLY = 0x87 in D[0], zero in D[1].
	MOVD $0x87, I
	VMOV I, POLY.D[0]

loop:
	// Emit the current tweak, post-incrementing the output pointer.
	VST1.P [B0.B16], 16(R3)

	doubleTweak(B0, ZERO, POLY, I, T1, T2)

	ADD $1, R5
	CMP R4, R5
	BNE loop

	// Persist the tweak that follows the last emitted one.
	VST1 [B0.B16], (TW)
	RET

dt_gb_alg:
	// POLY = 0xE1 << 56 in D[1], zero in D[0].
	MOVD $0xE1, I
	LSL $56, I
	VMOV I, POLY.D[1]

gb_loop:
	// Emit the current tweak, post-incrementing the output pointer.
	VST1.P [B0.B16], 16(R3)

	gbDoubleTweak(B0, ZERO, POLY, I, T1, T2)

	ADD $1, R5
	CMP R4, R5
	BNE gb_loop

	// Persist the tweak that follows the last emitted one.
	VST1 [B0.B16], (TW)
	RET

dt_done:
	RET