// SM3 block function for arm64 in Go assembler syntax.
//
// Register roles inside blockARM64:
//   SI (R0)    pointer to the current 64-byte message block
//   DI (R1)    pointer one past the last full block
//   BP (R2)    base address of the expanded message words W[0..67] on the stack
//   AX, BX, CX, DX (R3-R6), hlp0 (R7)   scratch used by the round macros
//   hlp1 (R9)  pointer to the digest state
//   R19-R26    working variables a..h (their roles rotate from round to round)
//   R10-R17    copy of the state taken at the start of each block
//
// The round macros communicate through fixed scratch registers, so the order
// in which they are invoked inside SM3ROUND0/1/2 must not be changed:
//   MSGSCHEDULE*  leaves W[t+4] in AX
//   SM3SS1        leaves ss1 in BX
//   SM3TT1x       leaves tt1 in CX and W[t] in hlp0
//   SM3TT2x       leaves tt2 in BX
#include "textflag.h"

#define SI R0
#define DI R1
#define BP R2
#define AX R3
#define BX R4
#define CX R5
#define DX R6
#define hlp0 R7
#define hlp1 R9

// Wt+4 = Mt+4; for 0 <= t <= 11
// Loads message word index+4, byte-swaps it and stores it as W[index+4].
// AX holds W[index+4] afterwards.
#define MSGSCHEDULE01(index) \
	MOVW ((index+4)*4)(SI), AX; \
	REVW AX, AX; \
	MOVW AX, ((index+4)*4)(BP)

// x = Wt-12 XOR Wt-5 XOR ROTL(15, Wt+1)
// p1(x) = x XOR ROTL(15, x) XOR ROTL(23, x)
// Wt+4 = p1(x) XOR ROTL(7, Wt-9) XOR Wt-2
// for 12 <= t <= 63
// Rotations are written as right rotations: ROTL(n, v) == RORW $(32-n), v.
// AX holds W[index+4] afterwards; BX and CX are clobbered.
#define MSGSCHEDULE1(index) \
	MOVW ((index+1)*4)(BP), AX; \
	RORW $17, AX; \ // AX = ROTL(15, Wt+1)
	MOVW ((index-12)*4)(BP), BX; \
	EORW BX, AX; \
	MOVW ((index-5)*4)(BP), BX; \
	EORW BX, AX; \ // AX = x
	RORW $17, AX, BX; \ // BX = ROTL(15, x)
	RORW $9, AX, CX; \ // CX = ROTL(23, x)
	EORW BX, AX; \ // AX = x xor ROTL(15, x)
	EORW CX, AX; \ // AX = x xor ROTL(15, x) xor ROTL(23, x)
	MOVW ((index-9)*4)(BP), BX; \
	RORW $25, BX; \ // BX = ROTL(7, Wt-9)
	MOVW ((index-2)*4)(BP), CX; \
	EORW BX, AX; \
	EORW CX, AX; \
	MOVW AX, ((index+4)*4)(BP)

// Calculate ss1 in BX
// x = ROTL(12, a) + e + const
// ret = ROTL(7, x)
// const is added as given; the caller passes the round constant already
// rotated for the round.
#define SM3SS1(const, a, e) \
	RORW $20, a, BX; \ // BX = ROTL(12, a)
	ADDW e, BX; \
	ADDW $const, BX; \
	RORW $25, BX // BX = ROTL(7, x) = ss1

// Calculate tt1 in CX
// ret = (a XOR b XOR c) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
// Expects Wt+4 in AX and ss1 in BX. Clobbers AX and DX; leaves Wt in hlp0.
#define SM3TT10(index, a, b, c, d) \
	EORW a, b, DX; \
	EORW c, DX; \ // (a XOR b XOR c)
	ADDW d, DX; \ // (a XOR b XOR c) + d
	MOVW ((index)*4)(BP), hlp0; \ // Wt
	EORW hlp0, AX; \ // Wt XOR Wt+4
	ADDW AX, DX; \
	RORW $20, a, CX; \
	EORW BX, CX; \ // ROTL(12, a) XOR ss1
	ADDW DX, CX // (a XOR b XOR c) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)

// Calculate tt2 in BX
// ret = (e XOR f XOR g) + h + ss1 + Wt
// Expects Wt in hlp0 and ss1 in BX. Clobbers hlp0.
#define SM3TT20(e, f, g, h) \
	ADDW h, hlp0; \ // Wt + h
	ADDW BX, hlp0; \ // Wt + h + ss1
	EORW e, f, BX; \ // e XOR f
	EORW g, BX; \ // e XOR f XOR g
	ADDW hlp0, BX // (e XOR f XOR g) + Wt + h + ss1

// Calculate tt1 in CX, used DX, hlp0
// ret = ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
// Expects Wt+4 in AX and ss1 in BX. Clobbers AX and DX; leaves Wt in hlp0.
#define SM3TT11(index, a, b, c, d) \
	ANDW a, b, DX; \ // a AND b
	ANDW a, c, CX; \ // a AND c
	ORRW DX, CX; \ // (a AND b) OR (a AND c)
	ANDW b, c, DX; \ // b AND c
	ORRW CX, DX; \ // (a AND b) OR (a AND c) OR (b AND c)
	ADDW d, DX; \
	RORW $20, a, CX; \
	EORW BX, CX; \ // ROTL(12, a) XOR ss1
	ADDW DX, CX; \ // ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1)
	MOVW ((index)*4)(BP), hlp0; \ // Wt
	EORW hlp0, AX; \ // Wt XOR Wt+4
	ADDW AX, CX

// Calculate tt2 in BX
// ret = ((e AND f) OR (NOT(e) AND g)) + h + ss1 + Wt
// Expects Wt in hlp0 and ss1 in BX. Clobbers DX and hlp0.
#define SM3TT21(e, f, g, h) \
	ADDW h, hlp0; \ // Wt + h
	ADDW BX, hlp0; \ // h + ss1 + Wt
	ANDW e, f, DX; \ // e AND f
	MVNW e, BX; \ // NOT(e)
	ANDW g, BX; \ // NOT(e) AND g
	ORRW DX, BX; \ // (e AND f) OR (NOT(e) AND g)
	ADDW hlp0, BX

// Finish a round. Expects tt1 in CX and tt2 in BX.
// Only four registers are written; the remaining values change role through
// the rotated argument order of the next SM3ROUNDx invocation:
//   b <- ROTL(9, b)    (next round's c)
//   h <- tt1           (next round's a)
//   f <- ROTL(19, f)   (next round's g)
//   d <- tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2)   (next round's e)
#define COPYRESULT(b, d, f, h) \
	RORW $23, b; \ // ROTL(9, b)
	MOVW CX, h; \ // a = tt1
	RORW $13, f; \ // ROTL(19, f)
	RORW $23, BX, CX; \ // ROTL(9, tt2)
	EORW BX, CX; \ // tt2 XOR ROTL(9, tt2)
	RORW $15, BX; \ // ROTL(17, tt2)
	EORW BX, CX; \ // tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2)
	MOVW CX, d // e = tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2)

// Round for 0 <= index <= 11: W[index+4] is taken directly from the message.
#define SM3ROUND0(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE01(index); \
	SM3SS1(const, a, e); \
	SM3TT10(index, a, b, c, d); \
	SM3TT20(e, f, g, h); \
	COPYRESULT(b, d, f, h)

// Round for 12 <= index <= 15: W[index+4] is expanded, XOR-based tt1/tt2.
#define SM3ROUND1(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	SM3SS1(const, a, e); \
	SM3TT10(index, a, b, c, d); \
	SM3TT20(e, f, g, h); \
	COPYRESULT(b, d, f, h)

// Round for 16 <= index <= 63: W[index+4] is expanded, AND/OR-based tt1/tt2.
#define SM3ROUND2(index, const, a, b, c, d, e, f, g, h) \
	MSGSCHEDULE1(index); \
	SM3SS1(const, a, e); \
	SM3TT11(index, a, b, c, d); \
	SM3TT21(e, f, g, h); \
	COPYRESULT(b, d, f, h)

// func blockARM64(dig *digest, p []byte)
//
// Processes every complete 64-byte block of p and writes the updated eight
// 32-bit state words back through dig. A trailing partial block is ignored;
// if p holds no complete block the function returns without touching dig.
//
// Frame: 272 bytes of locals hold the 68 expanded message words W[0..67].
TEXT ·blockARM64(SB), 0, $272-32
	MOVD dig+0(FP), hlp1
	MOVD p_base+8(FP), SI
	MOVD p_len+16(FP), DX

	// On arm64 the Go toolchain keeps this function's saved LR at 0(RSP) and
	// locals start at 8(RSP). Place W[] at 8(RSP) so the saved return address
	// stays intact for anything that walks the stack while we are running.
	ADD $8, RSP, BP

	AND $~63, DX                     // round the length down to whole blocks
	CBZ DX, end

	ADD SI, DX, DI                   // DI = end of the last complete block

	LDPW (0*8)(hlp1), (R19, R20)
	LDPW (1*8)(hlp1), (R21, R22)
	LDPW (2*8)(hlp1), (R23, R24)
	LDPW (3*8)(hlp1), (R25, R26)

loop:
	// Keep a copy of the incoming state for the final XOR of this block.
	MOVW R19, R10
	MOVW R20, R11
	MOVW R21, R12
	MOVW R22, R13
	MOVW R23, R14
	MOVW R24, R15
	MOVW R25, R16
	MOVW R26, R17

	// Wt = Mt; for 0 <= t <= 3
	LDPW (0*8)(SI), (AX, BX)
	REVW AX, AX
	REVW BX, BX
	STPW (AX, BX), (0*8)(BP)

	LDPW (1*8)(SI), (CX, DX)
	REVW CX, CX
	REVW DX, DX
	STPW (CX, DX), (1*8)(BP)

	SM3ROUND0(0, 0x79cc4519, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND0(1, 0xf3988a32, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND0(2, 0xe7311465, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND0(3, 0xce6228cb, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND0(4, 0x9cc45197, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND0(5, 0x3988a32f, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND0(6, 0x7311465e, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND0(7, 0xe6228cbc, R20, R21, R22, R23, R24, R25, R26, R19)
	SM3ROUND0(8, 0xcc451979, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND0(9, 0x988a32f3, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND0(10, 0x311465e7, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND0(11, 0x6228cbce, R24, R25, R26, R19, R20, R21, R22, R23)

	SM3ROUND1(12, 0xc451979c, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND1(13, 0x88a32f39, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND1(14, 0x11465e73, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND1(15, 0x228cbce6, R20, R21, R22, R23, R24, R25, R26, R19)

	SM3ROUND2(16, 0x9d8a7a87, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND2(17, 0x3b14f50f, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND2(18, 0x7629ea1e, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND2(19, 0xec53d43c, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND2(20, 0xd8a7a879, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND2(21, 0xb14f50f3, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND2(22, 0x629ea1e7, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND2(23, 0xc53d43ce, R20, R21, R22, R23, R24, R25, R26, R19)
	SM3ROUND2(24, 0x8a7a879d, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND2(25, 0x14f50f3b, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND2(26, 0x29ea1e76, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND2(27, 0x53d43cec, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND2(28, 0xa7a879d8, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND2(29, 0x4f50f3b1, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND2(30, 0x9ea1e762, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND2(31, 0x3d43cec5, R20, R21, R22, R23, R24, R25, R26, R19)
	SM3ROUND2(32, 0x7a879d8a, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND2(33, 0xf50f3b14, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND2(34, 0xea1e7629, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND2(35, 0xd43cec53, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND2(36, 0xa879d8a7, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND2(37, 0x50f3b14f, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND2(38, 0xa1e7629e, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND2(39, 0x43cec53d, R20, R21, R22, R23, R24, R25, R26, R19)
	SM3ROUND2(40, 0x879d8a7a, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND2(41, 0x0f3b14f5, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND2(42, 0x1e7629ea, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND2(43, 0x3cec53d4, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND2(44, 0x79d8a7a8, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND2(45, 0xf3b14f50, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND2(46, 0xe7629ea1, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND2(47, 0xcec53d43, R20, R21, R22, R23, R24, R25, R26, R19)
	SM3ROUND2(48, 0x9d8a7a87, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND2(49, 0x3b14f50f, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND2(50, 0x7629ea1e, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND2(51, 0xec53d43c, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND2(52, 0xd8a7a879, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND2(53, 0xb14f50f3, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND2(54, 0x629ea1e7, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND2(55, 0xc53d43ce, R20, R21, R22, R23, R24, R25, R26, R19)
	SM3ROUND2(56, 0x8a7a879d, R19, R20, R21, R22, R23, R24, R25, R26)
	SM3ROUND2(57, 0x14f50f3b, R26, R19, R20, R21, R22, R23, R24, R25)
	SM3ROUND2(58, 0x29ea1e76, R25, R26, R19, R20, R21, R22, R23, R24)
	SM3ROUND2(59, 0x53d43cec, R24, R25, R26, R19, R20, R21, R22, R23)
	SM3ROUND2(60, 0xa7a879d8, R23, R24, R25, R26, R19, R20, R21, R22)
	SM3ROUND2(61, 0x4f50f3b1, R22, R23, R24, R25, R26, R19, R20, R21)
	SM3ROUND2(62, 0x9ea1e762, R21, R22, R23, R24, R25, R26, R19, R20)
	SM3ROUND2(63, 0x3d43cec5, R20, R21, R22, R23, R24, R25, R26, R19)

	// After 64 rounds the register roles are back to a=R19 .. h=R26.
	EORW R10, R19  // H0 = a XOR H0
	EORW R11, R20  // H1 = b XOR H1
	EORW R12, R21  // H2 = c XOR H2
	EORW R13, R22  // H3 = d XOR H3
	EORW R14, R23  // H4 = e XOR H4
	EORW R15, R24  // H5 = f XOR H5
	EORW R16, R25  // H6 = g XOR H6
	EORW R17, R26  // H7 = h XOR H7

	ADD $64, SI
	CMP SI, DI
	BNE loop

	STPW (R19, R20), (0*8)(hlp1)
	STPW (R21, R22), (1*8)(hlp1)
	STPW (R23, R24), (2*8)(hlp1)
	STPW (R25, R26), (3*8)(hlp1)

end:
	RET