// Generated by gen_sm3block_ni.go. DO NOT EDIT.
//go:build !purego

#include "textflag.h"

// func blockSM3NI(h []uint32, p []byte, t []uint32)
//
// blockSM3NI runs the SM3 compression function over the message in p, 64 bytes
// per loop iteration, and updates the eight chaining words in h in place.
// The ARMv8 SM3 instructions are emitted as raw WORD encodings; the mnemonic
// each word stands for is given in the comment next to it.
//
// Arguments:
//   h - eight 32-bit state words (a..h); loaded at entry, stored back at exit.
//   p - message bytes; p_base is post-incremented by 64 per iteration.
//   t - round constants; only the first two 32-bit words are read here
//       (t[0] seeds rounds 0-15, t[1] seeds rounds 16-63). Every later round
//       constant is derived in-register by rotating the previous one left by 1.
//
// Precondition: the loop body executes before the length is tested, and the
// loop exits only when the remaining length reaches exactly zero after
// subtracting 64. len(p) must therefore be a non-zero multiple of 64; nothing
// in this routine validates that.
//
// Register usage:
//   R0 = &h[0], R1 = message cursor, R2 = &t[0], R3 = remaining length
//   R5 = t[0], R6 = t[1]
//   V0-V4   rolling window of message words (four words per register)
//   V5      SS1 result for the current round
//   V6, V7  scratch operands for message expansion
//   V8, V9  working state, word order reversed: V8 = (d,c,b,a), V9 = (h,g,f,e)
//   V10     W[j] ^ W[j+4] for the four rounds of the current group
//   V11/V12 round constant in lane 3, alternating between the two registers
//   V15/V16 copy of V8/V9 taken at the start of each 64-byte block
TEXT ·blockSM3NI(SB), 0, $0
	MOVD h_base+0(FP), R0 // Hash value first address
	MOVD p_base+24(FP), R1 // message first address
	MOVD p_len+32(FP), R3 // message length
	MOVD t_base+48(FP), R2 // t constants first address

	VLD1 (R0), [V8.S4, V9.S4] // load h(a,b,c,d,e,f,g,h)
	// VREV64 swaps the words inside each 64-bit half and VEXT $8 swaps the
	// halves, so together they reverse the four words: (a,b,c,d) -> (d,c,b,a).
	VREV64 V8.S4, V8.S4
	VEXT $8, V8.B16, V8.B16, V8.B16
	VREV64 V9.S4, V9.S4
	VEXT $8, V9.B16, V9.B16, V9.B16
	LDPW (0*8)(R2), (R5, R6) // load t constants: R5 = t[0], R6 = t[1]

blockloop:
	VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] // load 64bytes message, advance R1 by 64
	VMOV V8.B16, V15.B16 // backup: V8 h(dcba)
	VMOV V9.B16, V16.B16 // backup: V9 h(hgfe)
	VREV32 V0.B16, V0.B16 // reverse the bytes within each 32-bit message word
	VREV32 V1.B16, V1.B16
	VREV32 V2.B16, V2.B16
	VREV32 V3.B16, V3.B16
	// first 16 rounds (SM3TT1A/SM3TT2A variants)
	VMOV R5, V11.S[3] // round constant for round 0 goes in lane 3
	// Extension: expand W[16..19] into V4 (V6/V7 are scratch)
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	// Compression: rounds 0-3, message words in V0, V10 = V0 ^ V1.
	// After each SM3SS1 the VSHL/VSRI pair writes the constant rotated left
	// by 1 into the other of V11/V12, ready for the following round.
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
	WORD $0xce4088a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
	WORD $0xce4098a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
	WORD $0xce40a8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
	WORD $0xce40b8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 3

	// Extension: expand W[20..23] into V0 (W[0..3] is no longer needed)
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	// Compression: rounds 4-7, message words in V1, V10 = V1 ^ V2
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
	WORD $0xce4188a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
	WORD $0xce4198a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
	WORD $0xce41a8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
	WORD $0xce41b8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 3

	// Extension: expand W[24..27] into V1
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	// Compression: rounds 8-11, message words in V2, V10 = V2 ^ V3
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
	WORD $0xce4288a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
	WORD $0xce4298a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
	WORD $0xce42a8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
	WORD $0xce42b8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 3

	// Extension: expand W[28..31] into V2
	VEXT $12, V0.B16, V4.B16, V2.B16
	VEXT $12, V4.B16, V3.B16, V6.B16
	VEXT $8, V1.B16, V0.B16, V7.B16
	WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S
	WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S
	VEOR V4.B16, V3.B16, V10.B16
	// Compression: rounds 12-15, message words in V3, V10 = V3 ^ V4
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
	WORD $0xce4388a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
	WORD $0xce4398a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
	WORD $0xce43a8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
	WORD $0xce43b8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 3

	// second 48 rounds (SM3TT1B/SM3TT2B variants)
	VMOV R6, V11.S[3] // restart the constant sequence from t[1]
	// Extension: expand W[32..35] into V3
	VEXT $12, V1.B16, V0.B16, V3.B16
	VEXT $12, V0.B16, V4.B16, V6.B16
	VEXT $8, V2.B16, V1.B16, V7.B16
	WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S
	WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S
	VEOR V0.B16, V4.B16, V10.B16
	// Compression: rounds 16-19, message words in V4, V10 = V4 ^ V0
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3

	// Extension: expand W[36..39] into V4
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	// Compression: rounds 20-23, message words in V0, V10 = V0 ^ V1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3

	// Extension: expand W[40..43] into V0
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	// Compression: rounds 24-27, message words in V1, V10 = V1 ^ V2
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3

	// Extension: expand W[44..47] into V1
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	// Compression: rounds 28-31, message words in V2, V10 = V2 ^ V3
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3

	// Extension: expand W[48..51] into V2
	VEXT $12, V0.B16, V4.B16, V2.B16
	VEXT $12, V4.B16, V3.B16, V6.B16
	VEXT $8, V1.B16, V0.B16, V7.B16
	WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S
	WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S
	VEOR V4.B16, V3.B16, V10.B16
	// Compression: rounds 32-35, message words in V3, V10 = V3 ^ V4
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3

	// Extension: expand W[52..55] into V3
	VEXT $12, V1.B16, V0.B16, V3.B16
	VEXT $12, V0.B16, V4.B16, V6.B16
	VEXT $8, V2.B16, V1.B16, V7.B16
	WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S
	WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S
	VEOR V0.B16, V4.B16, V10.B16
	// Compression: rounds 36-39, message words in V4, V10 = V4 ^ V0
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3

	// Extension: expand W[56..59] into V4
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	// Compression: rounds 40-43, message words in V0, V10 = V0 ^ V1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3

	// Extension: expand W[60..63] into V0
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	// Compression: rounds 44-47, message words in V1, V10 = V1 ^ V2
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3

	// Extension: expand W[64..67] into V1 (the last expansion step;
	// these words are only consumed through V10 in rounds 60-63)
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	// Compression: rounds 48-51, message words in V2, V10 = V2 ^ V3
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3

	// No extension step from here on: every message word the remaining
	// rounds consume is already held in V0-V4.
	VEOR V4.B16, V3.B16, V10.B16
	// Compression: rounds 52-55, message words in V3, V10 = V3 ^ V4
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3

	VEOR V0.B16, V4.B16, V10.B16
	// Compression: rounds 56-59, message words in V4, V10 = V4 ^ V0
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3

	VEOR V1.B16, V0.B16, V10.B16
	// Compression: rounds 60-63, message words in V0, V10 = V0 ^ V1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
	WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
	WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
	WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
	WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
	WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
	WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3

	SUB $64, R3, R3 // remaining length -= 64; CBNZ below loops until it is exactly zero
	// Fold the block result into the chaining value: new state = rounds output XOR
	// the state saved in V15/V16 at the top of the loop.
	VEOR V8.B16, V15.B16, V8.B16
	VEOR V9.B16, V16.B16, V9.B16
	CBNZ R3, blockloop

sm3ret:
	// Undo the word reversal applied at entry so h is stored as (a,b,c,d,e,f,g,h).
	VREV64 V8.S4, V8.S4
	VEXT $8, V8.B16, V8.B16, V8.B16
	VREV64 V9.S4, V9.S4
	VEXT $8, V9.B16, V9.B16, V9.B16
	VST1 [V8.S4, V9.S4], (R0) // store hash value H
	RET