// Generated by gen_sm3block_ni.go. DO NOT EDIT.
//
// NOTE(review): this copy carries hand-applied corrections (opcode byte
// order, VEXT byte offsets, round-constant rotation, state lane order, loop
// bounds). They should be mirrored in the generator before regenerating.

#include "textflag.h"

// func blockSM3NI(h []uint32, p []byte, t []uint32)
//
// blockSM3NI runs the SM3 compression function over every complete 64-byte
// block of p using the ARMv8.2 SM3 instructions, updating the eight state
// words in h in place.
//
//   h: hash state, eight 32-bit words in natural order a,b,c,d,e,f,g,h.
//   p: message bytes; only whole 64-byte blocks are consumed. If len(p) < 64
//      the function returns without touching h.
//   t: two 32-bit round constants: t[0] seeds rounds 0-15, t[1] seeds rounds
//      16-63. Presumably 0x79cc4519 and 0x9d8a7a87 (0x7a879d8a rotated left
//      by 16) - confirm against the caller.
//
// Register use:
//   R0 = &h[0], R1 = message cursor, R2 = &t[0], R3 = bytes remaining,
//   R5/R6 = t[0]/t[1],
//   V0-V4   = rolling window of message-schedule words,
//   V5      = SS1 result, V6/V7 = schedule temporaries, V10 = W' (Wj ^ Wj+4),
//   V8/V9   = working state, lanes (d,c,b,a) and (h,g,f,e),
//   V11/V12 = round constant in lane 3, rotated left by one each round,
//   V15/V16 = state saved at block entry for the final feed-forward XOR.
//
// The SM3 instructions are emitted with WORD because the Go assembler has no
// mnemonics for them; each WORD holds the natural 32-bit A64 encoding.
TEXT ·blockSM3NI(SB), NOSPLIT, $0-72
	MOVD h_base+0(FP), R0                           // Hash value first address
	MOVD p_base+24(FP), R1                          // message first address
	MOVD p_len+32(FP), R3                           // message length
	MOVD t_base+48(FP), R2                          // t constants first address

	CMP $64, R3                                     // fewer than 64 bytes: nothing to do
	BLT done

	VLD1 (R0), [V8.S4, V9.S4]                       // load h(a,b,c,d,e,f,g,h)
	// SM3TT1*/SM3TT2*/SM3SS1 read A and E from the top lane, so reverse the
	// word order of each register: (a,b,c,d) -> (d,c,b,a), (e,f,g,h) -> (h,g,f,e).
	VREV64 V8.S4, V8.S4
	VEXT $8, V8.B16, V8.B16, V8.B16
	VREV64 V9.S4, V9.S4
	VEXT $8, V9.B16, V9.B16, V9.B16
	LDPW (0*8)(R2), (R5, R6)                        // load t constants

blockloop:
	VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] // load 64bytes message
	VMOV V8.B16, V15.B16                            // backup: V8 h(dcba)
	VMOV V9.B16, V16.B16                            // backup: V9 h(hgfe)
	VREV32 V0.B16, V0.B16                           // message words are big-endian
	VREV32 V1.B16, V1.B16
	VREV32 V2.B16, V2.B16
	VREV32 V3.B16, V3.B16

	// first 16 rounds
	VMOV R5, V11.S[3]

	// rounds 0-3
	// Extension
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004          // SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4          // SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8          // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4088a9          // SM3TT2A V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8          // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4098a9          // SM3TT2A V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8          // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce40a8a9          // SM3TT2A V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8          // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce40b8a9          // SM3TT2A V9.4S, V5.4S, V0.S[3]

	// rounds 4-7
	// Extension
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020          // SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0          // SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8          // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4188a9          // SM3TT2A V9.4S, V5.4S, V1.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8          // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4198a9          // SM3TT2A V9.4S, V5.4S, V1.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8          // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce41a8a9          // SM3TT2A V9.4S, V5.4S, V1.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8          // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce41b8a9          // SM3TT2A V9.4S, V5.4S, V1.S[3]

	// rounds 8-11
	// Extension
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041          // SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1          // SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8          // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4288a9          // SM3TT2A V9.4S, V5.4S, V2.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8          // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4298a9          // SM3TT2A V9.4S, V5.4S, V2.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8          // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce42a8a9          // SM3TT2A V9.4S, V5.4S, V2.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8          // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce42b8a9          // SM3TT2A V9.4S, V5.4S, V2.S[3]

	// rounds 12-15
	// Extension
	VEXT $12, V0.B16, V4.B16, V2.B16
	VEXT $12, V4.B16, V3.B16, V6.B16
	VEXT $8, V1.B16, V0.B16, V7.B16
	WORD $0xce61c062          // SM3PARTW1 V2.4S, V3.4S, V1.4S
	WORD $0xce66c4e2          // SM3PARTW2 V2.4S, V7.4S, V6.4S
	VEOR V4.B16, V3.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8          // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4388a9          // SM3TT2A V9.4S, V5.4S, V3.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8          // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4398a9          // SM3TT2A V9.4S, V5.4S, V3.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8          // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce43a8a9          // SM3TT2A V9.4S, V5.4S, V3.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8          // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce43b8a9          // SM3TT2A V9.4S, V5.4S, V3.S[3]

	// second 48 rounds
	VMOV R6, V11.S[3]

	// rounds 16-19
	// Extension
	VEXT $12, V1.B16, V0.B16, V3.B16
	VEXT $12, V0.B16, V4.B16, V6.B16
	VEXT $8, V2.B16, V1.B16, V7.B16
	WORD $0xce62c083          // SM3PARTW1 V3.4S, V4.4S, V2.4S
	WORD $0xce66c4e3          // SM3PARTW2 V3.4S, V7.4S, V6.4S
	VEOR V0.B16, V4.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce448ca9          // SM3TT2B V9.4S, V5.4S, V4.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce449ca9          // SM3TT2B V9.4S, V5.4S, V4.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce44aca9          // SM3TT2B V9.4S, V5.4S, V4.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce44bca9          // SM3TT2B V9.4S, V5.4S, V4.S[3]

	// rounds 20-23
	// Extension
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004          // SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4          // SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce408ca9          // SM3TT2B V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce409ca9          // SM3TT2B V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce40aca9          // SM3TT2B V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce40bca9          // SM3TT2B V9.4S, V5.4S, V0.S[3]

	// rounds 24-27
	// Extension
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020          // SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0          // SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce418ca9          // SM3TT2B V9.4S, V5.4S, V1.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce419ca9          // SM3TT2B V9.4S, V5.4S, V1.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce41aca9          // SM3TT2B V9.4S, V5.4S, V1.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce41bca9          // SM3TT2B V9.4S, V5.4S, V1.S[3]

	// rounds 28-31
	// Extension
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041          // SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1          // SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce428ca9          // SM3TT2B V9.4S, V5.4S, V2.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce429ca9          // SM3TT2B V9.4S, V5.4S, V2.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce42aca9          // SM3TT2B V9.4S, V5.4S, V2.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce42bca9          // SM3TT2B V9.4S, V5.4S, V2.S[3]

	// rounds 32-35
	// Extension
	VEXT $12, V0.B16, V4.B16, V2.B16
	VEXT $12, V4.B16, V3.B16, V6.B16
	VEXT $8, V1.B16, V0.B16, V7.B16
	WORD $0xce61c062          // SM3PARTW1 V2.4S, V3.4S, V1.4S
	WORD $0xce66c4e2          // SM3PARTW2 V2.4S, V7.4S, V6.4S
	VEOR V4.B16, V3.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce438ca9          // SM3TT2B V9.4S, V5.4S, V3.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce439ca9          // SM3TT2B V9.4S, V5.4S, V3.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce43aca9          // SM3TT2B V9.4S, V5.4S, V3.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce43bca9          // SM3TT2B V9.4S, V5.4S, V3.S[3]

	// rounds 36-39
	// Extension
	VEXT $12, V1.B16, V0.B16, V3.B16
	VEXT $12, V0.B16, V4.B16, V6.B16
	VEXT $8, V2.B16, V1.B16, V7.B16
	WORD $0xce62c083          // SM3PARTW1 V3.4S, V4.4S, V2.4S
	WORD $0xce66c4e3          // SM3PARTW2 V3.4S, V7.4S, V6.4S
	VEOR V0.B16, V4.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce448ca9          // SM3TT2B V9.4S, V5.4S, V4.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce449ca9          // SM3TT2B V9.4S, V5.4S, V4.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce44aca9          // SM3TT2B V9.4S, V5.4S, V4.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce44bca9          // SM3TT2B V9.4S, V5.4S, V4.S[3]

	// rounds 40-43
	// Extension
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004          // SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4          // SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce408ca9          // SM3TT2B V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce409ca9          // SM3TT2B V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce40aca9          // SM3TT2B V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce40bca9          // SM3TT2B V9.4S, V5.4S, V0.S[3]

	// rounds 44-47
	// Extension
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020          // SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0          // SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce418ca9          // SM3TT2B V9.4S, V5.4S, V1.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce419ca9          // SM3TT2B V9.4S, V5.4S, V1.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce41aca9          // SM3TT2B V9.4S, V5.4S, V1.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce41bca9          // SM3TT2B V9.4S, V5.4S, V1.S[3]

	// rounds 48-51 (last message extension)
	// Extension
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041          // SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1          // SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce428ca9          // SM3TT2B V9.4S, V5.4S, V2.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce429ca9          // SM3TT2B V9.4S, V5.4S, V2.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce42aca9          // SM3TT2B V9.4S, V5.4S, V2.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce42bca9          // SM3TT2B V9.4S, V5.4S, V2.S[3]

	// rounds 52-55 (schedule already complete, compression only)
	VEOR V4.B16, V3.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce438ca9          // SM3TT2B V9.4S, V5.4S, V3.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce439ca9          // SM3TT2B V9.4S, V5.4S, V3.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce43aca9          // SM3TT2B V9.4S, V5.4S, V3.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce43bca9          // SM3TT2B V9.4S, V5.4S, V3.S[3]

	// rounds 56-59
	VEOR V0.B16, V4.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce448ca9          // SM3TT2B V9.4S, V5.4S, V4.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce449ca9          // SM3TT2B V9.4S, V5.4S, V4.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce44aca9          // SM3TT2B V9.4S, V5.4S, V4.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce44bca9          // SM3TT2B V9.4S, V5.4S, V4.S[3]

	// rounds 60-63
	VEOR V1.B16, V0.B16, V10.B16
	// Compression
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8          // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce408ca9          // SM3TT2B V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8          // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce409ca9          // SM3TT2B V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505          // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8          // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce40aca9          // SM3TT2B V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505          // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8          // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce40bca9          // SM3TT2B V9.4S, V5.4S, V0.S[3]

	SUB $64, R3, R3                                 // one block consumed
	VEOR V8.B16, V15.B16, V8.B16                    // feed-forward: new state ^= state at block entry
	VEOR V9.B16, V16.B16, V9.B16
	CMP $64, R3                                     // another full block left?
	BGE blockloop

sm3ret:
	// Undo the lane reversal so h is stored back as a,b,c,d,e,f,g,h.
	VREV64 V8.S4, V8.S4
	VEXT $8, V8.B16, V8.B16, V8.B16
	VREV64 V9.S4, V9.S4
	VEXT $8, V9.B16, V9.B16, V9.B16
	VST1 [V8.S4, V9.S4], (R0)                       // store hash value H

done:
	RET