// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// rotInvSRows: byte-shuffle table for TBL. It applies RotWord combined
// with the inverse of the ShiftRows permutation, so that a following
// AESE (whose hardware pipeline performs ShiftRows+SubBytes) yields the
// plain SubWord(RotWord(w)) needed by AES key expansion.
DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
// invSRows: inverse ShiftRows permutation only (no word rotation), used
// for the extra SubWord step that AES-256 key expansion requires.
DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
GLOBL invSRows<>(SB), (NOPTR+RODATA), $16

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Encrypts one 16-byte block at src into dst using the expanded round
// keys at xk, via the ARMv8 Crypto Extensions. nr is the round count
// (10/12/14 for AES-128/192/256). Round keys are consumed as 128-bit
// quantities (see note on expandKeyAsm below).
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]		// V0 = plaintext block

	// Dispatch on key size, falling through so the larger keys run
	// their extra rounds first and then share the common 10-round tail.
	// (nr < 12 -> AES-128, nr == 12 -> AES-192, else AES-256.
	// "enc196" is a historical misnomer for the AES-192 path.)
	CMP	$12, R9
	BLT	enc128
	BEQ	enc196
enc256:	// 2 extra rounds for AES-256
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESE	V1.B16, V0.B16		// AESE = AddRoundKey+ShiftRows+SubBytes
	AESMC	V0.B16, V0.B16		// AESMC = MixColumns
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc196:	// 2 extra rounds for AES-192
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESE	V3.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V4.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc128:	// common tail: load the remaining 11 round keys into V5-V15
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESE	V5.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V6.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V7.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V10.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V11.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V12.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V13.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V14.B16, V0.B16		// final round: no MixColumns...
	VEOR	V0.B16, V15.B16, V0.B16	// ...and last AddRoundKey is a plain XOR
	VST1	[V0.B16], (R11)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Decrypts one 16-byte block at src into dst. Mirror image of
// encryptBlockAsm using the inverse-cipher instructions AESD/AESIMC;
// xk must point at the decryption key schedule produced by
// expandKeyAsm (round keys already passed through InverseMixColumns).
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]		// V0 = ciphertext block

	CMP	$12, R9
	BLT	dec128
	BEQ	dec196
dec256:	// 2 extra rounds for AES-256
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESD	V1.B16, V0.B16		// AESD = AddRoundKey+InvShiftRows+InvSubBytes
	AESIMC	V0.B16, V0.B16		// AESIMC = InverseMixColumns
	AESD	V2.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec196:	// 2 extra rounds for AES-192
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESD	V3.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V4.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec128:	// common tail: remaining 11 round keys in V5-V15
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESD	V5.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V6.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V7.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V8.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V9.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V10.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V11.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V12.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V13.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V14.B16, V0.B16		// final round: no InvMixColumns...
	VEOR	V0.B16, V15.B16, V0.B16	// ...last AddRoundKey is a plain XOR
	VST1	[V0.B16], (R11)
	RET

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
// Note that round keys are stored in uint128 format, not uint32
//
// Expands the raw AES key into the encryption schedule at enc and (if
// dec is non-nil) the decryption schedule at dec. nr selects the key
// size (10/12/14). The S-box lookups are done with AESE against an
// all-zero round key, pre-shuffled through rotInvSRows/invSRows so the
// instruction's built-in ShiftRows is cancelled out.
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R8
	MOVD	key+8(FP), R9
	MOVD	enc+16(FP), R10
	MOVD	dec+24(FP), R11
	LDP	rotInvSRows<>(SB), (R0, R1)
	VMOV	R0, V3.D[0]		// V3 = RotWord∘InvShiftRows table for TBL
	VMOV	R1, V3.D[1]
	VEOR	V0.B16, V0.B16, V0.B16 // All zeroes
	MOVW	$1, R13			// R13 = Rcon, starts at 1
	// nr is 10, 12 or 14: bit 1 clear only for 12 (AES-192),
	// bit 2 set only for 14 (AES-256); otherwise fall into AES-128.
	TBZ	$1, R8, ks192
	TBNZ	$2, R8, ks256
	// AES-128: copy the 16-byte key as round key 0.
	LDPW	(R9), (R4, R5)
	LDPW	8(R9), (R6, R7)
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	MOVW	$0x1b, R14		// Rcon reduction constant (x^8 mod m(x))
ks128Loop:
	VMOV	R7, V2.S[0]		// last word of previous round key
	WORD	$0x4E030042	// TBL V3.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16	// Use AES to compute the SBOX
	EORW	R13, R4
	LSLW	$1, R13		// Compute next Rcon
	ANDSW	$0x100, R13, ZR
	CSELW	NE, R14, R13, R13	// Fake modulo
	SUBS	$1, R8
	VMOV	V2.S[0], R0
	EORW	R0, R4			// w[i] = w[i-4] ^ SubWord(RotWord(w[i-1])) ^ Rcon
	EORW	R4, R5			// then chain the XORs through the round key
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	BNE	ks128Loop
	CBZ	R11, ksDone	// If dec is nil we are done
	SUB	$176, R10		// rewind over the 11 round keys just written
	// Decryption keys are encryption keys with InverseMixColumns applied
	// (first and last keys excepted), stored in reverse round order.
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16		// round key 0: copied verbatim
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V14.B16
	AESIMC	V1.B16, V13.B16
	VMOV	V2.B16, V12.B16		// last round key: copied verbatim
	VST1.P	[V12.B16, V13.B16, V14.B16], 48(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks192:
	// AES-192: 24-byte key, 6-word expansion chain.
	LDPW	(R9), (R2, R3)
	LDPW	8(R9), (R4, R5)
	LDPW	16(R9), (R6, R7)
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	SUB	$4, R8			// 8 iterations of the 6-word step
ks192Loop:
	STPW.P	(R6, R7), 8(R10)
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042	// TBL V3.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16		// SubWord via AESE with zero key
	EORW	R13, R2
	LSLW	$1, R13			// next Rcon (never overflows 8 bits for 192)
	SUBS	$1, R8
	VMOV	V2.S[0], R0
	EORW	R0, R2
	EORW	R2, R3
	EORW	R3, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	BNE	ks192Loop
	CBZ	R11, ksDone
	SUB	$208, R10		// rewind over the 13 round keys
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16		// round key 0: copied verbatim
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16]
	VST1.P	[V0.B16], 16(R11)	// last round key: copied verbatim
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks256:
	// AES-256: 32-byte key, 8-word chain with an extra mid-chain SubWord.
	LDP	invSRows<>(SB), (R0, R1)
	VMOV	R0, V4.D[0]		// V4 = InvShiftRows-only table for TBL
	VMOV	R1, V4.D[1]
	LDPW	(R9), (R0, R1)
	LDPW	8(R9), (R2, R3)
	LDPW	16(R9), (R4, R5)
	LDPW	24(R9), (R6, R7)
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	SUB	$7, R8			// 7 iterations of the 8-word step
ks256Loop:
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042	// TBL V3.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16		// SubWord(RotWord(w)) via AESE
	EORW	R13, R0
	LSLW	$1, R13			// next Rcon (stays within 8 bits for 256)
	SUBS	$1, R8
	VMOV	V2.S[0], R9		// R9 (key ptr) is dead here, reused as scratch
	EORW	R9, R0
	EORW	R0, R1
	EORW	R1, R2
	EORW	R2, R3
	VMOV	R3, V2.S[0]
	WORD	$0x4E040042	// TBL V4.B16, [V2.B16], V2.B16 (no RotWord this time)
	AESE	V0.B16, V2.B16		// plain SubWord for the second half-chain
	VMOV	V2.S[0], R9
	EORW	R9, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	BNE	ks256Loop
	CBZ	R11, ksDone
	SUB	$240, R10		// rewind over the 15 round keys
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16		// round key 0: copied verbatim
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V18.B16
	AESIMC	V1.B16, V17.B16
	VMOV	V2.B16, V16.B16		// last round key: copied verbatim
	VST1.P	[V16.B16, V17.B16, V18.B16], 48(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
ksDone:
	RET