// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to be consistent with Go function
// names. For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. setEncryptKeyAsm was also split in two parts,
// and a new function (doEncryptKeyAsm) was created. This was necessary
// to avoid overwriting arguments when setDecryptKeyAsm calls
// setEncryptKeyAsm. Other modifications were made as well, but the
// functionality is unchanged.

#include "textflag.h"

// For expandKeyAsm
#define INP R3
#define BITS R4
#define OUTENC R5 // Pointer to next expanded encrypt key
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define OUTDEC R9 // Pointer to next expanded decrypt key
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// For P9 instruction emulation
#define ESPERM V21 // Endian swapping permute into BE
#define TMP2 V22   // Temporary for P8_STXVB16X

// For {en,de}cryptBlockAsm
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7

DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
DATA ·rcon+0x08(SB)/8, $0x0706050403020100
DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80

#ifdef GOARCH_ppc64le
# ifdef GOPPC64_power9
#define P8_LXVB16X(RA,RB,VT)  LXVB16X (RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
#define XXBRD_ON_LE(VA,VT)    XXBRD VA, VT
# else
// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
// doublewords and byte-swapping each doubleword to emulate BE load/stores.
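//
// As a rough model (not the actual lowering), the P8_LXVB16X macro below
// behaves like a 16-byte big-endian load written in plain Go. The helper
// name loadBE16 is illustrative only; on a little-endian host each
// doubleword must be byte-reversed, which is what the LXVD2X/VPERM pair
// accomplishes with a single shuffle:
//
//	import "encoding/binary"
//
//	// loadBE16 presents 16 bytes of src in big-endian order, i.e. the
//	// same register contents LXVB16X would produce on POWER9.
//	func loadBE16(src []byte) (hi, lo uint64) {
//		hi = binary.BigEndian.Uint64(src[0:8])
//		lo = binary.BigEndian.Uint64(src[8:16])
//		return
//	}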
#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
	LXVD2X	(RA+RB), VT \
	VPERM	VT, VT, ESPERM, VT

#define P8_STXVB16X(VS,RA,RB) \
	VPERM	VS, VS, ESPERM, TMP2 \
	STXVD2X	TMP2, (RA+RB)

#define XXBRD_ON_LE(VA,VT) \
	VPERM	VA, VA, ESPERM, VT

# endif // defined(GOPPC64_power9)
#else
#define P8_LXVB16X(RA,RB,VT)  LXVD2X (RA+RB), VT
#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB)
#define XXBRD_ON_LE(VA, VT)
#endif // defined(GOARCH_ppc64le)

// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments into registers.
	MOVD	nr+0(FP), ROUNDS
	MOVD	key+8(FP), INP
	MOVD	enc+16(FP), OUTENC
	MOVD	dec+24(FP), OUTDEC

#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), PTR // PTR points to rcon addr
	LVX	(PTR), ESPERM
	ADD	$0x10, PTR
#else
	MOVD	$·rcon+0x10(SB), PTR // PTR points to rcon addr (skipping permute vector)
#endif

	// Get key from memory and write aligned into VR
	P8_LXVB16X(INP, R0, IN0)
	ADD	$0x10, INP, INP
	MOVD	$0x20, TEMP

	CMPW	ROUNDS, $12
	LVX	(PTR)(R0), RCON    // lvx 4,0,6      Load first 16 bytes into RCON
	LVX	(PTR)(TEMP), MASK
	ADD	$0x10, PTR, PTR    // addi 6,6,0x10  PTR to next 16 bytes of RCON
	MOVD	$8, CNT            // li 7,8         CNT = 8
	VXOR	ZERO, ZERO, ZERO   // vxor 0,0,0     Zero to be zero :)
	MOVD	CNT, CTR           // mtctr 7        Set the counter to 8 (rounds)

	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
	// Move OUTDEC to the last key location, and store in descending order.
	ADD	$160, OUTDEC, OUTDEC
	BLT	loop128
	ADD	$32, OUTDEC, OUTDEC
	BEQ	l192
	ADD	$32, OUTDEC, OUTDEC
	JMP	l256
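// The reverse-order store described above is equivalent to the following
// Go sketch (illustrative only; enc and dec each hold 4*(rounds+1) words,
// and each 16-byte round key keeps its internal word order):
//
//	func reverseRoundKeys(enc []uint32, rounds int) []uint32 {
//		dec := make([]uint32, len(enc))
//		for i := 0; i <= rounds; i++ {
//			copy(dec[4*i:4*i+4], enc[4*(rounds-i):4*(rounds-i)+4])
//		}
//		return dec
//	}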
loop128:
	// Key schedule (Round 1 to 8)
	VPERM	IN0, IN0, MASK, KEY // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	BC	0x10, 0, loop128    // bdnz .Loop128

	LVX	(PTR)(R0), RCON // lvx 4,0,6 Last two round keys

	// Key schedule (Round 9)
	VPERM	IN0, IN0, MASK, KEY // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 10)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3

	VPERM	IN0, IN0, MASK, KEY // vperm 3,1,1,5   Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 11)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)

	RET
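// Each loop128 iteration above computes one round of the AES-128 key
// schedule: VPERM performs RotWord, VCIPHERLAST supplies the S-box
// lookup plus the round constant (ShiftRows has no effect on a splatted
// word), and the VSLDOI/VXOR chain propagates the new word across the
// other three. A plain Go sketch of one round (sbox is assumed to be the
// standard AES S-box table):
//
//	func subw(sbox *[256]byte, w uint32) uint32 {
//		return uint32(sbox[w>>24])<<24 | uint32(sbox[w>>16&0xff])<<16 |
//			uint32(sbox[w>>8&0xff])<<8 | uint32(sbox[w&0xff])
//	}
//
//	func expandRound128(sbox *[256]byte, prev [4]uint32, rcon uint32) (next [4]uint32) {
//		t := prev[3]<<8 | prev[3]>>24 // RotWord
//		t = subw(sbox, t) ^ rcon      // SubWord + round constant
//		next[0] = prev[0] ^ t
//		next[1] = prev[1] ^ next[0]
//		next[2] = prev[2] ^ next[1]
//		next[3] = prev[3] ^ next[2]
//		return
//	}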
l192:
	LXSDX	(INP+R0), IN1   // Load next 8 bytes into upper half of VSR,
	XXBRD_ON_LE(IN1, IN1)   // and convert to BE ordering on LE hosts.
	MOVD	$4, CNT         // li 7,4
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	VSPLTISB	$8, KEY // vspltisb 3,8
	MOVD	CNT, CTR        // mtctr 7
	VSUBUBM	MASK, KEY, MASK // vsububm 5,5,3

loop192:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6

	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
	VXOR	TMP, IN1, TMP         // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1         // vxor 2,2,6
	VXOR	IN0, KEY, IN0         // vxor 1,1,3
	VXOR	IN1, KEY, IN1         // vxor 2,2,3
	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSLDOI	$8, IN0, IN1, STAGE // vsldoi 7,1,2,8
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSPLTW	$3, IN0, TMP        // vspltw 6,1,3
	VXOR	TMP, IN1, TMP       // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	VXOR	IN1, KEY, IN1       // vxor 2,2,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BC	0x10, 0, loop192 // bdnz .Loop192

	RET

l256:
	P8_LXVB16X(INP, R0, IN1)
	MOVD	$7, CNT // li 7,7
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	MOVD	CNT, CTR // mtctr 7

loop256:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN1, (R0+OUTENC)
	STXVD2X	IN1, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BC	0x12, 0, done // bdz .Ldone

	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX	KEY, KEY            // vsbox 3,3

	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6

	VXOR	IN1, KEY, IN1 // vxor 2,2,3
	JMP	loop256       // b .Loop256

done:
	RET

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the ciphertext.
	// Load xk[0:3] and xor with text
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Lenc_tail // Key size 10?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Lenc_tail // Key size 12?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Lenc_tail:
	// Cipher the last two keys such that key information is
	// cleared from V1 and V2.
	VCIPHER	V0, V1, V1
	VCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD	R0, 0(R0)
	RET
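// The CR1/CR2 early exits above mean the cipher consumes 4*(nr+1) words
// of expanded key, stopping after 10, 12, or 14 rounds. The relationship
// in Go (the name xkWords is illustrative):
//
//	// xkWords returns how many uint32 words of expanded key an
//	// nr-round cipher reads: 44 for AES-128, 52 for AES-192,
//	// 60 for AES-256.
//	func xkWords(nr int) int { return 4 * (nr + 1) }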
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the plaintext.
	// Load xk[0:3] and xor with ciphertext
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Ldec_tail // Key size 10?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Ldec_tail // Key size 12?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Ldec_tail:
	// Cipher the last two keys such that key information is
	// cleared from V1 and V2.
	VNCIPHER	V0, V1, V1
	VNCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 key sizes are created/used.
	MOVD	R0, 0(R0)
	RET

// Remove defines from above so they can be redefined here.
#undef INP
#undef OUTENC
#undef ROUNDS
#undef KEY
#undef TMP

#define INP R3
#define OUTP R4
#define LEN R5
#define KEYP R6
#define ROUNDS R7
#define IVP R8
#define ENC R9

#define INOUT V2
#define TMP V3
#define IVEC V4

// Load the crypt key into VSRs.
//
// The expanded key is stored and loaded using
// STXVD2X/LXVD2X. The in-memory byte ordering
// depends on the endianness of the machine. The
// expanded keys are generated by expandKeyAsm above.
//
// Rkeyp holds the key pointer. It is clobbered. Once
// the expanded keys are loaded, it is not needed.
//
// R12, R14-R21 are scratch registers.
// For a 10-round key, V6, V11-V20 hold the expanded key.
// For a 12-round key, V6, V9-V20 hold the expanded key.
// For a 14-round key, V6, V7-V20 hold the expanded key.
#define LOAD_KEY(Rkeyp) \
	MOVD	$16, R12 \
	MOVD	$32, R14 \
	MOVD	$48, R15 \
	MOVD	$64, R16 \
	MOVD	$80, R17 \
	MOVD	$96, R18 \
	MOVD	$112, R19 \
	MOVD	$128, R20 \
	MOVD	$144, R21 \
	LXVD2X	(R0+Rkeyp), V6 \
	ADD	$16, Rkeyp \
	BEQ	CR1, L_start10 \
	BEQ	CR2, L_start12 \
	LXVD2X	(R0+Rkeyp), V7 \
	LXVD2X	(R12+Rkeyp), V8 \
	ADD	$32, Rkeyp \
L_start12: \
	LXVD2X	(R0+Rkeyp), V9 \
	LXVD2X	(R12+Rkeyp), V10 \
	ADD	$32, Rkeyp \
L_start10: \
	LXVD2X	(R0+Rkeyp), V11 \
	LXVD2X	(R12+Rkeyp), V12 \
	LXVD2X	(R14+Rkeyp), V13 \
	LXVD2X	(R15+Rkeyp), V14 \
	LXVD2X	(R16+Rkeyp), V15 \
	LXVD2X	(R17+Rkeyp), V16 \
	LXVD2X	(R18+Rkeyp), V17 \
	LXVD2X	(R19+Rkeyp), V18 \
	LXVD2X	(R20+Rkeyp), V19 \
	LXVD2X	(R21+Rkeyp), V20

// Perform the AES cipher operation for key sizes 10/12/14 using the keys
// loaded by LOAD_KEY, and the key size information held in CR1EQ/CR2EQ.
//
// Vxor is ideally V6 (Key[0-3]), but for slightly improved encryption
// performance V6 and IVEC can be swapped (xor is both associative and
// commutative) during encryption:
//
//	VXOR INOUT, IVEC, INOUT
//	VXOR INOUT, V6, INOUT
//
// into
//
//	VXOR INOUT, V6, INOUT
//	VXOR INOUT, IVEC, INOUT
//
#define CIPHER_BLOCK(Vin, Vxor, Vout, vcipher, vciphel, label10, label12) \
	VXOR	Vin, Vxor, Vout \
	BEQ	CR1, label10 \
	BEQ	CR2, label12 \
	vcipher	Vout, V7, Vout \
	vcipher	Vout, V8, Vout \
label12: \
	vcipher	Vout, V9, Vout \
	vcipher	Vout, V10, Vout \
label10: \
	vcipher	Vout, V11, Vout \
	vcipher	Vout, V12, Vout \
	vcipher	Vout, V13, Vout \
	vcipher	Vout, V14, Vout \
	vcipher	Vout, V15, Vout \
	vcipher	Vout, V16, Vout \
	vcipher	Vout, V17, Vout \
	vcipher	Vout, V18, Vout \
	vcipher	Vout, V19, Vout \
	vciphel	Vout, V20, Vout

#define CLEAR_KEYS() \
	VXOR	V6, V6, V6 \
	VXOR	V7, V7, V7 \
	VXOR	V8, V8, V8 \
	VXOR	V9, V9, V9 \
	VXOR	V10, V10, V10 \
	VXOR	V11, V11, V11 \
	VXOR	V12, V12, V12 \
	VXOR	V13, V13, V13 \
	VXOR	V14, V14, V14 \
	VXOR	V15, V15, V15 \
	VXOR	V16, V16, V16 \
	VXOR	V17, V17, V17 \
	VXOR	V18, V18, V18 \
	VXOR	V19, V19, V19 \
	VXOR	V20, V20, V20
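// cryptBlocksChain below implements CBC mode over whole 16-byte blocks.
// As a reference model, the same chaining in Go, using crypto/cipher's
// Block interface (b, iv, dst, src are illustrative names; assumes
// len(src)%16 == 0 and len(iv) >= 16):
//
//	import "crypto/cipher"
//
//	func cbcEncrypt(b cipher.Block, iv, dst, src []byte) {
//		prev := iv
//		for len(src) > 0 {
//			for i := 0; i < 16; i++ {
//				dst[i] = src[i] ^ prev[i] // chain previous ciphertext (or IV)
//			}
//			b.Encrypt(dst[:16], dst[:16]) // one block cipher call
//			prev, src, dst = dst[:16], src[16:], dst[16:]
//		}
//	}
//
// Decryption runs the block cipher first and xors afterwards, keeping the
// raw ciphertext as the next chaining value, exactly as Lcbc_dec does.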
// func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
	MOVD	src+0(FP), INP
	MOVD	dst+8(FP), OUTP
	MOVD	length+16(FP), LEN
	MOVD	key+24(FP), KEYP
	MOVD	iv+32(FP), IVP
	MOVD	enc+40(FP), ENC
	MOVD	nr+48(FP), ROUNDS

#ifdef NEEDS_ESPERM
	MOVD	$·rcon(SB), R11
	LVX	(R11), ESPERM // Permute value for P8_ macros.
#endif

	// Assume len > 0 && len % blockSize == 0.
	CMPW	ENC, $0
	P8_LXVB16X(IVP, R0, IVEC)
	CMPU	ROUNDS, $10, CR1
	CMPU	ROUNDS, $12, CR2 // Only sizes 10/12/14 are supported.

	// Set up the key in VSRs, and set the loop count in CTR.
	LOAD_KEY(KEYP)
	SRD	$4, LEN
	MOVD	LEN, CTR

	BEQ	Lcbc_dec

	PCALIGN	$16
Lcbc_enc:
	P8_LXVB16X(INP, R0, INOUT)
	ADD	$16, INP
	VXOR	INOUT, V6, INOUT
	CIPHER_BLOCK(INOUT, IVEC, INOUT, VCIPHER, VCIPHERLAST, Lcbc_enc10, Lcbc_enc12)
	VOR	INOUT, INOUT, IVEC // ciphertext (INOUT) is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD	$16, OUTP
	BDNZ	Lcbc_enc

	P8_STXVB16X(INOUT, IVP, R0)
	CLEAR_KEYS()
	RET

	PCALIGN	$16
Lcbc_dec:
	P8_LXVB16X(INP, R0, TMP)
	ADD	$16, INP
	CIPHER_BLOCK(TMP, V6, INOUT, VNCIPHER, VNCIPHERLAST, Lcbc_dec10, Lcbc_dec12)
	VXOR	INOUT, IVEC, INOUT
	VOR	TMP, TMP, IVEC // TMP is IVEC for next block.
	P8_STXVB16X(INOUT, OUTP, R0)
	ADD	$16, OUTP
	BDNZ	Lcbc_dec

	P8_STXVB16X(IVEC, IVP, R0)
	CLEAR_KEYS()
	RET
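// For reference, the Go-side declarations matching the TEXT symbols in
// this file look roughly like the following sketch (the actual
// declarations live in the ppc64x-specific Go files of crypto/aes; the
// signatures are taken from the comments above, and the //go:noescape
// directives are an assumption):
//
//	//go:noescape
//	func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
//
//	//go:noescape
//	func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
//	//go:noescape
//	func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
//	//go:noescape
//	func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)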