// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to be consistent with Go function
// names. For instance, function aes_p8_set_{en,de}crypt_key become
// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts
// and a new session was created (doEncryptKeyAsm). This was necessary to
// avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm.
// There were other modifications as well but kept the same functionality.

#include "textflag.h"

// Register aliases for expandKeyAsm.
#define INP R3
#define BITS R4
#define OUTENC R5 // Pointer to next expanded encrypt key
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define OUTDEC R9 // Pointer to next expanded decrypt key
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// For P9 instruction emulation
#define ESPERM V21 // Endian swapping permute into BE
#define TMP2 V22   // Temporary for P8_STXVB16X/P8_STXV

// For {en,de}cryptBlockAsm
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7

// 80-byte ·rcon table: a doubleword byte-swap permute vector (used only on LE
// to build ESPERM), the AES round constants, the rotate-word permute MASK, and
// a trailing zero quadword.
DATA ·rcon+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
DATA ·rcon+0x08(SB)/8, $0x0706050403020100
DATA ·rcon+0x10(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x18(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x20(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x28(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x30(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x38(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x40(SB)/8, $0x0000000000000000
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80

// Emulate unaligned BE vector load/stores on LE targets.
// On LE, a VPERM through ESPERM byte-reverses the vector so the register
// always holds big-endian byte order regardless of target endianness.
// ESPERM must be loaded (from ·rcon+0x00) before these macros are used on LE.
#ifdef GOARCH_ppc64le
#define P8_LXVB16X(RA,RB,VT) \
	LXVD2X	(RA+RB), VT \
	VPERM	VT, VT, ESPERM, VT

#define P8_STXVB16X(VS,RA,RB) \
	VPERM	VS, VS, ESPERM, TMP2 \
	STXVD2X	TMP2, (RA+RB)

#define LXSDX_BE(RA,RB,VT) \
	LXSDX	(RA+RB), VT \
	VPERM	VT, VT, ESPERM, VT
#else
#define P8_LXVB16X(RA,RB,VT) \
	LXVD2X	(RA+RB), VT

#define P8_STXVB16X(VS,RA,RB) \
	STXVD2X	VS, (RA+RB)

#define LXSDX_BE(RA,RB,VT) \
	LXSDX	(RA+RB), VT
#endif
// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
//
// Expands the AES user key at 'key' into the encryption schedule at 'enc'
// (stored in ascending order) and the decryption schedule at 'dec' (the same
// round keys stored in descending order). nr selects the key size via the
// CMPW against 12 below: <12 => 128-bit, ==12 => 192-bit, >12 => 256-bit.
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
	// Load the arguments inside the registers
	MOVD	nr+0(FP), ROUNDS
	MOVD	key+8(FP), INP
	MOVD	enc+16(FP), OUTENC
	MOVD	dec+24(FP), OUTDEC

#ifdef GOARCH_ppc64le
	MOVD	$·rcon(SB), PTR // PTR point to rcon addr
	LVX	(PTR), ESPERM   // Load the byte-swap permute for the P8_ macros.
	ADD	$0x10, PTR
#else
	MOVD	$·rcon+0x10(SB), PTR // PTR point to rcon addr (skipping permute vector)
#endif

	// Get key from memory and write aligned into VR
	P8_LXVB16X(INP, R0, IN0)
	ADD	$0x10, INP, INP
	MOVD	$0x20, TEMP

	CMPW	ROUNDS, $12
	LVX	(PTR)(R0), RCON   // lvx 4,0,6 Load first 16 bytes into RCON
	LVX	(PTR)(TEMP), MASK
	ADD	$0x10, PTR, PTR   // addi 6,6,0x10 PTR to next 16 bytes of RCON
	MOVD	$8, CNT           // li 7,8 CNT = 8
	VXOR	ZERO, ZERO, ZERO  // vxor 0,0,0 Zero to be zero :)
	MOVD	CNT, CTR          // mtctr 7 Set the counter to 8 (rounds)

	// The expanded decrypt key is the expanded encrypt key stored in reverse order.
	// Move OUTDEC to the last key location, and store in descending order.
	ADD	$160, OUTDEC, OUTDEC
	BLT	loop128             // <128-bit key: 11 round keys (160 + 0)
	ADD	$32, OUTDEC, OUTDEC
	BEQ	l192                // 192-bit key: 13 round keys (160 + 32)
	ADD	$32, OUTDEC, OUTDEC
	JMP	l256                // 256-bit key: 15 round keys (160 + 64)

loop128:
	// Key schedule (Round 1 to 8)
	VPERM	IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	BC	0x10, 0, loop128    // bdnz .Loop128

	LVX	(PTR)(R0), RCON // lvx 4,0,6 Last two round keys

	// Key schedule (Round 9)
	VPERM	IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 10)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3

	VPERM	IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	// Key schedule (Round 11)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)

	RET

l192:
	LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order.
	MOVD	$4, CNT        // li 7,4
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	VSPLTISB	$8, KEY // vspltisb 3,8
	MOVD	CNT, CTR        // mtctr 7
	VSUBUBM	MASK, KEY, MASK // vsububm 5,5,3

loop192:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6

	VSLDOI	$8, ZERO, IN1, STAGE  // vsldoi 7,0,2,8
	VSPLTW	$3, IN0, TMP          // vspltw 6,1,3
	VXOR	TMP, IN1, TMP         // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1   // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON      // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1         // vxor 2,2,6
	VXOR	IN0, KEY, IN0         // vxor 1,1,3
	VXOR	IN1, KEY, IN1         // vxor 2,2,3
	VSLDOI	$8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSLDOI	$8, IN0, IN1, STAGE // vsldoi 7,1,2,8
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	STXVD2X	STAGE, (R0+OUTENC)
	STXVD2X	STAGE, (R0+OUTDEC)
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VSPLTW	$3, IN0, TMP        // vspltw 6,1,3
	VXOR	TMP, IN1, TMP       // vxor 6,6,2
	VSLDOI	$12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	VXOR	IN1, KEY, IN1       // vxor 2,2,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BC	0x10, 0, loop192 // bdnz .Loop192

	RET

l256:
	P8_LXVB16X(INP, R0, IN1) // Load the second 16 key bytes.
	MOVD	$7, CNT          // li 7,7
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	MOVD	CNT, CTR // mtctr 7

loop256:
	VPERM	IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI	$12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	STXVD2X	IN1, (R0+OUTENC)
	STXVD2X	IN1, (R0+OUTDEC)
	VCIPHERLAST	KEY, RCON, KEY // vcipherlast 3,3,4
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC

	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN0, TMP, IN0       // vxor 1,1,6
	VADDUWM	RCON, RCON, RCON    // vadduwm 4,4,4
	VXOR	IN0, KEY, IN0       // vxor 1,1,3
	STXVD2X	IN0, (R0+OUTENC)
	STXVD2X	IN0, (R0+OUTDEC)
	ADD	$16, OUTENC, OUTENC
	ADD	$-16, OUTDEC, OUTDEC
	BC	0x12, 0, done // bdz .Ldone

	// Odd 256-bit schedule step: substitute (vsbox) rather than
	// rotate-n-substitute, per the AES-256 key schedule.
	VSPLTW	$3, IN0, KEY        // vspltw 3,1,3
	VSLDOI	$12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX	KEY, KEY            // vsbox 3,3

	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6
	VSLDOI	$12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR	IN1, TMP, IN1       // vxor 2,2,6

	VXOR	IN1, KEY, IN1 // vxor 2,2,3
	JMP	loop256       // b .Loop256

done:
	RET

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Encrypts a single 16-byte block from src into dst using the expanded
// encryption key xk. nr is the AES round count (10, 12, or 14); any other
// value traps at Linvalid_key_len.
TEXT ·encryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef GOARCH_ppc64le
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

	// Constant key offsets, reused for both halves of the schedule.
	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the ciphertext.
	// Load xk[0:3] and xor with text
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Ldec_tail // Key size 10?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Ldec_tail // Key size 12?
	VCIPHER	V0, V1, V0
	VCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

// NOTE(review): label named Ldec_tail although this is the encrypt path;
// Go asm labels are scoped to the TEXT block, so it does not clash with the
// identically named label in decryptBlockAsm below.
Ldec_tail:
	// Cipher last two keys such that key information is
	// cleared from V1 and V2.
	VCIPHER	V0, V1, V1
	VCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 keys sizes are created/used.
	MOVD	R0, 0(R0)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// Decrypts a single 16-byte block from src into dst using the expanded
// decryption key xk. Mirrors encryptBlockAsm with VNCIPHER/VNCIPHERLAST.
TEXT ·decryptBlockAsm(SB), NOSPLIT|NOFRAME, $0
	MOVD	nr+0(FP), R6   // Round count/Key size
	MOVD	xk+8(FP), R5   // Key pointer
	MOVD	dst+16(FP), R3 // Dest pointer
	MOVD	src+24(FP), R4 // Src pointer
#ifdef GOARCH_ppc64le
	MOVD	$·rcon(SB), R7
	LVX	(R7), ESPERM // Permute value for P8_ macros.
#endif

	// Set CR{1,2,3}EQ to hold the key size information.
	CMPU	R6, $10, CR1
	CMPU	R6, $12, CR2
	CMPU	R6, $14, CR3

	// Constant key offsets, reused for both halves of the schedule.
	MOVD	$16, R6
	MOVD	$32, R7
	MOVD	$48, R8
	MOVD	$64, R9
	MOVD	$80, R10
	MOVD	$96, R11
	MOVD	$112, R12

	// Load text in BE order
	P8_LXVB16X(R4, R0, V0)

	// V1, V2 will hold keys, V0 is a temp.
	// At completion, V2 will hold the text.
	// Load xk[0:3] and xor with ciphertext
	LXVD2X	(R0+R5), V1
	VXOR	V0, V1, V0

	// Load xk[4:11] and cipher
	LXVD2X	(R6+R5), V1
	LXVD2X	(R7+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[12:19] and cipher
	LXVD2X	(R8+R5), V1
	LXVD2X	(R9+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[20:27] and cipher
	LXVD2X	(R10+R5), V1
	LXVD2X	(R11+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Increment xk pointer to reuse constant offsets in R6-R12.
	ADD	$112, R5

	// Load xk[28:35] and cipher
	LXVD2X	(R0+R5), V1
	LXVD2X	(R6+R5), V2
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[36:43] and cipher
	LXVD2X	(R7+R5), V1
	LXVD2X	(R8+R5), V2
	BEQ	CR1, Ldec_tail // Key size 10?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[44:51] and cipher
	LXVD2X	(R9+R5), V1
	LXVD2X	(R10+R5), V2
	BEQ	CR2, Ldec_tail // Key size 12?
	VNCIPHER	V0, V1, V0
	VNCIPHER	V0, V2, V0

	// Load xk[52:59] and cipher
	LXVD2X	(R11+R5), V1
	LXVD2X	(R12+R5), V2
	BNE	CR3, Linvalid_key_len // Not key size 14?
	// Fallthrough to final cipher

Ldec_tail:
	// Cipher last two keys such that key information is
	// cleared from V1 and V2.
	VNCIPHER	V0, V1, V1
	VNCIPHERLAST	V1, V2, V2

	// Store the result in BE order.
	P8_STXVB16X(V2, R3, R0)
	RET

Linvalid_key_len:
	// Segfault, this should never happen. Only 3 keys sizes are created/used.
	MOVD	R0, 0(R0)
	RET

// Remove defines from above so they can be defined here
#undef INP
#undef OUTENC
#undef ROUNDS
#undef KEY
#undef TMP

// CBC encrypt or decrypt
// R3 src
// R4 dst
// R5 len
// R6 key
// R7 iv
// R8 enc=1 dec=0
// Ported from: aes_p8_cbc_encrypt
// Register usage:
// R9: ROUNDS
// R10: Index
// V4: IV
// V5: SRC
// V7: DST

#define INP R3
#define OUT R4
#define LEN R5
#define KEY R6
#define IVP R7
#define ENC R8
#define ROUNDS R9
#define IDX R10

#define RNDKEY0 V0
#define INOUT V2
#define TMP V3

#define IVEC V4

// Vector loads are done using LVX followed by
// a VPERM using mask generated from previous
// LVSL or LVSR instruction, to obtain the correct
// bytes if address is unaligned.

// Encryption is done with VCIPHER and VCIPHERLAST
// Decryption is done with VNCIPHER and VNCIPHERLAST

// Encrypt and decrypt is done as follows:
// - INOUT value is initialized in outer loop.
// - ROUNDS value is adjusted for loop unrolling.
// - Encryption/decryption is done in loop based on
// adjusted ROUNDS value.
// - Final INOUT value is encrypted/decrypted and stored.

// Note: original implementation had an 8X version
// for decryption which was omitted to avoid the
// complexity.

// func cryptBlocksChain(src, dst *byte, length int, key *uint32, iv *byte, enc int, nr int)
//
// CBC-mode encrypt (enc != 0) or decrypt (enc == 0) of 'length' bytes from
// src to dst using the expanded key and the 16-byte IV at iv. Processes whole
// 16-byte blocks; returns immediately if length < 16. The final chaining
// value is written back through iv for the next call.
TEXT ·cryptBlocksChain(SB), NOSPLIT|NOFRAME, $0
	MOVD	src+0(FP), INP
	MOVD	dst+8(FP), OUT
	MOVD	length+16(FP), LEN
	MOVD	key+24(FP), KEY
	MOVD	iv+32(FP), IVP
	MOVD	enc+40(FP), ENC
	MOVD	nr+48(FP), ROUNDS

#ifdef GOARCH_ppc64le
	MOVD	$·rcon(SB), R11
	LVX	(R11), ESPERM // Permute value for P8_ macros.
#endif

	CMPU	LEN, $16  // cmpldi r5,16
	BC	14, 0, LR // bltlr-, return if len < 16.
	CMPW	ENC, $0   // cmpwi r8,0

	P8_LXVB16X(IVP, R0, IVEC) // load ivec in BE register order

	// The round loop below is unrolled 2x, so the counter is nr/2 - 1
	// (first and last rounds are handled outside the loop).
	SRW	$1, ROUNDS  // rlwinm r9,r9,31,1,31
	MOVD	$0, IDX     // li r10,0
	ADD	$-1, ROUNDS // addi r9,r9,-1
	BEQ	Lcbc_dec    // beq
	PCALIGN	$16

	// Outer loop: initialize encrypted value (INOUT)
	// Load input (INPTAIL) ivec (IVEC)
Lcbc_enc:
	P8_LXVB16X(INP, R0, INOUT)    // load text in BE vreg order
	ADD	$16, INP              // addi r3,r3,16
	MOVD	ROUNDS, CTR           // mtctr r9
	ADD	$-16, LEN             // addi r5,r5,-16
	LXVD2X	(KEY+IDX), RNDKEY0    // load first xkey
	ADD	$16, IDX              // addi r10,r10,16
	VXOR	INOUT, RNDKEY0, INOUT // vxor v2,v2,v0
	VXOR	INOUT, IVEC, INOUT    // vxor v2,v2,v4  CBC: chain previous ciphertext

	// Encryption loop of INOUT using RNDKEY0
Loop_cbc_enc:
	LXVD2X	(KEY+IDX), RNDKEY0    // load next xkey
	VCIPHER	INOUT, RNDKEY0, INOUT // vcipher v2,v2,v1
	ADD	$16, IDX              // addi r10,r10,16
	LXVD2X	(KEY+IDX), RNDKEY0    // load next xkey
	VCIPHER	INOUT, RNDKEY0, INOUT // vcipher v2,v2,v1
	ADD	$16, IDX              // addi r10,r10,16
	BDNZ	Loop_cbc_enc

	// Encrypt tail values and store INOUT
	LXVD2X	(KEY+IDX), RNDKEY0        // load next xkey
	VCIPHER	INOUT, RNDKEY0, INOUT     // vcipher v2,v2,v1
	ADD	$16, IDX                  // addi r10,r10,16
	LXVD2X	(KEY+IDX), RNDKEY0        // load final xkey
	VCIPHERLAST	INOUT, RNDKEY0, IVEC // vcipherlast v4,v2,v0  result doubles as next IV
	MOVD	$0, IDX                   // reset key index for next block
	CMPU	LEN, $16                  // cmpldi r5,16
	P8_STXVB16X(IVEC, OUT, R0)        // store ciphertext in BE order
	ADD	$16, OUT                  // addi r4,r4,16
	BGE	Lcbc_enc                  // bge Lcbc_enc
	BR	Lcbc_done                 // b Lcbc_done

	// Outer loop: initialize decrypted value (INOUT)
	// Load input (INPTAIL) ivec (IVEC)
Lcbc_dec:
	P8_LXVB16X(INP, R0, TMP)    // load ciphertext in BE vreg order; kept in TMP as next IV
	ADD	$16, INP            // addi r3,r3,16
	MOVD	ROUNDS, CTR         // mtctr r9
	ADD	$-16, LEN           // addi r5,r5,-16
	LXVD2X	(KEY+IDX), RNDKEY0  // load first xkey
	ADD	$16, IDX            // addi r10,r10,16
	VXOR	TMP, RNDKEY0, INOUT // vxor v2,v3,v0
	PCALIGN	$16

	// Decryption loop of INOUT using RNDKEY0
Loop_cbc_dec:
	LXVD2X	(KEY+IDX), RNDKEY0       // load next xkey
	ADD	$16, IDX                 // addi r10,r10,16
	VNCIPHER	INOUT, RNDKEY0, INOUT // vncipher v2,v2,v1
	LXVD2X	(KEY+IDX), RNDKEY0       // load next xkey
	ADD	$16, IDX                 // addi r10,r10,16
	VNCIPHER	INOUT, RNDKEY0, INOUT // vncipher v2,v2,v0
	BDNZ	Loop_cbc_dec

	// Decrypt tail values and store INOUT
	LXVD2X	(KEY+IDX), RNDKEY0       // load next xkey
	ADD	$16, IDX                 // addi r10,r10,16
	VNCIPHER	INOUT, RNDKEY0, INOUT // vncipher v2,v2,v1
	LXVD2X	(KEY+IDX), RNDKEY0       // load final xkey
	MOVD	$0, IDX                  // li r10,0
	VNCIPHERLAST	INOUT, RNDKEY0, INOUT // vncipherlast v2,v2,v0
	CMPU	LEN, $16                 // cmpldi r5,16
	VXOR	INOUT, IVEC, INOUT       // vxor v2,v2,v4  CBC: unchain with previous ciphertext
	VOR	TMP, TMP, IVEC           // vor v4,v3,v3  this ciphertext becomes next IV
	P8_STXVB16X(INOUT, OUT, R0)      // store text in BE order
	ADD	$16, OUT                 // addi r4,r4,16
	BGE	Lcbc_dec                 // bge

Lcbc_done:
	VXOR	RNDKEY0, RNDKEY0, RNDKEY0 // clear key register
	P8_STXVB16X(IVEC, R0, IVP)        // Save ivec in BE order for next round.
	RET                               // bclr 20,lt,0