// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

// Original code can be found at the link below:
// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl

// Some function names were changed to better follow Go conventions.
// For instance, the functions aes_p8_set_{en,de}crypt_key became
// set{En,De}cryptKeyAsm. setEncryptKeyAsm was also split into two parts
// and a new section was created (doEncryptKeyAsm). This was necessary to
// avoid overwriting the arguments when setDecryptKeyAsm calls setEncryptKeyAsm.
// There were other modifications as well, but the functionality was kept the same.

#include "textflag.h"

// Register aliases for set{En,De}cryptKeyAsm and doEncryptKeyAsm.
// The numeric register names in the trailing comments of each instruction
// refer to the original CRYPTOGAMS PowerPC mnemonics.
#define INP R3
#define BITS R4
#define OUT R5
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// Register aliases for {en,de}cryptBlockAsm.
// Note that these overlap with the key-setup aliases above (R3..R7).
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7

// ·rcon is a 64-byte read-only table used by doEncryptKeyAsm:
//   0x00..0x0f  first RCON vector (loaded before the key-schedule loops)
//   0x10..0x1f  second RCON vector (loaded for the last 128-bit rounds)
//   0x20..0x2f  permute MASK used for the "rotate-n-splat" step
//   0x30..0x3f  zero padding
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $64

// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
//
// Loads the Go arguments into INP/BITS/OUT and tail-jumps into
// doEncryptKeyAsm, which performs the key expansion and writes the
// return value.
TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD enc+16(FP), OUT
	JMP ·doEncryptKeyAsm(SB)

// doEncryptKeyAsm is used by both setEncryptKeyAsm and setDecryptKeyAsm.
//
// Inputs (already in registers): INP = key pointer, BITS = key size in
// bits, OUT = output key-schedule pointer.
//
// The exit code is left in INP (R3) and also stored to ret+24(FP):
//    0  success
//   -1  INP or OUT is zero
//   -2  BITS is below 128, above 256, or not a multiple of 64
//
// On success ROUNDS (R8) holds 10, 12 or 14 and the same value is stored
// as a 32-bit word at the final OUT address.
TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm

	// Check arguments
	MOVD $-1, PTR                            // li    6,-1       exit code to -1 (255)
	CMPU INP, $0                             // cmpldi r3,0     input key pointer set?
	BC 0x0E, 2, enc_key_abort                // beq-  .Lenc_key_abort
	CMPU OUT, $0                             // cmpldi r5,0     output key pointer set?
	BC 0x0E, 2, enc_key_abort                // beq-  .Lenc_key_abort
	MOVD $-2, PTR                            // li    6,-2       exit code to -2 (254)
	CMPW BITS, $128                          // cmpwi 4,128     greater or equal to 128
	BC 0x0E, 0, enc_key_abort                // blt-  .Lenc_key_abort
	CMPW BITS, $256                          // cmpwi 4,256     lesser or equal to 256
	BC 0x0E, 1, enc_key_abort                // bgt-  .Lenc_key_abort
	ANDCC $0x3f, BITS, TEMP                  // andi. 0,4,0x3f  multiple of 64
	BC 0x06, 2, enc_key_abort                // bne-  .Lenc_key_abort

	MOVD $·rcon(SB), PTR                     // PTR points to the rcon table

	// Get key from memory and write aligned into VR
	NEG INP, R9                              // neg   9,3        R9 is ~INP + 1
	LVX (INP)(R0), IN0                       // lvx   1,0,3      Load key inside IN0
	ADD $15, INP, INP                        // addi  3,3,15     Add 15B to INP addr
	LVSR (R9)(R0), KEY                       // lvsr  3,0,9
	MOVD $0x20, R8                           // li    8,0x20     R8 = 32 (R8 is reused as an index here; it is set to the round count later)
	CMPW BITS, $192                          // cmpwi 4,192      Key size == 192? (result consumed by BLT/BEQ below)
	LVX (INP)(R0), IN1                       // lvx   2,0,3
	VSPLTISB $0x0f, MASK                     // vspltisb 5,0x0f  0x0f0f0f0f... mask
	LVX (PTR)(R0), RCON                      // lvx   4,0,6      Load first 16 bytes into RCON
	VXOR KEY, MASK, KEY                      // vxor  3,3,5      Adjust for byte swap
	LVX (PTR)(R8), MASK                      // lvx   5,8,6      Load MASK from rcon+0x20
	ADD $0x10, PTR, PTR                      // addi  6,6,0x10   PTR to next 16 bytes of RCON
	VPERM IN0, IN1, KEY, IN0                 // vperm 1,1,2,3    Align
	MOVD $8, CNT                             // li    7,8        CNT = 8
	VXOR ZERO, ZERO, ZERO                    // vxor  0,0,0      Zero to be zero :)
	MOVD CNT, CTR                            // mtctr 7          Set the counter to 8 (rounds)

	// Prepare the permute vector, mask and head used to write each
	// round key to OUT through STAGE.
	LVSL (OUT)(R0), OUTPERM                  // lvsl  8,0,5
	VSPLTISB $-1, OUTMASK                    // vspltisb 9,-1
	LVX (OUT)(R0), OUTHEAD                   // lvx   10,0,5
	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK    // vperm 9,9,0,8

	// Dispatch on the CMPW BITS, $192 result above:
	// less than 192 -> loop128, equal -> l192, otherwise -> l256.
	BLT loop128                              // blt   .Loop128
	ADD $8, INP, INP                         // addi  3,3,8
	BEQ l192                                 // beq   .L192
	ADD $8, INP, INP                         // addi  3,3,8
	JMP l256                                 // b     .L256

loop128:
	// Key schedule (Round 1 to 8)
	VPERM IN0, IN0, MASK, KEY                // vperm 3,1,1,5    Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP               // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8   Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	VCIPHERLAST KEY, RCON, KEY               // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5      Write to output
	ADD $16, OUT, OUT                        // addi  5,5,16     Point to the next round

	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VADDUWM RCON, RCON, RCON                 // vadduwm 4,4,4    Double RCON for the next round
	VXOR IN0, KEY, IN0                       // vxor  1,1,3
	BC 0x10, 0, loop128                      // bdnz  .Loop128

	LVX (PTR)(R0), RCON                      // lvx   4,0,6      Last two round keys

	// Key schedule (Round 9)
	VPERM IN0, IN0, MASK, KEY                // vperm 3,1,1,5    Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP               // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8   Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	VCIPHERLAST KEY, RCON, KEY               // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5      Round 9
	ADD $16, OUT, OUT                        // addi  5,5,16

	// Key schedule (Round 10)
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VADDUWM RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR IN0, KEY, IN0                       // vxor  1,1,3

	VPERM IN0, IN0, MASK, KEY                // vperm 3,1,1,5    Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP               // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8   Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	VCIPHERLAST KEY, RCON, KEY               // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5      Round 10
	ADD $16, OUT, OUT                        // addi  5,5,16

	// Key schedule (Round 11)
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VXOR IN0, KEY, IN0                       // vxor  1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5      Round 11

	ADD $15, OUT, INP                        // addi  3,5,15     INP now addresses the tail fixed up at done
	ADD $0x50, OUT, OUT                      // addi  5,5,0x50   Advance OUT to where the round count is stored

	MOVD $10, ROUNDS                         // li    8,10
	JMP done                                 // b     .Ldone

l192:
	// 192-bit key: load the remaining key bytes, emit the first round
	// key, then run loop192 four times.
	LVX (INP)(R0), TMP                       // lvx   6,0,3
	MOVD $4, CNT                             // li    7,4
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $16, OUT, OUT                        // addi  5,5,16
	VPERM IN1, TMP, KEY, IN1                 // vperm 2,2,6,3
	VSPLTISB $8, KEY                         // vspltisb 3,8
	MOVD CNT, CTR                            // mtctr 7
	VSUBUBM MASK, KEY, MASK                  // vsububm 5,5,3    Adjust MASK by 8 in every byte

loop192:
	VPERM IN1, IN1, MASK, KEY                // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP               // vsldoi 6,0,1,12
	VCIPHERLAST KEY, RCON, KEY               // vcipherlast 3,3,4

	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6

	VSLDOI $8, ZERO, IN1, STAGE              // vsldoi 7,0,2,8
	VSPLTW $3, IN0, TMP                      // vspltw 6,1,3
	VXOR TMP, IN1, TMP                       // vxor  6,6,2
	VSLDOI $12, ZERO, IN1, IN1               // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR IN1, TMP, IN1                       // vxor  2,2,6
	VXOR IN0, KEY, IN0                       // vxor  1,1,3
	VXOR IN1, KEY, IN1                       // vxor  2,2,3
	VSLDOI $8, STAGE, IN0, STAGE             // vsldoi 7,7,1,8

	VPERM IN1, IN1, MASK, KEY                // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP               // vsldoi 6,0,1,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL     // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	VCIPHERLAST KEY, RCON, KEY               // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $16, OUT, OUT                        // addi  5,5,16

	VSLDOI $8, IN0, IN1, STAGE               // vsldoi 7,1,2,8
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL     // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $16, OUT, OUT                        // addi  5,5,16

	VSPLTW $3, IN0, TMP                      // vspltw 6,1,3
	VXOR TMP, IN1, TMP                       // vxor  6,6,2
	VSLDOI $12, ZERO, IN1, IN1               // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR IN1, TMP, IN1                       // vxor  2,2,6
	VXOR IN0, KEY, IN0                       // vxor  1,1,3
	VXOR IN1, KEY, IN1                       // vxor  2,2,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $15, OUT, INP                        // addi  3,5,15
	ADD $16, OUT, OUT                        // addi  5,5,16
	BC 0x10, 0, loop192                      // bdnz  .Loop192

	MOVD $12, ROUNDS                         // li    8,12
	ADD $0x20, OUT, OUT                      // addi  5,5,0x20   Advance OUT to where the round count is stored
	JMP done                                 // b     .Ldone

l256:
	// 256-bit key: load the remaining key bytes, emit the first round
	// key, then iterate loop256 with CTR = 7. ROUNDS is set up front.
	LVX (INP)(R0), TMP                       // lvx   6,0,3
	MOVD $7, CNT                             // li    7,7
	MOVD $14, ROUNDS                         // li    8,14
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $16, OUT, OUT                        // addi  5,5,16
	VPERM IN1, TMP, KEY, IN1                 // vperm 2,2,6,3
	MOVD CNT, CTR                            // mtctr 7

loop256:
	VPERM IN1, IN1, MASK, KEY                // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP               // vsldoi 6,0,1,12
	VPERM IN1, IN1, OUTPERM, OUTTAIL         // vperm 11,2,2,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	VCIPHERLAST KEY, RCON, KEY               // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $16, OUT, OUT                        // addi  5,5,16

	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0                       // vxor  1,1,6
	VADDUWM RCON, RCON, RCON                 // vadduwm 4,4,4
	VXOR IN0, KEY, IN0                       // vxor  1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL         // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE    // vsel  7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD            // vor   10,11,11
	STVX STAGE, (OUT+R0)                     // stvx  7,0,5
	ADD $15, OUT, INP                        // addi  3,5,15
	ADD $16, OUT, OUT                        // addi  5,5,16
	BC 0x12, 0, done                         // bdz   .Ldone     Exit once CTR reaches zero

	VSPLTW $3, IN0, KEY                      // vspltw 3,1,3
	VSLDOI $12, ZERO, IN1, TMP               // vsldoi 6,0,2,12
	VSBOX KEY, KEY                           // vsbox 3,3

	VXOR IN1, TMP, IN1                       // vxor  2,2,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1                       // vxor  2,2,6
	VSLDOI $12, ZERO, TMP, TMP               // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1                       // vxor  2,2,6

	VXOR IN1, KEY, IN1                       // vxor  2,2,3
	JMP loop256                              // b     .Loop256

done:
	// Merge the pending OUTHEAD bytes with the vector currently in memory
	// at INP, store it back, then record the round count at 0(OUT).
	LVX (INP)(R0), IN1                       // lvx   2,0,3
	VSEL OUTHEAD, IN1, OUTMASK, IN1          // vsel  2,10,2,9
	STVX IN1, (INP+R0)                       // stvx  2,0,3
	MOVD $0, PTR                             // li    6,0        set PTR to 0 (exit code 0)
	MOVW ROUNDS, 0(OUT)                      // stw   8,0(5)

enc_key_abort:
	MOVD PTR, INP                            // mr    3,6        set exit code with PTR value
	MOVD INP, ret+24(FP)                     // Put return value into the FP
	RET                                      // blr

// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
//
// Builds the encryption key schedule via doEncryptKeyAsm and, if that
// succeeded (exit code 0 in INP), swaps the 16-byte round keys end for
// end in place so the schedule can be used for decryption. Returns the
// exit code from doEncryptKeyAsm on failure, or 0 on success.
TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD dec+16(FP), OUT

	// Preserve LR across the call; doEncryptKeyAsm leaves R10 untouched.
	MOVD LR, R10                             // mflr 10
	CALL ·doEncryptKeyAsm(SB)
	MOVD R10, LR                             // mtlr 10

	CMPW INP, $0                             // cmpwi 3,0        exit 0 = ok
	BC 0x06, 2, dec_key_abort                // bne- .Ldec_key_abort

	// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
	SLW $4, ROUNDS, CNT                      // slwi 7,8,4       CNT = ROUNDS * 16
	SUB $240, OUT, INP                       // subi 3,5,240     INP = first round key
	SRW $1, ROUNDS, ROUNDS                   // srwi 8,8,1       ROUNDS / 2 swaps needed
	ADD R7, INP, OUT                         // add 5,3,7        OUT = last round key (R7 is CNT)
	MOVD ROUNDS, CTR                         // mtctr 8

	// dec_key will invert the key sequence in order to be used for decrypt.
	// Each iteration swaps the 16 bytes at INP with the 16 bytes at OUT,
	// then moves INP forward and OUT backward. R6..R12 are scratch here.
dec_key:
	MOVWZ 0(INP), TEMP                       // lwz 0, 0(3)
	MOVWZ 4(INP), R6                         // lwz 6, 4(3)
	MOVWZ 8(INP), R7                         // lwz 7, 8(3)
	MOVWZ 12(INP), R8                        // lwz 8, 12(3)
	ADD $16, INP, INP                        // addi 3,3,16
	MOVWZ 0(OUT), R9                         // lwz 9, 0(5)
	MOVWZ 4(OUT), R10                        // lwz 10,4(5)
	MOVWZ 8(OUT), R11                        // lwz 11,8(5)
	MOVWZ 12(OUT), R12                       // lwz 12,12(5)
	MOVW TEMP, 0(OUT)                        // stw 0, 0(5)
	MOVW R6, 4(OUT)                          // stw 6, 4(5)
	MOVW R7, 8(OUT)                          // stw 7, 8(5)
	MOVW R8, 12(OUT)                         // stw 8, 12(5)
	SUB $16, OUT, OUT                        // subi 5,5,16
	MOVW R9, -16(INP)                        // stw 9, -16(3)
	MOVW R10, -12(INP)                       // stw 10,-12(3)
	MOVW R11, -8(INP)                        // stw 11,-8(3)
	MOVW R12, -4(INP)                        // stw 12,-4(3)
	BC 0x10, 0, dec_key                      // bdnz .Ldeckey

	XOR R3, R3, R3                           // xor 3,3,3        Clean R3 (return 0)

dec_key_abort:
	MOVD R3, ret+24(FP)                      // Put return value into the FP
	RET                                      // blr


// func encryptBlockAsm(dst, src *byte, enc *uint32)
//
// Encrypts one 16-byte block from src into dst using the key schedule at
// enc. The round count is read from offset 240 of enc. The LVSL/LVSR +
// VPERM/VSEL sequences around the loads and stores presumably handle
// unaligned src, dst and enc pointers, as in the CRYPTOGAMS original
// (NOTE(review): confirm against the upstream code).
TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD enc+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS           // lwz 6,240(5)     Round count stored by key setup
	MOVD $15, BLK_IDX                        // li 7,15

	LVX (BLK_INP)(R0), ZERO                  // lvx 0,0,3        ZERO (V0) holds the block state in this function
	NEG BLK_OUT, R11                         // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0              // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1                  // lvsl 2,0,3
	VSPLTISB $0x0f, RCON                     // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY                      // lvsr 3,0,11
	VXOR IN1, RCON, IN1                      // vxor 2,2,4
	MOVD $16, BLK_IDX                        // li 7,16
	VPERM ZERO, IN0, IN1, ZERO               // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0                   // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK                 // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS           // srwi 6,6,1       The loop below does two rounds per iteration
	LVX (BLK_KEY)(BLK_IDX), IN1              // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS           // subi 6,6,1       Final two rounds are done after the loop
	VPERM IN1, IN0, MASK, IN0                // vperm 1,2,1,5

	VXOR ZERO, IN0, ZERO                     // vxor 0,0,1       XOR the first round key into the state
	LVX (BLK_KEY)(BLK_IDX), IN0              // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	MOVD BLK_ROUNDS, CTR                     // mtctr 6

loop_enc:
	VPERM IN0, IN1, MASK, IN1                // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO                  // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1              // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0                // vperm 1,2,1,5
	VCIPHER ZERO, IN0, ZERO                  // vcipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0              // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	BC 0x10, 0, loop_enc                     // bdnz .Loop_enc

	VPERM IN0, IN1, MASK, IN1                // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO                  // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1              // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0                // vperm 1,2,1,5
	VCIPHERLAST ZERO, IN0, ZERO              // vcipherlast 0,0,1

	// Write the result: build a select mask from KEY, merge the permuted
	// state with the two vectors currently at dst and dst+15, store both.
	VSPLTISB $-1, IN1                        // vspltisb 2,-1
	VXOR IN0, IN0, IN0                       // vxor 1,1,1
	MOVD $15, BLK_IDX                        // li 7,15
	VPERM IN1, IN0, KEY, IN1                 // vperm 2,2,1,3
	VXOR KEY, RCON, KEY                      // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0                   // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO              // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0                 // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON             // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0)                   // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO               // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX)             // stvx 0,7,4

	RET                                      // blr


// func decryptBlockAsm(dst, src *byte, dec *uint32)
//
// Decrypts one 16-byte block from src into dst using the key schedule at
// dec. Identical in structure to encryptBlockAsm, with VNCIPHER and
// VNCIPHERLAST in place of VCIPHER and VCIPHERLAST. The round count is
// read from offset 240 of dec.
TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD dec+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS           // lwz 6,240(5)     Round count stored by key setup
	MOVD $15, BLK_IDX                        // li 7,15

	LVX (BLK_INP)(R0), ZERO                  // lvx 0,0,3        ZERO (V0) holds the block state in this function
	NEG BLK_OUT, R11                         // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0              // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1                  // lvsl 2,0,3
	VSPLTISB $0x0f, RCON                     // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY                      // lvsr 3,0,11
	VXOR IN1, RCON, IN1                      // vxor 2,2,4
	MOVD $16, BLK_IDX                        // li 7,16
	VPERM ZERO, IN0, IN1, ZERO               // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0                   // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK                 // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS           // srwi 6,6,1       The loop below does two rounds per iteration
	LVX (BLK_KEY)(BLK_IDX), IN1              // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS           // subi 6,6,1       Final two rounds are done after the loop
	VPERM IN1, IN0, MASK, IN0                // vperm 1,2,1,5

	VXOR ZERO, IN0, ZERO                     // vxor 0,0,1       XOR the first round key into the state
	LVX (BLK_KEY)(BLK_IDX), IN0              // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	MOVD BLK_ROUNDS, CTR                     // mtctr 6

loop_dec:
	VPERM IN0, IN1, MASK, IN1                // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO                 // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1              // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0                // vperm 1,2,1,5
	VNCIPHER ZERO, IN0, ZERO                 // vncipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0              // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX                // addi 7,7,16
	BC 0x10, 0, loop_dec                     // bdnz .Loop_dec

	VPERM IN0, IN1, MASK, IN1                // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO                 // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1              // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0                // vperm 1,2,1,5
	VNCIPHERLAST ZERO, IN0, ZERO             // vncipherlast 0,0,1

	// Write the result: build a select mask from KEY, merge the permuted
	// state with the two vectors currently at dst and dst+15, store both.
	VSPLTISB $-1, IN1                        // vspltisb 2,-1
	VXOR IN0, IN0, IN0                       // vxor 1,1,1
	MOVD $15, BLK_IDX                        // li 7,15
	VPERM IN1, IN0, KEY, IN1                 // vperm 2,2,1,3
	VXOR KEY, RCON, KEY                      // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0                   // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO              // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0                 // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON             // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0)                   // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO               // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX)             // stvx 0,7,4

	RET                                      // blr