// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This is a derived work from OpenSSL of AES using assembly optimizations. The
// original code was written by Andy Polyakov <appro@openssl.org> and it's dual
// licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
// it. For further details see http://www.openssl.org/~appro/cryptogams/.

// Original code can be found at the link below:
// https://git.openssl.org/?p=openssl.git;a=blob;f=crypto/aes/asm/aesp8-ppc.pl

// The code is based on 627c953376 from 4 Jun 2016. I changed some function
// names in order to follow Go naming conventions more closely. For instance,
// the functions aes_p8_set_{en,de}crypt_key became set{En,De}cryptKeyAsm. I
// also split setEncryptKeyAsm in two parts and a new section was created
// (doEncryptKeyAsm). This was necessary to avoid overwriting the arguments
// when setDecryptKeyAsm calls setEncryptKeyAsm. There were other modifications
// as well, but they keep the same functionality.
#include "textflag.h"

// Register aliases for set{En,De}cryptKeyAsm and doEncryptKeyAsm.
// ROUNDS (R8) is also used as a plain index register (0x20) before the round
// count is written to it, and TEMP (R19) stands in for r0 of the original
// OpenSSL code (see the "andi. 0,..." / "lwz 0,..." trailing comments).
// For set{En,De}cryptKeyAsm
#define INP R3
#define BITS R4
#define OUT R5
#define PTR R6
#define CNT R7
#define ROUNDS R8
#define TEMP R19
#define ZERO V0
#define IN0 V1
#define IN1 V2
#define KEY V3
#define RCON V4
#define MASK V5
#define TMP V6
#define STAGE V7
#define OUTPERM V8
#define OUTMASK V9
#define OUTHEAD V10
#define OUTTAIL V11

// Register aliases for the single-block routines. They reuse the vector
// aliases above (ZERO, IN0, IN1, KEY, RCON, MASK) as scratch registers.
// For {en,de}cryptBlockAsm
#define BLK_INP R3
#define BLK_OUT R4
#define BLK_KEY R5
#define BLK_ROUNDS R6
#define BLK_IDX R7

// ·rcon is a 64-byte read-only table used by doEncryptKeyAsm:
//   +0x00  initial round constant, 0x01000000 in every 32-bit word
//   +0x10  round constant 0x1b000000, loaded for the last two round keys of
//          the 128-bit schedule
//   +0x20  permute control loaded into MASK (indexed with R8 = 0x20)
//   +0x30  zero filler
DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON
DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000
DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK
DATA ·rcon+0x30(SB)/8, $0x0000000000000000
DATA ·rcon+0x38(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $64

// setEncryptKeyAsm expands the key at `key` (keylen is a size in bits: it is
// compared against 128/192/256 below) into the schedule at `enc`.
// It only loads the arguments into INP/BITS/OUT and tail-jumps to
// doEncryptKeyAsm, which stores the result in ret+24(FP) and returns to the
// caller. See doEncryptKeyAsm for the return codes.
// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int
TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD enc+16(FP), OUT
	JMP ·doEncryptKeyAsm(SB)

// doEncryptKeyAsm is the shared key-expansion body.
//
// Inputs (registers):  INP = key pointer, BITS = key size in bits,
//                      OUT = destination of the key schedule.
// Result: the exit code is left in R3 (INP) and also stored to ret+24(FP):
//    0  success
//   -1  INP or OUT is nil
//   -2  BITS < 128, BITS > 256, or BITS is not a multiple of 64
// On success ROUNDS (R8) holds 10/12/14 and is stored as a 32-bit word at
// 0(OUT) after OUT has been advanced past the round keys; in every path the
// increments add up to 240 bytes from the start of the schedule, which is the
// offset the {en,de}cryptBlockAsm routines read the round count from.
// The LVSL/VPERM/VSEL sequence around every STVX merges each round key with
// the previous one (OUTHEAD/OUTTAIL) before storing.
// NOTE(review): presumably this is what lets OUT be non-16-byte-aligned, as in
// the OpenSSL original - confirm before relying on it.
//
// This text is used by both setEncryptKeyAsm and setDecryptKeyAsm
TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Do not change R10 since it's storing the LR value in setDecryptKeyAsm

	// Check arguments
	MOVD $-1, PTR // li 6,-1 exit code to -1 (255)
	CMPU INP, $0 // cmpldi r3,0 input key pointer set?
	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
	CMPU OUT, $0 // cmpldi r5,0 output key pointer set?
	BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort
	MOVD $-2, PTR // li 6,-2 exit code to -2 (254)
	CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128
	BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort
	CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256
	BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort
	ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64
	BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort

	MOVD $·rcon(SB), PTR // PTR point to rcon addr

	// Get key from memory and write aligned into VR
	NEG INP, R9 // neg 9,3 R9 is ~INP + 1
	LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0
	ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr
	LVSR (R9)(R0), KEY // lvsr 3,0,9
	MOVD $0x20, R8 // li 8,0x20 R8 = 32
	// The condition set by this compare is consumed by the BLT/BEQ dispatch
	// further down; nothing in between writes the condition register.
	CMPW BITS, $192 // cmpwi 4,192 Key size == 192?
	LVX (INP)(R0), IN1 // lvx 2,0,3
	VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask
	LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON
	VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap
	LVX (PTR)(R8), MASK // lvx 5,8,6
	ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON
	VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align
	MOVD $8, CNT // li 7,8 CNT = 8
	VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :)
	MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds)

	// Prepare the permute control and select mask used for every store to OUT
	LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5
	VSPLTISB $-1, OUTMASK // vspltisb 9,-1
	LVX (OUT)(R0), OUTHEAD // lvx 10,0,5
	VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8

	// Dispatch on key size: < 192 -> loop128, == 192 -> l192, else l256
	BLT loop128 // blt .Loop128
	ADD $8, INP, INP // addi 3,3,8
	BEQ l192 // beq .L192
	ADD $8, INP, INP // addi 3,3,8
	JMP l256 // b .L256

loop128:
	// Key schedule (Round 1 to 8)
	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output
	ADD $16, OUT, OUT // addi 5,5,16 Point to the next round

	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	BC 0x10, 0, loop128 // bdnz .Loop128

	// PTR was advanced by 0x10 above, so this loads the 0x1b row of ·rcon
	LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys

	// Key schedule (Round 9)
	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9
	ADD $16, OUT, OUT // addi 5,5,16

	// Key schedule (Round 10)
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN0, KEY, IN0 // vxor 1,1,3

	VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10
	ADD $16, OUT, OUT // addi 5,5,16

	// Key schedule (Round 11)
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11

	// INP now addresses the tail of the last store (fixed up at `done`);
	// OUT is moved from offset 160 to offset 240 for the rounds word.
	ADD $15, OUT, INP // addi 3,5,15
	ADD $0x50, OUT, OUT // addi 5,5,0x50

	MOVD $10, ROUNDS // li 8,10
	JMP done // b .Ldone

l192:
	// 192-bit key: load the remaining key bytes, store the first round key,
	// then run loop192 four times (three stores of 16 bytes per iteration).
	LVX (INP)(R0), TMP // lvx 6,0,3
	MOVD $4, CNT // li 7,4
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16
	VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
	VSPLTISB $8, KEY // vspltisb 3,8
	MOVD CNT, CTR // mtctr 7
	VSUBUBM MASK, KEY, MASK // vsububm 5,5,3

loop192:
	VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4

	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6

	VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8
	VSPLTW $3, IN0, TMP // vspltw 6,1,3
	VXOR TMP, IN1, TMP // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VXOR IN1, KEY, IN1 // vxor 2,2,3
	VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8

	VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16

	VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16

	VSPLTW $3, IN0, TMP // vspltw 6,1,3
	VXOR TMP, IN1, TMP // vxor 6,6,2
	VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VXOR IN1, KEY, IN1 // vxor 2,2,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $15, OUT, INP // addi 3,5,15
	ADD $16, OUT, OUT // addi 5,5,16
	BC 0x10, 0, loop192 // bdnz .Loop192

	// OUT is at offset 208 here (16 + 4*48); move it to 240 for the rounds word
	MOVD $12, ROUNDS // li 8,12
	ADD $0x20, OUT, OUT // addi 5,5,0x20
	JMP done // b .Ldone

l256:
	// 256-bit key: store the first round key, then loop256 stores two round
	// keys per iteration and leaves through the bdz in the middle of the 7th
	// iteration, with OUT at offset 240 (16 + 7*32).
	LVX (INP)(R0), TMP // lvx 6,0,3
	MOVD $7, CNT // li 7,7
	MOVD $14, ROUNDS // li 8,14
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16
	VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3
	MOVD CNT, CTR // mtctr 7

loop256:
	VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5
	VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12
	VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $16, OUT, OUT // addi 5,5,16

	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN0, TMP, IN0 // vxor 1,1,6
	VADDUWM RCON, RCON, RCON // vadduwm 4,4,4
	VXOR IN0, KEY, IN0 // vxor 1,1,3
	VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8
	VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9
	VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11
	STVX STAGE, (OUT+R0) // stvx 7,0,5
	ADD $15, OUT, INP // addi 3,5,15
	ADD $16, OUT, OUT // addi 5,5,16
	BC 0x12, 0, done // bdz .Ldone

	VSPLTW $3, IN0, KEY // vspltw 3,1,3
	VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12
	VSBOX KEY, KEY // vsbox 3,3

	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1 // vxor 2,2,6
	VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12
	VXOR IN1, TMP, IN1 // vxor 2,2,6

	VXOR IN1, KEY, IN1 // vxor 2,2,3
	JMP loop256 // b .Loop256

done:
	// Merge the pending OUTHEAD bytes with what is already in memory at INP
	// (set to "last store + 15" above) and write the result back, then store
	// the round count at the current OUT.
	LVX (INP)(R0), IN1 // lvx 2,0,3
	VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9
	STVX IN1, (INP+R0) // stvx 2,0,3
	MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0)
	MOVW ROUNDS, 0(OUT) // stw 8,0(5)

enc_key_abort:
	// PTR holds the exit code on every path that reaches this label
	MOVD PTR, INP // mr 3,6 set exit code with PTR value
	MOVD INP, ret+24(FP) // Put return value into the FP
	RET // blr

// setDecryptKeyAsm builds the decryption schedule at `dec`.
// It runs doEncryptKeyAsm on the same arguments and, when that returns 0,
// reverses the order of the 16-byte round keys in place. A non-zero exit code
// from doEncryptKeyAsm is returned unchanged; otherwise the result is 0.
// LR is kept in R10 across the CALL because this function has no frame.
// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int
TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD key+0(FP), INP
	MOVD keylen+8(FP), BITS
	MOVD dec+16(FP), OUT

	MOVD LR, R10 // mflr 10
	CALL ·doEncryptKeyAsm(SB)
	MOVD R10, LR // mtlr 10

	CMPW INP, $0 // cmpwi 3,0 exit 0 = ok
	BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort

	// doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode
	// and left OUT 240 bytes past the start of the schedule, so:
	//   INP = first round key, OUT = INP + 16*rounds = last round key,
	//   CTR = rounds/2 swaps (the middle round key stays where it is).
	SLW $4, ROUNDS, CNT // slwi 7,8,4
	SUB $240, OUT, INP // subi 3,5,240
	SRW $1, ROUNDS, ROUNDS // srwi 8,8,1
	ADD R7, INP, OUT // add 5,3,7
	MOVD ROUNDS, CTR // mtctr 8

	// dec_key will invert the key sequence in order to be used for decrypt
	// Each iteration swaps the 16 bytes at INP with the 16 bytes at OUT, then
	// moves INP forward and OUT backward. R6-R12 and TEMP are scratch here
	// (LR has already been restored from R10).
dec_key:
	MOVWZ 0(INP), TEMP // lwz 0, 0(3)
	MOVWZ 4(INP), R6 // lwz 6, 4(3)
	MOVWZ 8(INP), R7 // lwz 7, 8(3)
	MOVWZ 12(INP), R8 // lwz 8, 12(3)
	ADD $16, INP, INP // addi 3,3,16
	MOVWZ 0(OUT), R9 // lwz 9, 0(5)
	MOVWZ 4(OUT), R10 // lwz 10,4(5)
	MOVWZ 8(OUT), R11 // lwz 11,8(5)
	MOVWZ 12(OUT), R12 // lwz 12,12(5)
	MOVW TEMP, 0(OUT) // stw 0, 0(5)
	MOVW R6, 4(OUT) // stw 6, 4(5)
	MOVW R7, 8(OUT) // stw 7, 8(5)
	MOVW R8, 12(OUT) // stw 8, 12(5)
	SUB $16, OUT, OUT // subi 5,5,16
	MOVW R9, -16(INP) // stw 9, -16(3)
	MOVW R10, -12(INP) // stw 10,-12(3)
	MOVW R11, -8(INP) // stw 11,-8(3)
	MOVW R12, -4(INP) // stw 12,-4(3)
	BC 0x10, 0, dec_key // bdnz .Ldeckey

	XOR R3, R3, R3 // xor 3,3,3 Clean R3

dec_key_abort:
	MOVD R3, ret+24(FP) // Put return value into the FP
	RET // blr


// encryptBlockAsm encrypts the 16-byte block at `src` into `dst` using the
// schedule at `enc`. The round count is read from offset 240 of the schedule.
// After XORing in the first round key, the loop runs rounds/2 - 1 times with
// two VCIPHER rounds per iteration, followed by one VCIPHER and the final
// VCIPHERLAST. The LVSL/LVSR + VPERM sequences realign the input block and
// the round keys; the output is merged with the existing destination bytes
// (two LVX/VSEL/STVX pairs) rather than written with a single store.
// func encryptBlockAsm(dst, src *byte, enc *uint32)
TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD enc+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
	MOVD $15, BLK_IDX // li 7,15

	// Load the input block (ZERO is used as the state register from here on)
	LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
	NEG BLK_OUT, R11 // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
	VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY // lvsr 3,0,11
	VXOR IN1, RCON, IN1 // vxor 2,2,4
	MOVD $16, BLK_IDX // li 7,16
	VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5

	// Initial AddRoundKey, then preload the next round key
	VXOR ZERO, IN0, ZERO // vxor 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	MOVD BLK_ROUNDS, CTR // mtctr 6

loop_enc:
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	BC 0x10, 0, loop_enc // bdnz .Loop_enc

	// Last two rounds
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1

	// Write the result: build a select mask from KEY (lvsr of -dst), rotate
	// the state, and merge it into the two 16-byte lines that cover dst.
	VSPLTISB $-1, IN1 // vspltisb 2,-1
	VXOR IN0, IN0, IN0 // vxor 1,1,1
	MOVD $15, BLK_IDX // li 7,15
	VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
	VXOR KEY, RCON, KEY // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4

	RET // blr


// decryptBlockAsm decrypts the 16-byte block at `src` into `dst` using the
// schedule at `dec`. It has the same structure as encryptBlockAsm, with
// VNCIPHER/VNCIPHERLAST in place of VCIPHER/VCIPHERLAST; the round count is
// again read from offset 240 of the schedule.
// func decryptBlockAsm(dst, src *byte, dec *uint32)
TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0
	// Load the arguments inside the registers
	MOVD dst+0(FP), BLK_OUT
	MOVD src+8(FP), BLK_INP
	MOVD dec+16(FP), BLK_KEY

	MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5)
	MOVD $15, BLK_IDX // li 7,15

	// Load the input block (ZERO is used as the state register from here on)
	LVX (BLK_INP)(R0), ZERO // lvx 0,0,3
	NEG BLK_OUT, R11 // neg 11,4
	LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3
	LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3
	VSPLTISB $0x0f, RCON // vspltisb 4,0x0f
	LVSR (R11)(R0), KEY // lvsr 3,0,11
	VXOR IN1, RCON, IN1 // vxor 2,2,4
	MOVD $16, BLK_IDX // li 7,16
	VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2
	LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5
	LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5
	SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5

	// Initial AddRoundKey, then preload the next round key
	VXOR ZERO, IN0, ZERO // vxor 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	MOVD BLK_ROUNDS, CTR // mtctr 6

loop_dec:
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1
	LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5
	ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16
	BC 0x10, 0, loop_dec // bdnz .Loop_dec

	// Last two rounds
	VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5
	VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2
	LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5
	VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5
	VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1

	// Write the result: build a select mask from KEY (lvsr of -dst), rotate
	// the state, and merge it into the two 16-byte lines that cover dst.
	VSPLTISB $-1, IN1 // vspltisb 2,-1
	VXOR IN0, IN0, IN0 // vxor 1,1,1
	MOVD $15, BLK_IDX // li 7,15
	VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3
	VXOR KEY, RCON, KEY // vxor 3,3,4
	LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4
	VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3
	VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2
	LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4
	STVX IN0, (BLK_OUT+R0) // stvx 1,0,4
	VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2
	STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4

	RET // blr