// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//
// WARNING: this file is built by the bootstrap compiler, thus
// it must maintain compatibility with the oldest supported
// bootstrap toolchain.
//

//go:build !purego && (ppc64 || ppc64le)
// +build !purego
// +build ppc64 ppc64le

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

#include "textflag.h"

// SHA256 block routine. See sha256block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 63 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// General purpose register assignments.
#define CTX	R3	// dig: address of the hash state (8 words at offset 0)
#define INP	R4	// current read position in p
#define END	R5	// INP + (len(p) rounded down to a multiple of 64)
#define TBL	R6 // Pointer into kcon table
#define LEN	R9	// len(p), then len(p) with the low 6 bits cleared
#define TEMP	R12	// scratch

#define TBL_STRT	R7 // Pointer to start of kcon table.

// Index registers used with the indexed vector loads/stores below.
// R_x000 is R0, which is never written in this file; the code relies on it
// holding zero. The others are loaded with the constant in their name by
// ·block before the main loop.
#define R_x000	R0
#define R_x010	R8
#define R_x020	R10
#define R_x030	R11
#define R_x040	R14
#define R_x050	R15
#define R_x060	R16
#define R_x070	R17
#define R_x080	R18
#define R_x090	R19
#define R_x0a0	R20
#define R_x0b0	R21
#define R_x0c0	R22
#define R_x0d0	R23
#define R_x0e0	R24
#define R_x0f0	R25
#define R_x100	R26
#define R_x110	R27


// V0-V7 are A-H
// V8-V23 are used for the message schedule
#define KI	V24	// round constant vector, loaded ahead of use from kcon
#define FUNC	V25	// scratch for Ch(e,f,g), then Maj(a,b,c)
#define S0	V26	// BIGSIGMA0(a)
#define S1	V27	// BIGSIGMA1(e)
#define s0	V28	// SIGMA0 term of the message schedule
#define s1	V29	// SIGMA1 term of the message schedule
#define LEMASK	V31 // Permutation control register for little endian

// 4 copies of each Kt, to fill all 4 words of a vector register
DATA ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
DATA ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
DATA ·kcon+0x010(SB)/8, $0x7137449171374491
DATA ·kcon+0x018(SB)/8, $0x7137449171374491
DATA ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
DATA ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
DATA ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
DATA ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
DATA ·kcon+0x050(SB)/8, $0x59f111f159f111f1
DATA ·kcon+0x058(SB)/8, $0x59f111f159f111f1
DATA ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
DATA ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
DATA ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
DATA ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
DATA ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
DATA ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
DATA ·kcon+0x090(SB)/8, $0x12835b0112835b01
DATA ·kcon+0x098(SB)/8, $0x12835b0112835b01
DATA ·kcon+0x0A0(SB)/8, $0x243185be243185be
DATA ·kcon+0x0A8(SB)/8, $0x243185be243185be
DATA ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
DATA ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
DATA ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
DATA ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
DATA ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
DATA ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
DATA ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
DATA ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
DATA ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
DATA ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
DATA ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
DATA ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
DATA ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
DATA ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
DATA ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
DATA ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
DATA ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
DATA ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
DATA ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
DATA ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
DATA ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
DATA ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
DATA ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA ·kcon+0x170(SB)/8, $0x76f988da76f988da
DATA ·kcon+0x178(SB)/8, $0x76f988da76f988da
DATA ·kcon+0x180(SB)/8, $0x983e5152983e5152
DATA ·kcon+0x188(SB)/8, $0x983e5152983e5152
DATA ·kcon+0x190(SB)/8, $0xa831c66da831c66d
DATA ·kcon+0x198(SB)/8, $0xa831c66da831c66d
DATA ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
DATA ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
DATA ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
DATA ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
DATA ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
DATA ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
DATA ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
DATA ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
DATA ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
DATA ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
DATA ·kcon+0x1F0(SB)/8, $0x1429296714292967
DATA ·kcon+0x1F8(SB)/8, $0x1429296714292967
DATA ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
DATA ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
DATA ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
DATA ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
DATA ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA ·kcon+0x230(SB)/8, $0x53380d1353380d13
DATA ·kcon+0x238(SB)/8, $0x53380d1353380d13
DATA ·kcon+0x240(SB)/8, $0x650a7354650a7354
DATA ·kcon+0x248(SB)/8, $0x650a7354650a7354
DATA ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
DATA ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
DATA ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
DATA ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
DATA ·kcon+0x270(SB)/8, $0x92722c8592722c85
DATA ·kcon+0x278(SB)/8, $0x92722c8592722c85
DATA ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
DATA ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
DATA ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
DATA ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
DATA ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
DATA ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
DATA ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
DATA ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
DATA ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
DATA ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
DATA ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
DATA ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
DATA ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
DATA ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
DATA ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
DATA ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
DATA ·kcon+0x310(SB)/8, $0x1e376c081e376c08
DATA ·kcon+0x318(SB)/8, $0x1e376c081e376c08
DATA ·kcon+0x320(SB)/8, $0x2748774c2748774c
DATA ·kcon+0x328(SB)/8, $0x2748774c2748774c
DATA ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
DATA ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
DATA ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
DATA ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
DATA ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
DATA ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
DATA ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
DATA ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
DATA ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
DATA ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
DATA ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
DATA ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
DATA ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
DATA ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
DATA ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
DATA ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
DATA ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
DATA ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
DATA ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
DATA ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
DATA ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
DATA ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
DATA ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
DATA ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
// All-zero entry following K63. The round macros always add the constant
// fetched one round earlier into g, so round 63 adds this vector and leaves
// g unchanged.
DATA ·kcon+0x400(SB)/8, $0x0000000000000000
DATA ·kcon+0x408(SB)/8, $0x0000000000000000

// Three VPERM control vectors at 0x410, 0x420 and 0x430. The tail of ·block
// uses them (via KI, V8 and V9) to gather the eight state registers back into
// two vectors before storing them to the digest.
#ifdef GOARCH_ppc64le
DATA ·kcon+0x410(SB)/8, $0x1011121310111213	// permutation control vectors
DATA ·kcon+0x418(SB)/8, $0x1011121300010203
DATA ·kcon+0x420(SB)/8, $0x1011121310111213
DATA ·kcon+0x428(SB)/8, $0x0405060700010203
DATA ·kcon+0x430(SB)/8, $0x1011121308090a0b
DATA ·kcon+0x438(SB)/8, $0x0405060700010203
#else
DATA ·kcon+0x410(SB)/8, $0x1011121300010203
DATA ·kcon+0x418(SB)/8, $0x1011121310111213	// permutation control vectors
DATA ·kcon+0x420(SB)/8, $0x0405060700010203
DATA ·kcon+0x428(SB)/8, $0x1011121310111213
DATA ·kcon+0x430(SB)/8, $0x0001020304050607
DATA ·kcon+0x438(SB)/8, $0x08090a0b10111213
#endif

// 1088 = 0x440 bytes: 64 round constants, the zero entry and the three
// permutation vectors, 16 bytes each.
GLOBL ·kcon(SB), RODATA, $1088

// SHA256ROUND0 performs one compression round whose Wt is already available
// in xi (used for rounds 0-14 in ·block).
//
// a-h name the vector registers that currently hold the working variables and
// idx is the index register selecting which kcon entry, relative to TBL, is
// loaded into KI at the end of the round.
//
// On entry h already contains Kt: the previous round added it to the register
// that was then g (the very first add is done explicitly in ·block). Steps:
//
//	FUNC = Ch(e,f,g)                 VSEL g, f, e
//	S1 = BIGSIGMA1(e)                VSHASIGMAW $15, e, $1
//	S0 = BIGSIGMA0(a)                VSHASIGMAW $0, a, $1
//	h += xi + FUNC + S1              h is now T1
//	FUNC = Maj(a,b,c)                VXOR b, a then VSEL b, c
//	g += KI                          pre-add the next round's constant
//	d += h                           becomes e of the next round
//	h += S0 + FUNC                   T1 + T2, becomes a of the next round
//	KI = kcon entry at (TBL)(idx)    constant for the round after next
//
// No data is moved to rotate a-h; callers rotate the register arguments
// between invocations instead.
#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	LVX		(TBL)(idx), KI; \
	VADDUWM		S0, h, h

// SHA256ROUND1 performs the same compression round as SHA256ROUND0 on xi and,
// interleaved with it, computes the schedule word needed by the next round.
//
// xj holds W(t-16) relative to the word being produced and is updated in
// place:
//
//	xj += xj_9 + SIGMA0(xj_1) + SIGMA1(xj_14)
//
// where xj_1, xj_9 and xj_14 are the registers holding the schedule words
// 1, 9 and 14 positions after xj (W(t-15), W(t-7) and W(t-2)). The small
// sigma functions are VSHASIGMAW with $0 as the third operand.
#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
	VSHASIGMAW	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VSHASIGMAW	$15, xj_14, $0, s1; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		xj_9, xj, xj; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	VADDUWM		s0, xj, xj; \
	LVX		(TBL)(idx), KI; \
	VADDUWM		S0, h, h; \
	VADDUWM		s1, xj, xj

// VPERMLE is a VPERM on little endian and expands to nothing on big endian.
// ·block applies it, with LEMASK as control, to each message vector right
// after it is loaded.
#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// func block(dig *digest, p []byte)
//
// block runs the SHA-256 compression function over every complete 64-byte
// chunk of p and updates, in place, the eight 32-bit state words stored in the
// first 32 bytes of *dig. Any trailing len(p)%64 bytes are ignored, and the
// function returns without touching *dig when p holds no complete chunk.
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

	// Round the length down to a multiple of 64 and compute the end pointer.
	SRD	$6, LEN
	SLD	$6, LEN
	ADD	INP, LEN, END

	// Nothing to do if there is no complete block.
	CMP	INP, END
	BEQ	end

	MOVD	$·kcon(SB), TBL_STRT
	MOVD	$0x10, R_x010

#ifdef GOARCH_ppc64le
	// Build LEMASK: LVSL at address 8 yields bytes 0x08..0x17, and XOR with
	// 0x0F in every byte turns that into 0x07..0x00, 0x1f..0x18, i.e. a
	// VPERM control that reverses the bytes within each doubleword.
	MOVWZ	$8, TEMP
	LVSL	(TEMP)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	// Load the eight state words: four into V0 and four into V4.
	LXVW4X	(CTX)(R_x000), V0
	LXVW4X	(CTX)(R_x010), V4

	// unpack the input values into vector registers
	// (each VSLDOI rotates its source left by 4, 8 or 12 bytes)
	VSLDOI	$4, V0, V0, V1
	VSLDOI	$8, V0, V0, V2
	VSLDOI	$12, V0, V0, V3
	VSLDOI	$4, V4, V4, V5
	VSLDOI	$8, V4, V4, V6
	VSLDOI	$12, V4, V4, V7

	// Constant byte offsets used as index registers for kcon and input loads.
	MOVD	$0x020, R_x020
	MOVD	$0x030, R_x030
	MOVD	$0x040, R_x040
	MOVD	$0x050, R_x050
	MOVD	$0x060, R_x060
	MOVD	$0x070, R_x070
	MOVD	$0x080, R_x080
	MOVD	$0x090, R_x090
	MOVD	$0x0a0, R_x0a0
	MOVD	$0x0b0, R_x0b0
	MOVD	$0x0c0, R_x0c0
	MOVD	$0x0d0, R_x0d0
	MOVD	$0x0e0, R_x0e0
	MOVD	$0x0f0, R_x0f0
	MOVD	$0x100, R_x100
	MOVD	$0x110, R_x110

	// One iteration per 64-byte block.
loop:
	// Restart at the beginning of the constant table and fetch K0.
	MOVD	TBL_STRT, TBL
	LVX	(TBL)(R_x000), KI

	LXVD2X	(INP)(R_x000), V8 // load v8 in advance

	// Offload to VSR24-31 (aka FPR24-31)
	// These copies of a-h are added back in after the 64 rounds.
	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	// The round macros expect the current constant to be in h already and
	// the next one in KI.
	VADDUWM	KI, V7, V7 // h+K[i]
	LVX	(TBL)(R_x010), KI

	// Rounds 0-14. Each 16-byte load supplies four message words; VSLDOI $4
	// rotates the next word into position for the following round.
	VPERMLE(V8, V8, LEMASK, V8)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
	VSLDOI	$4, V8, V8, V9
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
	VSLDOI	$4, V9, V9, V10
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
	LXVD2X	(INP)(R_x010), V12 // load v12 in advance
	VSLDOI	$4, V10, V10, V11
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
	VPERMLE(V12, V12, LEMASK, V12)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
	VSLDOI	$4, V12, V12, V13
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
	VSLDOI	$4, V13, V13, V14
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
	LXVD2X	(INP)(R_x020), V16 // load v16 in advance
	VSLDOI	$4, V14, V14, V15
	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
	VPERMLE(V16, V16, LEMASK, V16)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
	VSLDOI	$4, V16, V16, V17
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
	VSLDOI	$4, V17, V17, V18
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
	VSLDOI	$4, V18, V18, V19
	LXVD2X	(INP)(R_x030), V20 // load v20 in advance
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
	VPERMLE(V20, V20, LEMASK, V20)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
	VSLDOI	$4, V20, V20, V21
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
	VSLDOI	$4, V21, V21, V22
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
	VSLDOI	$4, V22, V22, V23
	// Round 15; also produces the schedule word for round 16 in V8.
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)

	// Rounds 16-63: three passes over the 16-round body below. The first
	// 16 rounds fetched constants up to offset 0x110, so TBL skips 0x120.
	MOVD	$3, TEMP
	MOVD	TEMP, CTR
	ADD	$0x120, TBL
	ADD	$0x40, INP

L16_xx:
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
	ADD	$0x100, TBL

	BDNZ	L16_xx

	// Add the state saved in VS24-VS31 at the top of the loop back into
	// a-h. V10-V17 are schedule registers and are free to reuse here.
	XXLOR	VS24, VS24, V10

	XXLOR	VS25, VS25, V11
	VADDUWM	V10, V0, V0
	XXLOR	VS26, VS26, V12
	VADDUWM	V11, V1, V1
	XXLOR	VS27, VS27, V13
	VADDUWM	V12, V2, V2
	XXLOR	VS28, VS28, V14
	VADDUWM	V13, V3, V3
	XXLOR	VS29, VS29, V15
	VADDUWM	V14, V4, V4
	XXLOR	VS30, VS30, V16
	VADDUWM	V15, V5, V5
	XXLOR	VS31, VS31, V17
	VADDUWM	V16, V6, V6
	VADDUWM	V17, V7, V7

	CMPU	INP, END
	BLT	loop

	// Repack the state into V0 and V4. The last round left the permutation
	// vector at kcon+0x410 in KI, and TBL now points at kcon+0x420, so V8
	// and V9 receive the vectors at 0x420 and 0x430.
	LVX	(TBL)(R_x000), V8
	VPERM	V0, V1, KI, V0
	LVX	(TBL)(R_x010), V9
	VPERM	V4, V5, KI, V4
	VPERM	V0, V2, V8, V0
	VPERM	V4, V6, V8, V4
	VPERM	V0, V3, V9, V0
	VPERM	V4, V7, V9, V4
	// Write the updated state back to the digest.
	STXVD2X	V0, (CTX+R_x000)
	STXVD2X	V4, (CTX+R_x010)

end:
	RET