// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//
// WARNING: this file is built by the bootstrap compiler, thus
// it must maintain compatibility with the oldest supported
// bootstrap toolchain.
//

//go:build !purego && (ppc64 || ppc64le)

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

#include "textflag.h"

// SHA256 block routine. See sha256block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Message schedule:
//
//  Wt = Mt; for 0 <= t <= 15
//  Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
//
// Working variables:
//
//  a = H0
//  b = H1
//  c = H2
//  d = H3
//  e = H4
//  f = H5
//  g = H6
//  h = H7
//
// Compression:
//
//  for t = 0 to 63 {
//     T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//     T2 = BIGSIGMA0(a) + Maj(a,b,c)
//     h = g
//     g = f
//     f = e
//     e = d + T1
//     d = c
//     c = b
//     b = a
//     a = T1 + T2
//  }
//
// Feed-forward:
//
//  H0 = a + H0
//  H1 = b + H1
//  H2 = c + H2
//  H3 = d + H3
//  H4 = e + H4
//  H5 = f + H5
//  H6 = g + H6
//  H7 = h + H7

// General purpose register roles.
#define CTX	R3	// dig argument; the eight state words are read from and written to (CTX)
#define INP	R4	// Current input position, advanced by 0x40 per block
#define END	R5	// INP plus the length rounded down to a multiple of 64
#define TBL	R6	// Pointer into kcon table
#define LEN	R9	// p_len argument, rounded down to a multiple of 64
#define TEMP	R12	// Scratch

#define TBL_STRT	R7	// Pointer to start of kcon table.

// Index registers holding the byte offset spelled in their name.
// R_x000 is R0 and is never written in this file; the code relies on R0
// reading as zero (Go's ppc64 register convention).
#define R_x000	R0
#define R_x010	R8
#define R_x020	R10
#define R_x030	R11
#define R_x040	R14
#define R_x050	R15
#define R_x060	R16
#define R_x070	R17
#define R_x080	R18
#define R_x090	R19
#define R_x0a0	R20
#define R_x0b0	R21
#define R_x0c0	R22
#define R_x0d0	R23
#define R_x0e0	R24
#define R_x0f0	R25
#define R_x100	R26
#define R_x110	R27


// Vector register roles.
// V0-V7 are A-H
// V8-V23 are used for the message schedule
#define KI	V24	// Round constant loaded one round ahead of its use
#define FUNC	V25	// Ch / Maj intermediate
#define S0	V26	// BIGSIGMA0(a)
#define S1	V27	// BIGSIGMA1(e)
#define s0	V28	// SIGMA0 term of the message schedule
#define s1	V29	// SIGMA1 term of the message schedule
#define LEMASK	V31	// Permutation control register for little endian

// Layout of kcon (1088 bytes):
//   0x000-0x3FF  the 64 round constants Kt, 16 bytes each:
//                4 copies of each Kt, to fill all 4 words of a vector register
//   0x400-0x40F  zero, picked up by the look-ahead load of the next-to-last round
//   0x410-0x43F  three permutation control vectors (endian specific) used to
//                pack the state back into two vectors before it is stored
DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
DATA  ·kcon+0x400(SB)/8, $0x0000000000000000
DATA  ·kcon+0x408(SB)/8, $0x0000000000000000

#ifdef GOARCH_ppc64le
DATA  ·kcon+0x410(SB)/8, $0x1011121310111213	// permutation control vectors
DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
#else
DATA  ·kcon+0x410(SB)/8, $0x1011121300010203
DATA  ·kcon+0x418(SB)/8, $0x1011121310111213	// permutation control vectors
DATA  ·kcon+0x420(SB)/8, $0x0405060700010203
DATA  ·kcon+0x428(SB)/8, $0x1011121310111213
DATA  ·kcon+0x430(SB)/8, $0x0001020304050607
DATA  ·kcon+0x438(SB)/8, $0x08090a0b10111213
#endif

GLOBL ·kcon(SB), RODATA, $1088

// SHA256ROUND0 performs one compression round without touching the
// message schedule.
//
// On entry h already includes this round's Kt: it was added either before
// the first round or by the previous round's "VADDUWM KI, g, g". The macro
//   - adds xi (Wt), Ch(e,f,g) (VSEL) and BIGSIGMA1(e) to h, giving T1,
//   - adds the following round's constant, currently in KI, to g,
//   - adds T1 to d,
//   - adds BIGSIGMA0(a) + Maj(a,b,c) to h,
//   - reloads KI from (TBL)(idx) for use one round later.
// The a..h rotation of the algorithm is expressed by the caller rotating
// the register arguments from one invocation to the next.
#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	LVX		(TBL)(idx), KI; \
	VADDUWM		S0, h, h

// SHA256ROUND1 is SHA256ROUND0 interleaved with one step of the message
// schedule. xj is updated in place to become the word 16 positions later:
//
//   xj = xj + SIGMA0(xj_1) + xj_9 + SIGMA1(xj_14)
//
// where xj_1, xj_9 and xj_14 are the schedule words 1, 9 and 14 positions
// after xj.
#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
	VSHASIGMAW	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAW	$15, e, $1, S1; \
	VADDUWM		xi, h, h; \
	VSHASIGMAW	$0, a, $1, S0; \
	VSHASIGMAW	$15, xj_14, $0, s1; \
	VADDUWM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUWM		xj_9, xj, xj; \
	VADDUWM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUWM		KI, g, g; \
	VADDUWM		h, d, d; \
	VADDUWM		FUNC, S0, S0; \
	VADDUWM		s0, xj, xj; \
	LVX		(TBL)(idx), KI; \
	VADDUWM		S0, h, h; \
	VADDUWM		s1, xj, xj

// VPERMLE permutes only on little endian; on big endian it expands to nothing.
#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// func block(dig *digest, p []byte)
//
// Processes every complete 64-byte block of p and updates the eight state
// words stored at the start of *dig. Trailing bytes that do not fill a
// block are ignored; if p holds no complete block, *dig is left untouched.
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

	// Round the length down to a multiple of 64 and compute the end pointer.
	SRD	$6, LEN
	SLD	$6, LEN
	ADD	INP, LEN, END

	// Nothing to do when there is no complete block.
	CMP	INP, END
	BEQ	done

	MOVD	$·kcon(SB), TBL_STRT
	MOVD	$0x10, R_x010

#ifdef GOARCH_ppc64le
	// Build the permute control applied by VPERMLE to each input vector.
	MOVWZ	$8, TEMP
	LVSL	(TEMP)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	// Load the state: four words into V0, four words into V4.
	LXVW4X	(CTX)(R_x000), V0
	LXVW4X	(CTX)(R_x010), V4

	// unpack the input values into vector registers
	VSLDOI	$4, V0, V0, V1
	VSLDOI	$8, V0, V0, V2
	VSLDOI	$12, V0, V0, V3
	VSLDOI	$4, V4, V4, V5
	VSLDOI	$8, V4, V4, V6
	VSLDOI	$12, V4, V4, V7

	// Materialise the remaining constant offsets used as index registers.
	MOVD	$0x020, R_x020
	MOVD	$0x030, R_x030
	MOVD	$0x040, R_x040
	MOVD	$0x050, R_x050
	MOVD	$0x060, R_x060
	MOVD	$0x070, R_x070
	MOVD	$0x080, R_x080
	MOVD	$0x090, R_x090
	MOVD	$0x0a0, R_x0a0
	MOVD	$0x0b0, R_x0b0
	MOVD	$0x0c0, R_x0c0
	MOVD	$0x0d0, R_x0d0
	MOVD	$0x0e0, R_x0e0
	MOVD	$0x0f0, R_x0f0
	MOVD	$0x100, R_x100
	MOVD	$0x110, R_x110

next_block:
	// Restart at the head of the constant table for every block.
	MOVD	TBL_STRT, TBL
	LVX	(TBL)(R_x000), KI

	LXVD2X	(INP)(R_x000), V8 // load v8 in advance

	// Offload to VSR24-31 (aka FPR24-31)
	// The saved copy of a..h is added back after the 64 rounds.
	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	VADDUWM	KI, V7, V7        // h+K[i]
	LVX	(TBL)(R_x010), KI

	// Rounds 0-15. Each 16-byte load supplies four message words; VSLDOI
	// rotates the next word into place for the following round. The last
	// of these rounds also starts the message schedule.
	VPERMLE(V8, V8, LEMASK, V8)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
	VSLDOI	$4, V8, V8, V9
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
	VSLDOI	$4, V9, V9, V10
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
	LXVD2X	(INP)(R_x010), V12 // load v12 in advance
	VSLDOI	$4, V10, V10, V11
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
	VPERMLE(V12, V12, LEMASK, V12)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
	VSLDOI	$4, V12, V12, V13
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
	VSLDOI	$4, V13, V13, V14
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
	LXVD2X	(INP)(R_x020), V16 // load v16 in advance
	VSLDOI	$4, V14, V14, V15
	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
	VPERMLE(V16, V16, LEMASK, V16)
	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
	VSLDOI	$4, V16, V16, V17
	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
	VSLDOI	$4, V17, V17, V18
	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
	VSLDOI	$4, V18, V18, V19
	LXVD2X	(INP)(R_x030), V20 // load v20 in advance
	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
	VPERMLE(V20, V20, LEMASK, V20)
	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
	VSLDOI	$4, V20, V20, V21
	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
	VSLDOI	$4, V21, V21, V22
	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
	VSLDOI	$4, V22, V22, V23
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)

	// Rounds 16-63: three passes over the 16-round body below.
	// TBL skips the 0x120 bytes of constants already consumed.
	MOVD	$3, TEMP
	MOVD	TEMP, CTR
	ADD	$0x120, TBL
	ADD	$0x40, INP

rounds_16_63:
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
	ADD	$0x100, TBL

	BDNZ	rounds_16_63

	// Feed-forward: add the state saved in VS24-VS31 back into a..h.
	// Restores and additions are interleaved.
	XXLOR	VS24, VS24, V10

	XXLOR	VS25, VS25, V11
	VADDUWM	V10, V0, V0
	XXLOR	VS26, VS26, V12
	VADDUWM	V11, V1, V1
	XXLOR	VS27, VS27, V13
	VADDUWM	V12, V2, V2
	XXLOR	VS28, VS28, V14
	VADDUWM	V13, V3, V3
	XXLOR	VS29, VS29, V15
	VADDUWM	V14, V4, V4
	XXLOR	VS30, VS30, V16
	VADDUWM	V15, V5, V5
	XXLOR	VS31, VS31, V17
	VADDUWM	V16, V6, V6
	VADDUWM	V17, V7, V7

	CMPU	INP, END
	BLT	next_block

	// Pack the eight state words back into V0 and V4 and store them.
	// KI still holds the vector the final round loaded from kcon+0x410;
	// TBL now points at kcon+0x420, where the other two control vectors live.
	LVX	(TBL)(R_x000), V8
	VPERM	V0, V1, KI, V0
	LVX	(TBL)(R_x010), V9
	VPERM	V4, V5, KI, V4
	VPERM	V0, V2, V8, V0
	VPERM	V4, V6, V8, V4
	VPERM	V0, V3, V9, V0
	VPERM	V4, V7, V9, V4
	STXVD2X	V0, (CTX+R_x000)
	STXVD2X	V4, (CTX+R_x010)

done:
	RET