// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

//go:build ppc64 || ppc64le

#include "textflag.h"

// SHA512 block routine. See sha512block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 79 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// General purpose register assignments.
#define CTX	R3	// dig: address the eight hash words are loaded from / stored to
#define INP	R4	// read cursor into p; advanced 16 bytes per vector load
#define END	R5	// INP + rounded length on entry; loop bound
#define TBL	R6	// address of the kcon table
#define IDX	R7	// byte offset into kcon of the next entry to load
#define CNT	R8	// set from R0 on entry; not referenced again in this file
#define LEN	R9	// len(p) rounded down to whole blocks
#define OFFLOAD	R11	// copy of R1 taken on entry; not referenced again in this file
#define TEMP	R12	// scratch used to seed CTR

// Index registers for the indexed vector loads/stores of the hash state.
// HEX00 is R0, which is never loaded here and is relied on to hold 0;
// the others are set to 0x10/0x20/0x30 at function entry.
#define HEX00	R0
#define HEX10	R10
#define HEX20	R25
#define HEX30	R26

// V0-V7 are A-H
// V8-V23 are used for the message schedule
#define KI	V24	// current round constant (and, after the last round, the store permute mask)
#define FUNC	V25	// scratch for Ch(e,f,g) and Maj(a,b,c)
#define S0	V26	// BIGSIGMA0(a)
#define S1	V27	// BIGSIGMA1(e)
#define s0	V28	// message schedule sigma0 term
#define s1	V29	// message schedule sigma1 term
#define LEMASK	V31	// Permutation control register for little endian

// VPERM is needed on LE to switch the bytes

#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// 2 copies of each Kt, to fill both doublewords of a vector register
//
// The table holds 82 16-byte entries: K0..K79 followed by two extra entries
// (see the notes at 0x500 and 0x510). Each round loads one entry ahead, so
// the loop reads exactly up to and including the entry at 0x510.
DATA  ·kcon+0x000(SB)/8, $0x428a2f98d728ae22
DATA  ·kcon+0x008(SB)/8, $0x428a2f98d728ae22
DATA  ·kcon+0x010(SB)/8, $0x7137449123ef65cd
DATA  ·kcon+0x018(SB)/8, $0x7137449123ef65cd
DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfec4d3b2f
DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfec4d3b2f
DATA  ·kcon+0x030(SB)/8, $0xe9b5dba58189dbbc
DATA  ·kcon+0x038(SB)/8, $0xe9b5dba58189dbbc
DATA  ·kcon+0x040(SB)/8, $0x3956c25bf348b538
DATA  ·kcon+0x048(SB)/8, $0x3956c25bf348b538
DATA  ·kcon+0x050(SB)/8, $0x59f111f1b605d019
DATA  ·kcon+0x058(SB)/8, $0x59f111f1b605d019
DATA  ·kcon+0x060(SB)/8, $0x923f82a4af194f9b
DATA  ·kcon+0x068(SB)/8, $0x923f82a4af194f9b
DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5da6d8118
DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5da6d8118
DATA  ·kcon+0x080(SB)/8, $0xd807aa98a3030242
DATA  ·kcon+0x088(SB)/8, $0xd807aa98a3030242
DATA  ·kcon+0x090(SB)/8, $0x12835b0145706fbe
DATA  ·kcon+0x098(SB)/8, $0x12835b0145706fbe
DATA  ·kcon+0x0A0(SB)/8, $0x243185be4ee4b28c
DATA  ·kcon+0x0A8(SB)/8, $0x243185be4ee4b28c
DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3d5ffb4e2
DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3d5ffb4e2
DATA  ·kcon+0x0C0(SB)/8, $0x72be5d74f27b896f
DATA  ·kcon+0x0C8(SB)/8, $0x72be5d74f27b896f
DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe3b1696b1
DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe3b1696b1
DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a725c71235
DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a725c71235
DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174cf692694
DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174cf692694
DATA  ·kcon+0x100(SB)/8, $0xe49b69c19ef14ad2
DATA  ·kcon+0x108(SB)/8, $0xe49b69c19ef14ad2
DATA  ·kcon+0x110(SB)/8, $0xefbe4786384f25e3
DATA  ·kcon+0x118(SB)/8, $0xefbe4786384f25e3
DATA  ·kcon+0x120(SB)/8, $0x0fc19dc68b8cd5b5
DATA  ·kcon+0x128(SB)/8, $0x0fc19dc68b8cd5b5
DATA  ·kcon+0x130(SB)/8, $0x240ca1cc77ac9c65
DATA  ·kcon+0x138(SB)/8, $0x240ca1cc77ac9c65
DATA  ·kcon+0x140(SB)/8, $0x2de92c6f592b0275
DATA  ·kcon+0x148(SB)/8, $0x2de92c6f592b0275
DATA  ·kcon+0x150(SB)/8, $0x4a7484aa6ea6e483
DATA  ·kcon+0x158(SB)/8, $0x4a7484aa6ea6e483
DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dcbd41fbd4
DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dcbd41fbd4
DATA  ·kcon+0x170(SB)/8, $0x76f988da831153b5
DATA  ·kcon+0x178(SB)/8, $0x76f988da831153b5
DATA  ·kcon+0x180(SB)/8, $0x983e5152ee66dfab
DATA  ·kcon+0x188(SB)/8, $0x983e5152ee66dfab
DATA  ·kcon+0x190(SB)/8, $0xa831c66d2db43210
DATA  ·kcon+0x198(SB)/8, $0xa831c66d2db43210
DATA  ·kcon+0x1A0(SB)/8, $0xb00327c898fb213f
DATA  ·kcon+0x1A8(SB)/8, $0xb00327c898fb213f
DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7beef0ee4
DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7beef0ee4
DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf33da88fc2
DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf33da88fc2
DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147930aa725
DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147930aa725
DATA  ·kcon+0x1E0(SB)/8, $0x06ca6351e003826f
DATA  ·kcon+0x1E8(SB)/8, $0x06ca6351e003826f
DATA  ·kcon+0x1F0(SB)/8, $0x142929670a0e6e70
DATA  ·kcon+0x1F8(SB)/8, $0x142929670a0e6e70
DATA  ·kcon+0x200(SB)/8, $0x27b70a8546d22ffc
DATA  ·kcon+0x208(SB)/8, $0x27b70a8546d22ffc
DATA  ·kcon+0x210(SB)/8, $0x2e1b21385c26c926
DATA  ·kcon+0x218(SB)/8, $0x2e1b21385c26c926
DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc5ac42aed
DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc5ac42aed
DATA  ·kcon+0x230(SB)/8, $0x53380d139d95b3df
DATA  ·kcon+0x238(SB)/8, $0x53380d139d95b3df
DATA  ·kcon+0x240(SB)/8, $0x650a73548baf63de
DATA  ·kcon+0x248(SB)/8, $0x650a73548baf63de
DATA  ·kcon+0x250(SB)/8, $0x766a0abb3c77b2a8
DATA  ·kcon+0x258(SB)/8, $0x766a0abb3c77b2a8
DATA  ·kcon+0x260(SB)/8, $0x81c2c92e47edaee6
DATA  ·kcon+0x268(SB)/8, $0x81c2c92e47edaee6
DATA  ·kcon+0x270(SB)/8, $0x92722c851482353b
DATA  ·kcon+0x278(SB)/8, $0x92722c851482353b
DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a14cf10364
DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a14cf10364
DATA  ·kcon+0x290(SB)/8, $0xa81a664bbc423001
DATA  ·kcon+0x298(SB)/8, $0xa81a664bbc423001
DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70d0f89791
DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70d0f89791
DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a30654be30
DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a30654be30
DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d6ef5218
DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d6ef5218
DATA  ·kcon+0x2D0(SB)/8, $0xd69906245565a910
DATA  ·kcon+0x2D8(SB)/8, $0xd69906245565a910
DATA  ·kcon+0x2E0(SB)/8, $0xf40e35855771202a
DATA  ·kcon+0x2E8(SB)/8, $0xf40e35855771202a
DATA  ·kcon+0x2F0(SB)/8, $0x106aa07032bbd1b8
DATA  ·kcon+0x2F8(SB)/8, $0x106aa07032bbd1b8
DATA  ·kcon+0x300(SB)/8, $0x19a4c116b8d2d0c8
DATA  ·kcon+0x308(SB)/8, $0x19a4c116b8d2d0c8
DATA  ·kcon+0x310(SB)/8, $0x1e376c085141ab53
DATA  ·kcon+0x318(SB)/8, $0x1e376c085141ab53
DATA  ·kcon+0x320(SB)/8, $0x2748774cdf8eeb99
DATA  ·kcon+0x328(SB)/8, $0x2748774cdf8eeb99
DATA  ·kcon+0x330(SB)/8, $0x34b0bcb5e19b48a8
DATA  ·kcon+0x338(SB)/8, $0x34b0bcb5e19b48a8
DATA  ·kcon+0x340(SB)/8, $0x391c0cb3c5c95a63
DATA  ·kcon+0x348(SB)/8, $0x391c0cb3c5c95a63
DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4ae3418acb
DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4ae3418acb
DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f7763e373
DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f7763e373
DATA  ·kcon+0x370(SB)/8, $0x682e6ff3d6b2b8a3
DATA  ·kcon+0x378(SB)/8, $0x682e6ff3d6b2b8a3
DATA  ·kcon+0x380(SB)/8, $0x748f82ee5defb2fc
DATA  ·kcon+0x388(SB)/8, $0x748f82ee5defb2fc
DATA  ·kcon+0x390(SB)/8, $0x78a5636f43172f60
DATA  ·kcon+0x398(SB)/8, $0x78a5636f43172f60
DATA  ·kcon+0x3A0(SB)/8, $0x84c87814a1f0ab72
DATA  ·kcon+0x3A8(SB)/8, $0x84c87814a1f0ab72
DATA  ·kcon+0x3B0(SB)/8, $0x8cc702081a6439ec
DATA  ·kcon+0x3B8(SB)/8, $0x8cc702081a6439ec
DATA  ·kcon+0x3C0(SB)/8, $0x90befffa23631e28
DATA  ·kcon+0x3C8(SB)/8, $0x90befffa23631e28
DATA  ·kcon+0x3D0(SB)/8, $0xa4506cebde82bde9
DATA  ·kcon+0x3D8(SB)/8, $0xa4506cebde82bde9
DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7b2c67915
DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7b2c67915
DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2e372532b
DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2e372532b
DATA  ·kcon+0x400(SB)/8, $0xca273eceea26619c
DATA  ·kcon+0x408(SB)/8, $0xca273eceea26619c
DATA  ·kcon+0x410(SB)/8, $0xd186b8c721c0c207
DATA  ·kcon+0x418(SB)/8, $0xd186b8c721c0c207
DATA  ·kcon+0x420(SB)/8, $0xeada7dd6cde0eb1e
DATA  ·kcon+0x428(SB)/8, $0xeada7dd6cde0eb1e
DATA  ·kcon+0x430(SB)/8, $0xf57d4f7fee6ed178
DATA  ·kcon+0x438(SB)/8, $0xf57d4f7fee6ed178
DATA  ·kcon+0x440(SB)/8, $0x06f067aa72176fba
DATA  ·kcon+0x448(SB)/8, $0x06f067aa72176fba
DATA  ·kcon+0x450(SB)/8, $0x0a637dc5a2c898a6
DATA  ·kcon+0x458(SB)/8, $0x0a637dc5a2c898a6
DATA  ·kcon+0x460(SB)/8, $0x113f9804bef90dae
DATA  ·kcon+0x468(SB)/8, $0x113f9804bef90dae
DATA  ·kcon+0x470(SB)/8, $0x1b710b35131c471b
DATA  ·kcon+0x478(SB)/8, $0x1b710b35131c471b
DATA  ·kcon+0x480(SB)/8, $0x28db77f523047d84
DATA  ·kcon+0x488(SB)/8, $0x28db77f523047d84
DATA  ·kcon+0x490(SB)/8, $0x32caab7b40c72493
DATA  ·kcon+0x498(SB)/8, $0x32caab7b40c72493
DATA  ·kcon+0x4A0(SB)/8, $0x3c9ebe0a15c9bebc
DATA  ·kcon+0x4A8(SB)/8, $0x3c9ebe0a15c9bebc
DATA  ·kcon+0x4B0(SB)/8, $0x431d67c49c100d4c
DATA  ·kcon+0x4B8(SB)/8, $0x431d67c49c100d4c
DATA  ·kcon+0x4C0(SB)/8, $0x4cc5d4becb3e42b6
DATA  ·kcon+0x4C8(SB)/8, $0x4cc5d4becb3e42b6
DATA  ·kcon+0x4D0(SB)/8, $0x597f299cfc657e2a
DATA  ·kcon+0x4D8(SB)/8, $0x597f299cfc657e2a
DATA  ·kcon+0x4E0(SB)/8, $0x5fcb6fab3ad6faec
DATA  ·kcon+0x4E8(SB)/8, $0x5fcb6fab3ad6faec
DATA  ·kcon+0x4F0(SB)/8, $0x6c44198c4a475817
DATA  ·kcon+0x4F8(SB)/8, $0x6c44198c4a475817
// Entry 80: all zero. The last round still performs its look-ahead
// "add next K into g"; adding zero leaves g unchanged.
DATA  ·kcon+0x500(SB)/8, $0x0000000000000000
DATA  ·kcon+0x508(SB)/8, $0x0000000000000000
// Entry 81: not a round constant. It is the entry the last round loads into
// KI, and is then used as the VPERM control when the state is stored.
DATA  ·kcon+0x510(SB)/8, $0x1011121314151617
DATA  ·kcon+0x518(SB)/8, $0x0001020304050607
GLOBL ·kcon(SB), RODATA, $1312

// SHA512ROUND0 performs one compression round whose schedule word xi is taken
// directly from the input block (no message schedule expansion).
//
// The working variables are never moved between registers; callers rotate
// the argument list by one position on each successive invocation instead.
// On entry h is expected to already include this round's Kt: it is added
// either by the explicit VADDUDM before the first round or by the previous
// round's "VADDUDM KI, g, g" (that round's g is this round's h).
//
// Per the formulas in the file header, the macro leaves:
//   d = d + T1
//   h = T1 + BIGSIGMA0(a) + Maj(a,b,c)
// with FUNC holding Ch(e,f,g) and then Maj(a,b,c), S1 = BIGSIGMA1(e) and
// S0 = BIGSIGMA0(a). It also adds KI (the next round's constant) into g,
// loads the following kcon entry into KI and advances IDX by 16.
//
// Clobbers FUNC, S0, S1, KI and IDX. Do not place comments inside the
// macro body: the lines are joined by backslash continuation.
#define SHA512ROUND0(a, b, c, d, e, f, g, h, xi) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAD	$15, e, $1, S1; \
	VADDUDM		xi, h, h; \
	VSHASIGMAD	$0, a, $1, S0; \
	VADDUDM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUDM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUDM		KI, g, g; \
	VADDUDM		h, d, d; \
	VADDUDM		FUNC, S0, S0; \
	LVX		(TBL)(IDX), KI; \
	ADD		$16, IDX; \
	VADDUDM		S0, h, h

// SHA512ROUND1 performs the same compression round as SHA512ROUND0 on
// (a..h, xi) and, interleaved with it, advances one slot of the 16-entry
// message schedule held in V8-V23:
//
//   xj = xj + sigma0(xj_1) + xj_9 + sigma1(xj_14)
//
// where xj_1, xj_9 and xj_14 are the schedule words 1, 9 and 14 positions
// after xj. Relative to the word being produced these are Wt-16, Wt-15,
// Wt-7 and Wt-2, so xj is overwritten in place with the word 16 positions
// later in the schedule.
//
// Clobbers FUNC, S0, S1, s0, s1, KI and IDX in addition to updating xj.
#define SHA512ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14) \
	VSHASIGMAD	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAD	$15, e, $1, S1; \
	VADDUDM		xi, h, h; \
	VSHASIGMAD	$0, a, $1, S0; \
	VSHASIGMAD	$15, xj_14, $0, s1; \
	VADDUDM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUDM		xj_9, xj, xj; \
	VADDUDM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUDM		KI, g, g; \
	VADDUDM		h, d, d; \
	VADDUDM		FUNC, S0, S0; \
	VADDUDM		s0, xj, xj; \
	LVX		(TBL)(IDX), KI; \
	ADD		$16, IDX; \
	VADDUDM		S0, h, h; \
	VADDUDM		s1, xj, xj

// func block(dig *digest, p []byte)
//
// block folds every complete 128-byte block of p into the eight 64-bit hash
// words stored at dig. A trailing partial block is ignored, and the routine
// returns without touching dig when p holds no complete block.
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

	// Round LEN down to a multiple of 128, the SHA-512 block size in bytes.
	// Each pass through loop consumes 128 bytes (eight 16-byte loads), so a
	// coarser 64-byte rounding would let a trailing half block trigger one
	// more pass that reads beyond the end of p.
	SRD	$7, LEN
	SLD	$7, LEN

	ADD	INP, LEN, END

	// Nothing to do when there is no complete block.
	CMP	INP, END
	BEQ	end

	MOVD	$·kcon(SB), TBL
	MOVD	R1, OFFLOAD

	MOVD	R0, CNT
	MOVWZ	$0x10, HEX10
	MOVWZ	$0x20, HEX20
	MOVWZ	$0x30, HEX30

	// Generate the mask used with VPERM for LE

#ifdef GOARCH_ppc64le
	MOVWZ	$8, IDX
	LVSL	(IDX)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	// Load the hash state two words per vector into V0, V2, V4, V6.
	LXVD2X	(CTX)(HEX00), VS32	// v0 = vs32
	LXVD2X	(CTX)(HEX10), VS34	// v2 = vs34
	LXVD2X	(CTX)(HEX20), VS36	// v4 = vs36
	// unpack the input values into vector registers
	// (each VSLDOI $8 rotates a pair by 8 bytes to give the odd register)
	VSLDOI	$8, V0, V0, V1
	LXVD2X	(CTX)(HEX30), VS38	// v6 = vs38
	VSLDOI	$8, V2, V2, V3
	VSLDOI	$8, V4, V4, V5
	VSLDOI	$8, V6, V6, V7

loop:
	// Restart the constant table for this block: KI = first entry,
	// IDX = offset of the second.
	LVX	(TBL)(HEX00), KI
	MOVWZ	$16, IDX

	LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
	ADD	$16, INP

	// Copy V0-V7 to VS24-VS31
	// (saved so the block's input state can be added back after the rounds)

	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	// Prime the pipeline: the round macros expect h to already contain Kt
	// and KI to hold the constant for the following round.
	VADDUDM	KI, V7, V7	// h+K[i]
	LVX	(TBL)(IDX), KI
	ADD	$16, IDX

	// First 15 rounds: schedule words come straight from the input, loaded
	// 16 bytes (two words) at a time. VPERMLE byte-swaps on little endian;
	// VSLDOI $8 exposes the second word of each pair in the odd register.
	VPERMLE(V8,V8,LEMASK,V8)
	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8)
	LXVD2X	(INP)(R0), VS42	// load v10 (=vs42) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V8, V8, V9
	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9)
	VPERMLE(V10,V10,LEMASK,V10)
	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10)
	LXVD2X	(INP)(R0), VS44	// load v12 (=vs44) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V10, V10, V11
	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11)
	VPERMLE(V12,V12,LEMASK,V12)
	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12)
	LXVD2X	(INP)(R0), VS46	// load v14 (=vs46) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V12, V12, V13
	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13)
	VPERMLE(V14,V14,LEMASK,V14)
	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14)
	LXVD2X	(INP)(R0), VS48	// load v16 (=vs48) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V14, V14, V15
	SHA512ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15)
	VPERMLE(V16,V16,LEMASK,V16)
	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16)
	LXVD2X	(INP)(R0), VS50	// load v18 (=vs50) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V16, V16, V17
	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17)
	VPERMLE(V18,V18,LEMASK,V18)
	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18)
	LXVD2X	(INP)(R0), VS52	// load v20 (=vs52) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V18, V18, V19
	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19)
	VPERMLE(V20,V20,LEMASK,V20)
	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20)
	LXVD2X	(INP)(R0), VS54	// load v22 (=vs54) in advance
	ADD	$16, INP, INP
	VSLDOI	$8, V20, V20, V21
	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21)
	VPERMLE(V22,V22,LEMASK,V22)
	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22)
	VSLDOI	$8, V22, V22, V23
	// 16th round: consumes the last input word (V23) and starts the
	// schedule expansion by updating V8.
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)

	// Remaining 64 rounds: 4 iterations of 16 rounds, counted in CTR.
	MOVWZ	$4, TEMP
	MOVWZ	TEMP, CTR

L16_xx:
	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23)
	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8)
	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9)
	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10)
	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11)
	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12)
	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13)
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14)
	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15)
	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16)
	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17)
	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18)
	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19)
	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20)
	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21)
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22)

	BC	0x10, 0, L16_xx		// bdnz

	// Add the state saved at the top of the block back into a-h
	// (Hi = working variable + Hi). V10-V17 are free to use as scratch
	// here because the schedule is reloaded for the next block.
	XXLOR	VS24, VS24, V10
	XXLOR	VS25, VS25, V11
	XXLOR	VS26, VS26, V12
	XXLOR	VS27, VS27, V13
	XXLOR	VS28, VS28, V14
	XXLOR	VS29, VS29, V15
	XXLOR	VS30, VS30, V16
	XXLOR	VS31, VS31, V17
	VADDUDM	V10, V0, V0
	VADDUDM	V11, V1, V1
	VADDUDM	V12, V2, V2
	VADDUDM	V13, V3, V3
	VADDUDM	V14, V4, V4
	VADDUDM	V15, V5, V5
	VADDUDM	V16, V6, V6
	VADDUDM	V17, V7, V7

	CMPU	INP, END
	BLT	loop

	// KI now holds the final kcon entry (offset 0x510), loaded by the last
	// round. Use it as the VPERM control to merge each pair of state
	// registers back into a single vector for the store; the source operand
	// order differs between little and big endian.
#ifdef GOARCH_ppc64le
	VPERM	V0, V1, KI, V0
	VPERM	V2, V3, KI, V2
	VPERM	V4, V5, KI, V4
	VPERM	V6, V7, KI, V6
#else
	VPERM	V1, V0, KI, V0
	VPERM	V3, V2, KI, V2
	VPERM	V5, V4, KI, V4
	VPERM	V7, V6, KI, V6
#endif
	STXVD2X	VS32, (CTX+HEX00)	// v0 = vs32
	STXVD2X	VS34, (CTX+HEX10)	// v2 = vs34
	STXVD2X	VS36, (CTX+HEX20)	// v4 = vs36
	STXVD2X	VS38, (CTX+HEX30)	// v6 = vs38

end:
	RET