// Source: github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/crypto/sha512/sha512block_ppc64x.s

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on CRYPTOGAMS code with the following comment:
// # ====================================================================
// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// # project. The module is, however, dual licensed under OpenSSL and
// # CRYPTOGAMS licenses depending on where you obtain it. For further
// # details see http://www.openssl.org/~appro/cryptogams/.
// # ====================================================================

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// SHA512 block routine. See sha512block.go for Go equivalent.
//
// The algorithm is detailed in FIPS 180-4:
//
//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
//
// Wt = Mt; for 0 <= t <= 15
// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
//
// a = H0
// b = H1
// c = H2
// d = H3
// e = H4
// f = H5
// g = H6
// h = H7
//
// for t = 0 to 79 {
//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
//    h = g
//    g = f
//    f = e
//    e = d + T1
//    d = c
//    c = b
//    b = a
//    a = T1 + T2
// }
//
// H0 = a + H0
// H1 = b + H1
// H2 = c + H2
// H3 = d + H3
// H4 = e + H4
// H5 = f + H5
// H6 = g + H6
// H7 = h + H7

// General purpose register roles.
#define CTX	R3	// Pointer to the digest state (dig argument).
#define INP	R4	// Current read position in the input slice.
#define END	R5	// INP + rounded-down length; the outer loop stops here.
#define TBL	R6	// Moving pointer into the kcon table.
#define CNT	R8	// Zeroed in block; not otherwise referenced in this file.
#define LEN	R9	// Input length in bytes.
#define TEMP	R12	// Scratch.

#define TBL_STRT R7 // Pointer to start of kcon table.

// Index registers holding the constant byte offsets 0x000..0x110. They are
// used as the index operand of the indexed vector loads and stores below.
// R_x000 is R0, which is never written in this file and is relied on to
// hold zero (NOTE(review): Go's ppc64 convention; confirm against the
// assembler documentation).
#define R_x000	R0
#define R_x010	R10
#define R_x020	R25
#define R_x030	R26
#define R_x040	R14
#define R_x050	R15
#define R_x060	R16
#define R_x070	R17
#define R_x080	R18
#define R_x090	R19
#define R_x0a0	R20
#define R_x0b0	R21
#define R_x0c0	R22
#define R_x0d0	R23
#define R_x0e0	R24
#define R_x0f0	R28
#define R_x100	R29
#define R_x110	R27


// V0-V7 are A-H
// V8-V23 are used for the message schedule
#define KI	V24	// Round constant loaded from kcon for an upcoming round.
#define FUNC	V25	// Scratch for Ch(e,f,g) and then Maj(a,b,c).
#define S0	V26	// BIGSIGMA0(a), later BIGSIGMA0(a) + Maj(a,b,c).
#define S1	V27	// BIGSIGMA1(e).
#define s0	V28	// SIGMA0 of a schedule word (SHA512ROUND1 only).
#define s1	V29	// SIGMA1 of a schedule word (SHA512ROUND1 only).
#define LEMASK	V31	// Permutation control register for little endian

// VPERM is needed on LE to switch the bytes
// On big endian the macro expands to nothing.

#ifdef GOARCH_ppc64le
#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
#else
#define VPERMLE(va,vb,vc,vt)
#endif

// 2 copies of each Kt, to fill both doublewords of a vector register
//
// Layout: 80 round constants at offsets 0x000-0x4F8, one all-zero entry at
// 0x500, and at 0x510 the VPERM control vector that block uses to repack
// the state before storing it. Total size 0x520 = 1312 bytes.
DATA ·kcon+0x000(SB)/8, $0x428a2f98d728ae22
DATA ·kcon+0x008(SB)/8, $0x428a2f98d728ae22
DATA ·kcon+0x010(SB)/8, $0x7137449123ef65cd
DATA ·kcon+0x018(SB)/8, $0x7137449123ef65cd
DATA ·kcon+0x020(SB)/8, $0xb5c0fbcfec4d3b2f
DATA ·kcon+0x028(SB)/8, $0xb5c0fbcfec4d3b2f
DATA ·kcon+0x030(SB)/8, $0xe9b5dba58189dbbc
DATA ·kcon+0x038(SB)/8, $0xe9b5dba58189dbbc
DATA ·kcon+0x040(SB)/8, $0x3956c25bf348b538
DATA ·kcon+0x048(SB)/8, $0x3956c25bf348b538
DATA ·kcon+0x050(SB)/8, $0x59f111f1b605d019
DATA ·kcon+0x058(SB)/8, $0x59f111f1b605d019
DATA ·kcon+0x060(SB)/8, $0x923f82a4af194f9b
DATA ·kcon+0x068(SB)/8, $0x923f82a4af194f9b
DATA ·kcon+0x070(SB)/8, $0xab1c5ed5da6d8118
DATA ·kcon+0x078(SB)/8, $0xab1c5ed5da6d8118
DATA ·kcon+0x080(SB)/8, $0xd807aa98a3030242
DATA ·kcon+0x088(SB)/8, $0xd807aa98a3030242
DATA ·kcon+0x090(SB)/8, $0x12835b0145706fbe
DATA ·kcon+0x098(SB)/8, $0x12835b0145706fbe
DATA ·kcon+0x0A0(SB)/8, $0x243185be4ee4b28c
DATA ·kcon+0x0A8(SB)/8, $0x243185be4ee4b28c
DATA ·kcon+0x0B0(SB)/8, $0x550c7dc3d5ffb4e2
DATA ·kcon+0x0B8(SB)/8, $0x550c7dc3d5ffb4e2
DATA ·kcon+0x0C0(SB)/8, $0x72be5d74f27b896f
DATA ·kcon+0x0C8(SB)/8, $0x72be5d74f27b896f
DATA ·kcon+0x0D0(SB)/8, $0x80deb1fe3b1696b1
DATA ·kcon+0x0D8(SB)/8, $0x80deb1fe3b1696b1
DATA ·kcon+0x0E0(SB)/8, $0x9bdc06a725c71235
DATA ·kcon+0x0E8(SB)/8, $0x9bdc06a725c71235
DATA ·kcon+0x0F0(SB)/8, $0xc19bf174cf692694
DATA ·kcon+0x0F8(SB)/8, $0xc19bf174cf692694
DATA ·kcon+0x100(SB)/8, $0xe49b69c19ef14ad2
DATA ·kcon+0x108(SB)/8, $0xe49b69c19ef14ad2
DATA ·kcon+0x110(SB)/8, $0xefbe4786384f25e3
DATA ·kcon+0x118(SB)/8, $0xefbe4786384f25e3
DATA ·kcon+0x120(SB)/8, $0x0fc19dc68b8cd5b5
DATA ·kcon+0x128(SB)/8, $0x0fc19dc68b8cd5b5
DATA ·kcon+0x130(SB)/8, $0x240ca1cc77ac9c65
DATA ·kcon+0x138(SB)/8, $0x240ca1cc77ac9c65
DATA ·kcon+0x140(SB)/8, $0x2de92c6f592b0275
DATA ·kcon+0x148(SB)/8, $0x2de92c6f592b0275
DATA ·kcon+0x150(SB)/8, $0x4a7484aa6ea6e483
DATA ·kcon+0x158(SB)/8, $0x4a7484aa6ea6e483
DATA ·kcon+0x160(SB)/8, $0x5cb0a9dcbd41fbd4
DATA ·kcon+0x168(SB)/8, $0x5cb0a9dcbd41fbd4
DATA ·kcon+0x170(SB)/8, $0x76f988da831153b5
DATA ·kcon+0x178(SB)/8, $0x76f988da831153b5
DATA ·kcon+0x180(SB)/8, $0x983e5152ee66dfab
DATA ·kcon+0x188(SB)/8, $0x983e5152ee66dfab
DATA ·kcon+0x190(SB)/8, $0xa831c66d2db43210
DATA ·kcon+0x198(SB)/8, $0xa831c66d2db43210
DATA ·kcon+0x1A0(SB)/8, $0xb00327c898fb213f
DATA ·kcon+0x1A8(SB)/8, $0xb00327c898fb213f
DATA ·kcon+0x1B0(SB)/8, $0xbf597fc7beef0ee4
DATA ·kcon+0x1B8(SB)/8, $0xbf597fc7beef0ee4
DATA ·kcon+0x1C0(SB)/8, $0xc6e00bf33da88fc2
DATA ·kcon+0x1C8(SB)/8, $0xc6e00bf33da88fc2
DATA ·kcon+0x1D0(SB)/8, $0xd5a79147930aa725
DATA ·kcon+0x1D8(SB)/8, $0xd5a79147930aa725
DATA ·kcon+0x1E0(SB)/8, $0x06ca6351e003826f
DATA ·kcon+0x1E8(SB)/8, $0x06ca6351e003826f
DATA ·kcon+0x1F0(SB)/8, $0x142929670a0e6e70
DATA ·kcon+0x1F8(SB)/8, $0x142929670a0e6e70
DATA ·kcon+0x200(SB)/8, $0x27b70a8546d22ffc
DATA ·kcon+0x208(SB)/8, $0x27b70a8546d22ffc
DATA ·kcon+0x210(SB)/8, $0x2e1b21385c26c926
DATA ·kcon+0x218(SB)/8, $0x2e1b21385c26c926
DATA ·kcon+0x220(SB)/8, $0x4d2c6dfc5ac42aed
DATA ·kcon+0x228(SB)/8, $0x4d2c6dfc5ac42aed
DATA ·kcon+0x230(SB)/8, $0x53380d139d95b3df
DATA ·kcon+0x238(SB)/8, $0x53380d139d95b3df
DATA ·kcon+0x240(SB)/8, $0x650a73548baf63de
DATA ·kcon+0x248(SB)/8, $0x650a73548baf63de
DATA ·kcon+0x250(SB)/8, $0x766a0abb3c77b2a8
DATA ·kcon+0x258(SB)/8, $0x766a0abb3c77b2a8
DATA ·kcon+0x260(SB)/8, $0x81c2c92e47edaee6
DATA ·kcon+0x268(SB)/8, $0x81c2c92e47edaee6
DATA ·kcon+0x270(SB)/8, $0x92722c851482353b
DATA ·kcon+0x278(SB)/8, $0x92722c851482353b
DATA ·kcon+0x280(SB)/8, $0xa2bfe8a14cf10364
DATA ·kcon+0x288(SB)/8, $0xa2bfe8a14cf10364
DATA ·kcon+0x290(SB)/8, $0xa81a664bbc423001
DATA ·kcon+0x298(SB)/8, $0xa81a664bbc423001
DATA ·kcon+0x2A0(SB)/8, $0xc24b8b70d0f89791
DATA ·kcon+0x2A8(SB)/8, $0xc24b8b70d0f89791
DATA ·kcon+0x2B0(SB)/8, $0xc76c51a30654be30
DATA ·kcon+0x2B8(SB)/8, $0xc76c51a30654be30
DATA ·kcon+0x2C0(SB)/8, $0xd192e819d6ef5218
DATA ·kcon+0x2C8(SB)/8, $0xd192e819d6ef5218
DATA ·kcon+0x2D0(SB)/8, $0xd69906245565a910
DATA ·kcon+0x2D8(SB)/8, $0xd69906245565a910
DATA ·kcon+0x2E0(SB)/8, $0xf40e35855771202a
DATA ·kcon+0x2E8(SB)/8, $0xf40e35855771202a
DATA ·kcon+0x2F0(SB)/8, $0x106aa07032bbd1b8
DATA ·kcon+0x2F8(SB)/8, $0x106aa07032bbd1b8
DATA ·kcon+0x300(SB)/8, $0x19a4c116b8d2d0c8
DATA ·kcon+0x308(SB)/8, $0x19a4c116b8d2d0c8
DATA ·kcon+0x310(SB)/8, $0x1e376c085141ab53
DATA ·kcon+0x318(SB)/8, $0x1e376c085141ab53
DATA ·kcon+0x320(SB)/8, $0x2748774cdf8eeb99
DATA ·kcon+0x328(SB)/8, $0x2748774cdf8eeb99
DATA ·kcon+0x330(SB)/8, $0x34b0bcb5e19b48a8
DATA ·kcon+0x338(SB)/8, $0x34b0bcb5e19b48a8
DATA ·kcon+0x340(SB)/8, $0x391c0cb3c5c95a63
DATA ·kcon+0x348(SB)/8, $0x391c0cb3c5c95a63
DATA ·kcon+0x350(SB)/8, $0x4ed8aa4ae3418acb
DATA ·kcon+0x358(SB)/8, $0x4ed8aa4ae3418acb
DATA ·kcon+0x360(SB)/8, $0x5b9cca4f7763e373
DATA ·kcon+0x368(SB)/8, $0x5b9cca4f7763e373
DATA ·kcon+0x370(SB)/8, $0x682e6ff3d6b2b8a3
DATA ·kcon+0x378(SB)/8, $0x682e6ff3d6b2b8a3
DATA ·kcon+0x380(SB)/8, $0x748f82ee5defb2fc
DATA ·kcon+0x388(SB)/8, $0x748f82ee5defb2fc
DATA ·kcon+0x390(SB)/8, $0x78a5636f43172f60
DATA ·kcon+0x398(SB)/8, $0x78a5636f43172f60
DATA ·kcon+0x3A0(SB)/8, $0x84c87814a1f0ab72
DATA ·kcon+0x3A8(SB)/8, $0x84c87814a1f0ab72
DATA ·kcon+0x3B0(SB)/8, $0x8cc702081a6439ec
DATA ·kcon+0x3B8(SB)/8, $0x8cc702081a6439ec
DATA ·kcon+0x3C0(SB)/8, $0x90befffa23631e28
DATA ·kcon+0x3C8(SB)/8, $0x90befffa23631e28
DATA ·kcon+0x3D0(SB)/8, $0xa4506cebde82bde9
DATA ·kcon+0x3D8(SB)/8, $0xa4506cebde82bde9
DATA ·kcon+0x3E0(SB)/8, $0xbef9a3f7b2c67915
DATA ·kcon+0x3E8(SB)/8, $0xbef9a3f7b2c67915
DATA ·kcon+0x3F0(SB)/8, $0xc67178f2e372532b
DATA ·kcon+0x3F8(SB)/8, $0xc67178f2e372532b
DATA ·kcon+0x400(SB)/8, $0xca273eceea26619c
DATA ·kcon+0x408(SB)/8, $0xca273eceea26619c
DATA ·kcon+0x410(SB)/8, $0xd186b8c721c0c207
DATA ·kcon+0x418(SB)/8, $0xd186b8c721c0c207
DATA ·kcon+0x420(SB)/8, $0xeada7dd6cde0eb1e
DATA ·kcon+0x428(SB)/8, $0xeada7dd6cde0eb1e
DATA ·kcon+0x430(SB)/8, $0xf57d4f7fee6ed178
DATA ·kcon+0x438(SB)/8, $0xf57d4f7fee6ed178
DATA ·kcon+0x440(SB)/8, $0x06f067aa72176fba
DATA ·kcon+0x448(SB)/8, $0x06f067aa72176fba
DATA ·kcon+0x450(SB)/8, $0x0a637dc5a2c898a6
DATA ·kcon+0x458(SB)/8, $0x0a637dc5a2c898a6
DATA ·kcon+0x460(SB)/8, $0x113f9804bef90dae
DATA ·kcon+0x468(SB)/8, $0x113f9804bef90dae
DATA ·kcon+0x470(SB)/8, $0x1b710b35131c471b
DATA ·kcon+0x478(SB)/8, $0x1b710b35131c471b
DATA ·kcon+0x480(SB)/8, $0x28db77f523047d84
DATA ·kcon+0x488(SB)/8, $0x28db77f523047d84
DATA ·kcon+0x490(SB)/8, $0x32caab7b40c72493
DATA ·kcon+0x498(SB)/8, $0x32caab7b40c72493
DATA ·kcon+0x4A0(SB)/8, $0x3c9ebe0a15c9bebc
DATA ·kcon+0x4A8(SB)/8, $0x3c9ebe0a15c9bebc
DATA ·kcon+0x4B0(SB)/8, $0x431d67c49c100d4c
DATA ·kcon+0x4B8(SB)/8, $0x431d67c49c100d4c
DATA ·kcon+0x4C0(SB)/8, $0x4cc5d4becb3e42b6
DATA ·kcon+0x4C8(SB)/8, $0x4cc5d4becb3e42b6
DATA ·kcon+0x4D0(SB)/8, $0x597f299cfc657e2a
DATA ·kcon+0x4D8(SB)/8, $0x597f299cfc657e2a
DATA ·kcon+0x4E0(SB)/8, $0x5fcb6fab3ad6faec
DATA ·kcon+0x4E8(SB)/8, $0x5fcb6fab3ad6faec
DATA ·kcon+0x4F0(SB)/8, $0x6c44198c4a475817
DATA ·kcon+0x4F8(SB)/8, $0x6c44198c4a475817
DATA ·kcon+0x500(SB)/8, $0x0000000000000000
DATA ·kcon+0x508(SB)/8, $0x0000000000000000
DATA ·kcon+0x510(SB)/8, $0x1011121314151617
DATA ·kcon+0x518(SB)/8, $0x0001020304050607
GLOBL ·kcon(SB), RODATA, $1312

// SHA512ROUND0 performs one compression round using message word xi,
// without touching the message schedule.
//
// On entry h must already include this round's constant. The macro:
//   - forms Ch(e,f,g) with VSEL and BIGSIGMA1(e)/BIGSIGMA0(a) with VSHASIGMAD,
//   - accumulates T1 = h + xi + Ch + BIGSIGMA1 in h and adds it to d,
//   - forms Maj(a,b,c) with VXOR+VSEL and adds BIGSIGMA0 + Maj to h (T1 + T2),
//   - adds the constant currently in KI to g, which is the register callers
//     pass as h in the following round, then reloads KI from (TBL)(idx).
//
// After the macro d holds the new e and h holds the new a; callers rotate
// the register arguments by one position instead of moving values.
#define SHA512ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAD	$15, e, $1, S1; \
	VADDUDM		xi, h, h; \
	VSHASIGMAD	$0, a, $1, S0; \
	VADDUDM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUDM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUDM		KI, g, g; \
	VADDUDM		h, d, d; \
	VADDUDM		FUNC, S0, S0; \
	LVX		(TBL)(idx), KI; \
	VADDUDM		S0, h, h

// SHA512ROUND1 performs the same compression round as SHA512ROUND0 and, in
// addition, advances the message schedule in place:
//
//   xj = xj + xj_9 + SIGMA0(xj_1) + SIGMA1(xj_14)
//
// With xj holding W[t-16], xj_1 W[t-15], xj_9 W[t-7] and xj_14 W[t-2], this
// is the Wt recurrence from the header comment; the result overwrites xj.
#define SHA512ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
	VSHASIGMAD	$0, xj_1, $0, s0; \
	VSEL		g, f, e, FUNC; \
	VSHASIGMAD	$15, e, $1, S1; \
	VADDUDM		xi, h, h; \
	VSHASIGMAD	$0, a, $1, S0; \
	VSHASIGMAD	$15, xj_14, $0, s1; \
	VADDUDM		FUNC, h, h; \
	VXOR		b, a, FUNC; \
	VADDUDM		xj_9, xj, xj; \
	VADDUDM		S1, h, h; \
	VSEL		b, c, FUNC, FUNC; \
	VADDUDM		KI, g, g; \
	VADDUDM		h, d, d; \
	VADDUDM		FUNC, S0, S0; \
	VADDUDM		s0, xj, xj; \
	LVX		(TBL)(idx), KI; \
	VADDUDM		S0, h, h; \
	VADDUDM		s1, xj, xj

// func block(dig *digest, p []byte)
//
// block folds every complete 128-byte block of p into the eight 64-bit
// state words stored at the start of *dig. Trailing bytes that do not make
// up a full block are ignored; if there is no full block the state is left
// untouched.
//
// Structure of one outer-loop iteration (one 128-byte block):
//   - rounds 0-15 consume the message words as they are loaded (and byte
//     swapped on little endian); the 16th of them starts the schedule,
//   - L16_xx runs 4 times x 16 rounds for rounds 16-79,
//   - the saved copy of the incoming state (VS24-VS31) is added back.
TEXT ·block(SB),0,$0-32
	MOVD	dig+0(FP), CTX
	MOVD	p_base+8(FP), INP
	MOVD	p_len+16(FP), LEN

	// Round the length down to a whole number of 128-byte blocks. Each
	// pass of the loop below consumes 128 bytes (16 + 0x70), so rounding
	// to any smaller unit would let the last pass read beyond p.
	SRD	$7, LEN
	SLD	$7, LEN

	ADD	INP, LEN, END

	CMP	INP, END
	BEQ	end

	MOVD	$·kcon(SB), TBL_STRT

	// Materialise the constant offsets used as index registers.
	MOVD	R0, CNT
	MOVWZ	$0x010, R_x010
	MOVWZ	$0x020, R_x020
	MOVWZ	$0x030, R_x030
	MOVD	$0x040, R_x040
	MOVD	$0x050, R_x050
	MOVD	$0x060, R_x060
	MOVD	$0x070, R_x070
	MOVD	$0x080, R_x080
	MOVD	$0x090, R_x090
	MOVD	$0x0a0, R_x0a0
	MOVD	$0x0b0, R_x0b0
	MOVD	$0x0c0, R_x0c0
	MOVD	$0x0d0, R_x0d0
	MOVD	$0x0e0, R_x0e0
	MOVD	$0x0f0, R_x0f0
	MOVD	$0x100, R_x100
	MOVD	$0x110, R_x110


#ifdef GOARCH_ppc64le
	// Generate the mask used with VPERM for LE
	MOVWZ	$8, TEMP
	LVSL	(TEMP)(R0), LEMASK
	VSPLTISB	$0x0F, KI
	VXOR	KI, LEMASK, LEMASK
#endif

	// Each 16-byte load brings in two adjacent state words.
	LXVD2X	(CTX)(R_x000), VS32	// v0 = vs32
	LXVD2X	(CTX)(R_x010), VS34	// v2 = vs34
	LXVD2X	(CTX)(R_x020), VS36	// v4 = vs36

	// unpack the input values into vector registers
	// (VSLDOI $8 swaps the two doublewords, giving the odd-numbered
	// register the second word of each pair)
	VSLDOI	$8, V0, V0, V1
	LXVD2X	(CTX)(R_x030), VS38	// v6 = vs38
	VSLDOI	$8, V2, V2, V3
	VSLDOI	$8, V4, V4, V5
	VSLDOI	$8, V6, V6, V7

loop:
	MOVD	TBL_STRT, TBL
	LVX	(TBL)(R_x000), KI

	LXVD2X	(INP)(R0), VS40	// load v8 (=vs40) in advance
	ADD	$16, INP

	// Copy V0-V7 to VS24-VS31

	XXLOR	V0, V0, VS24
	XXLOR	V1, V1, VS25
	XXLOR	V2, V2, VS26
	XXLOR	V3, V3, VS27
	XXLOR	V4, V4, VS28
	XXLOR	V5, V5, VS29
	XXLOR	V6, V6, VS30
	XXLOR	V7, V7, VS31

	VADDUDM	KI, V7, V7	// h+K[i]
	LVX	(TBL)(R_x010), KI

	// Rounds 0-15. The register arguments rotate by one position per
	// round; message loads are issued ahead of the round that uses them.
	VPERMLE(V8,V8,LEMASK,V8)
	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
	LXVD2X	(INP)(R_x000), VS42	// load v10 (=vs42) in advance
	VSLDOI	$8, V8, V8, V9
	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
	VPERMLE(V10,V10,LEMASK,V10)
	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
	LXVD2X	(INP)(R_x010), VS44	// load v12 (=vs44) in advance
	VSLDOI	$8, V10, V10, V11
	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
	VPERMLE(V12,V12,LEMASK,V12)
	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
	LXVD2X	(INP)(R_x020), VS46	// load v14 (=vs46) in advance
	VSLDOI	$8, V12, V12, V13
	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
	VPERMLE(V14,V14,LEMASK,V14)
	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
	LXVD2X	(INP)(R_x030), VS48	// load v16 (=vs48) in advance
	VSLDOI	$8, V14, V14, V15
	SHA512ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
	VPERMLE(V16,V16,LEMASK,V16)
	SHA512ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
	LXVD2X	(INP)(R_x040), VS50	// load v18 (=vs50) in advance
	VSLDOI	$8, V16, V16, V17
	SHA512ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
	VPERMLE(V18,V18,LEMASK,V18)
	SHA512ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
	LXVD2X	(INP)(R_x050), VS52	// load v20 (=vs52) in advance
	VSLDOI	$8, V18, V18, V19
	SHA512ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
	VPERMLE(V20,V20,LEMASK,V20)
	SHA512ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
	LXVD2X	(INP)(R_x060), VS54	// load v22 (=vs54) in advance
	VSLDOI	$8, V20, V20, V21
	SHA512ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
	VPERMLE(V22,V22,LEMASK,V22)
	SHA512ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
	VSLDOI	$8, V22, V22, V23
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)

	// 4 passes of 16 rounds each cover rounds 16-79. TBL skips the 0x120
	// bytes of constants already fetched; INP skips the remaining 0x70
	// bytes of the block loaded above.
	MOVWZ	$4, TEMP
	MOVWZ	TEMP, CTR
	ADD	$0x120, TBL
	ADD	$0x70, INP

L16_xx:
	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
	SHA512ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
	SHA512ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
	SHA512ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
	SHA512ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
	SHA512ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
	SHA512ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
	SHA512ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
	SHA512ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
	ADD	$0x100, TBL

	BDNZ	L16_xx

	// Add the state saved at the top of the loop back into the working
	// variables (H[i] = working[i] + H[i]).
	XXLOR	VS24, VS24, V10
	XXLOR	VS25, VS25, V11
	XXLOR	VS26, VS26, V12
	XXLOR	VS27, VS27, V13
	XXLOR	VS28, VS28, V14
	XXLOR	VS29, VS29, V15
	XXLOR	VS30, VS30, V16
	XXLOR	VS31, VS31, V17
	VADDUDM	V10, V0, V0
	VADDUDM	V11, V1, V1
	VADDUDM	V12, V2, V2
	VADDUDM	V13, V3, V3
	VADDUDM	V14, V4, V4
	VADDUDM	V15, V5, V5
	VADDUDM	V16, V6, V6
	VADDUDM	V17, V7, V7

	CMPU	INP, END
	BLT	loop

	// Repack pairs of state words into single vectors for the 16-byte
	// stores. KI holds the control vector from kcon+0x510 here: it is the
	// last value fetched by the final round of the final L16_xx pass.
#ifdef GOARCH_ppc64le
	VPERM	V0, V1, KI, V0
	VPERM	V2, V3, KI, V2
	VPERM	V4, V5, KI, V4
	VPERM	V6, V7, KI, V6
#else
	VPERM	V1, V0, KI, V0
	VPERM	V3, V2, KI, V2
	VPERM	V5, V4, KI, V4
	VPERM	V7, V6, KI, V6
#endif
	STXVD2X	VS32, (CTX+R_x000)	// v0 = vs32
	STXVD2X	VS34, (CTX+R_x010)	// v2 = vs34
	STXVD2X	VS36, (CTX+R_x020)	// v4 = vs36
	STXVD2X	VS38, (CTX+R_x030)	// v6 = vs38

end:
	RET