// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build amd64,!gccgo,!appengine

#include "textflag.h"

// iv0..iv3 hold the eight 64-bit BLAKE2b initialization-vector words,
// two words per 16-byte table so each table fills one XMM register.
DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16

DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16

DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16

DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16

// c40 is a PSHUFB byte-selection mask: within each 64-bit lane, result
// byte i is taken from source byte (i+3) mod 8, i.e. the lane is rotated
// right by 24 bits (left by 40).
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16

// c48 is a PSHUFB byte-selection mask: within each 64-bit lane, result
// byte i is taken from source byte (i+2) mod 8, i.e. the lane is rotated
// right by 16 bits (left by 48).
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16

// SHUFFLE re-aligns the state rows between the two half rounds of a round.
// With each row of four 64-bit words held in a register pair:
//   (v2,v3): (e,f,g,h) -> (f,g,h,e)   rotate left by one word
//   (v4,v5): the two registers are swapped (rotate by two words)
//   (v6,v7): (a,b,c,d) -> (d,a,b,c)   rotate right by one word
// t1 and t2 are scratch registers; only the high half of t2 is ever
// consumed after being written, so its incoming contents do not matter.
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO       v4, t1; \
	MOVO       v5, v4; \
	MOVO       t1, v5; \
	MOVO       v6, t1; \
	PUNPCKLQDQ v6, t2; \
	PUNPCKHQDQ v7, v6; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ v7, t2; \
	MOVO       t1, v7; \
	MOVO       v2, t1; \
	PUNPCKHQDQ t2, v7; \
	PUNPCKLQDQ v3, t2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v3

// SHUFFLE_INV undoes SHUFFLE: it applies the same instruction sequence
// with the roles of (v2,v3) and (v6,v7) exchanged, so (v2,v3) is rotated
// right by one word, (v4,v5) is swapped back, and (v6,v7) is rotated left
// by one word. t1 and t2 are scratch registers.
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO       v4, t1; \
	MOVO       v5, v4; \
	MOVO       t1, v5; \
	MOVO       v2, t1; \
	PUNPCKLQDQ v2, t2; \
	PUNPCKHQDQ v3, v2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ v3, t2; \
	MOVO       t1, v3; \
	MOVO       v6, t1; \
	PUNPCKHQDQ t2, v3; \
	PUNPCKLQDQ v7, t2; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v7

// HALF_ROUND performs one add/xor/rotate mixing step on all four
// 64-bit-lane columns of the state at once (two lanes per register).
//   v0,v1 / v2,v3 / v4,v5 / v6,v7: the four state rows, updated in place.
//   m0..m3: message words (register or memory operands); m0,m1 are added
//           in the first half of the step, m2,m3 in the second.
//   t0:     scratch register. It may alias m3, because m3 is last read
//           before t0 is first written.
//   c40,c48: registers holding the PSHUFB rotate masks defined above.
// Rotations used, in order: 32 bits (PSHUFD $0xB1 swaps the 32-bit halves
// of each lane), 24 bits (c40), 16 bits (c48) and 63 bits (computed as
// (x+x) ^ (x>>63), i.e. rotate left by one).
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
	PADDQ  m0, v0;        \
	PADDQ  m1, v1;        \
	PADDQ  v2, v0;        \
	PADDQ  v3, v1;        \
	PXOR   v0, v6;        \
	PXOR   v1, v7;        \
	PSHUFD $0xB1, v6, v6; \
	PSHUFD $0xB1, v7, v7; \
	PADDQ  v6, v4;        \
	PADDQ  v7, v5;        \
	PXOR   v4, v2;        \
	PXOR   v5, v3;        \
	PSHUFB c40, v2;       \
	PSHUFB c40, v3;       \
	PADDQ  m2, v0;        \
	PADDQ  m3, v1;        \
	PADDQ  v2, v0;        \
	PADDQ  v3, v1;        \
	PXOR   v0, v6;        \
	PXOR   v1, v7;        \
	PSHUFB c48, v6;       \
	PSHUFB c48, v7;       \
	PADDQ  v6, v4;        \
	PADDQ  v7, v5;        \
	PXOR   v4, v2;        \
	PXOR   v5, v3;        \
	MOVOU  v2, t0;        \
	PADDQ  v2, t0;        \
	PSRLQ  $63, v2;       \
	PXOR   t0, v2;        \
	MOVOU  v3, t0;        \
	PADDQ  v3, t0;        \
	PSRLQ  $63, v3;       \
	PXOR   t0, v3

// LOAD_MSG gathers eight 64-bit message words from the block at src into
// four XMM registers. Word index iK addresses iK*8(src); the pairs
// (i0,i1), (i2,i3), (i4,i5), (i6,i7) become the (low, high) lanes of
// m0, m1, m2, m3 respectively.
#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
	MOVQ   i0*8(src), m0;     \
	PINSRQ $1, i1*8(src), m0; \
	MOVQ   i2*8(src), m1;     \
	PINSRQ $1, i3*8(src), m1; \
	MOVQ   i4*8(src), m2;     \
	PINSRQ $1, i5*8(src), m2; \
	MOVQ   i6*8(src), m3;     \
	PINSRQ $1, i7*8(src), m3

// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//
// hashBlocksSSE4 runs the compression function over every 128-byte block in
// blocks, updating the chaining value *h and the 128-bit byte counter *c in
// place. flag is XORed into the low word of iv3 before it enters the state.
//
// The loop decrements the length by 128 and stops only when it reaches
// exactly zero, so the caller must pass a non-empty slice whose length is a
// multiple of 128.
//
// Register use:
//   AX = h, BX = c, CX = flag, SI = current block, DI = bytes remaining
//   R8:R9   = counter (low:high)
//   R10     = 16-byte-aligned base of the 272-byte scratch area in the frame
//   X12,X15 = h[0..3] kept live across blocks; h[4..7] is reloaded from memory
//   X13,X14 = c40 / c48 rotate masks
//
// The scratch area is addressed through R10 rather than by re-pointing SP and
// parking the old value in BP: BP is the frame-pointer register on amd64, and
// overwriting it breaks frame-pointer based stack unwinding while this
// function is running.
TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 // frame size = 272 + 16 byte alignment
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVQ flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DI

	// R10 = SP rounded up to a 16-byte boundary; MOVO needs aligned memory.
	MOVQ SP, R10
	ADDQ $15, R10
	ANDQ $~15, R10

	MOVOU ·iv3<>(SB), X0
	MOVO  X0, 0(R10)
	XORQ  CX, 0(R10)     // 0(R10) = ·iv3 ^ (CX || 0)

	MOVOU ·c40<>(SB), X13
	MOVOU ·c48<>(SB), X14

	MOVOU 0(AX), X12
	MOVOU 16(AX), X15

	MOVQ 0(BX), R8
	MOVQ 8(BX), R9

loop:
	// Advance the 128-bit counter by one block. The low word wrapped iff it
	// is now below 128 as an UNSIGNED value, hence JCC (carry clear, i.e.
	// unsigned >=) rather than a signed comparison.
	ADDQ $128, R8
	CMPQ R8, $128
	JCC  noinc
	INCQ R9

noinc:
	MOVQ   R8, X8
	PINSRQ $1, R9, X8

	// Build the 16-word working state: rows 0-1 from h, rows 2-3 from the IV,
	// with the counter XORed into iv2 and the flag already folded into iv3.
	MOVO  X12, X0
	MOVO  X15, X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU ·iv0<>(SB), X4
	MOVOU ·iv1<>(SB), X5
	MOVOU ·iv2<>(SB), X6

	PXOR X8, X6
	MOVO 0(R10), X7

	// Rounds 1 and 2. Their gathered message words are also saved to the
	// scratch area, because rounds 11 and 12 reuse the same word order.
	LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
	MOVO X8, 16(R10)
	MOVO X9, 32(R10)
	MOVO X10, 48(R10)
	MOVO X11, 64(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
	MOVO X8, 80(R10)
	MOVO X9, 96(R10)
	MOVO X10, 112(R10)
	MOVO X11, 128(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
	MOVO X8, 144(R10)
	MOVO X9, 160(R10)
	MOVO X10, 176(R10)
	MOVO X11, 192(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
	MOVO X8, 208(R10)
	MOVO X9, 224(R10)
	MOVO X10, 240(R10)
	MOVO X11, 256(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Rounds 3 through 10: message words are gathered straight from the block.
	LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Rounds 11 and 12 replay the message words saved during rounds 1 and 2.
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Feed-forward: h[i] ^= v[i] ^ v[i+8]. h[0..3] stays in X12/X15 for the
	// next block; h[4..7] is written back to memory immediately.
	MOVOU 32(AX), X10
	MOVOU 48(AX), X11
	PXOR  X0, X12
	PXOR  X1, X15
	PXOR  X2, X10
	PXOR  X3, X11
	PXOR  X4, X12
	PXOR  X5, X15
	PXOR  X6, X10
	PXOR  X7, X11
	MOVOU X10, 32(AX)
	MOVOU X11, 48(AX)

	LEAQ 128(SI), SI
	SUBQ $128, DI
	JNE  loop

	// Store h[0..3] and the updated counter back to the caller's memory.
	MOVOU X12, 0(AX)
	MOVOU X15, 16(AX)

	MOVQ R8, 0(BX)
	MOVQ R9, 8(BX)

	RET

// func supportsSSE4() bool
//
// supportsSSE4 executes CPUID with AX = 1 and returns bit 19 of the
// resulting CX (the SSE4.1 feature flag) as a bool.
TEXT ·supportsSSE4(SB), NOSPLIT, $0-1
	MOVL $1, AX
	CPUID
	SHRL $19, CX       // Bit 19 indicates SSE4 support
	ANDL $1, CX        // CX is 1 if SSE4 is supported, 0 otherwise
	MOVB CX, ret+0(FP)
	RET