github.com/luckypickle/go-ethereum-vet@v1.14.2/crypto/secp256k1/libsecp256k1/src/field_5x52_asm_impl.h (about) 1 /********************************************************************** 2 * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or http://www.opensource.org/licenses/mit-license.php.* 5 **********************************************************************/ 6 7 /** 8 * Changelog: 9 * - March 2013, Diederik Huys: original version 10 * - November 2014, Pieter Wuille: updated to use Peter Dettman's parallel multiplication algorithm 11 * - December 2014, Pieter Wuille: converted from YASM to GCC inline assembly 12 */ 13 14 #ifndef _SECP256K1_FIELD_INNER5X52_IMPL_H_ 15 #define _SECP256K1_FIELD_INNER5X52_IMPL_H_ 16 17 SECP256K1_INLINE static void vet_secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { 18 /** 19 * Registers: rdx:rax = multiplication accumulator 20 * r9:r8 = c 21 * r15:rcx = d 22 * r10-r14 = a0-a4 23 * rbx = b 24 * rdi = r 25 * rsi = a / t? 26 */ 27 uint64_t tmp1, tmp2, tmp3; 28 __asm__ __volatile__( 29 "movq 0(%%rsi),%%r10\n" 30 "movq 8(%%rsi),%%r11\n" 31 "movq 16(%%rsi),%%r12\n" 32 "movq 24(%%rsi),%%r13\n" 33 "movq 32(%%rsi),%%r14\n" 34 35 /* d += a3 * b0 */ 36 "movq 0(%%rbx),%%rax\n" 37 "mulq %%r13\n" 38 "movq %%rax,%%rcx\n" 39 "movq %%rdx,%%r15\n" 40 /* d += a2 * b1 */ 41 "movq 8(%%rbx),%%rax\n" 42 "mulq %%r12\n" 43 "addq %%rax,%%rcx\n" 44 "adcq %%rdx,%%r15\n" 45 /* d += a1 * b2 */ 46 "movq 16(%%rbx),%%rax\n" 47 "mulq %%r11\n" 48 "addq %%rax,%%rcx\n" 49 "adcq %%rdx,%%r15\n" 50 /* d = a0 * b3 */ 51 "movq 24(%%rbx),%%rax\n" 52 "mulq %%r10\n" 53 "addq %%rax,%%rcx\n" 54 "adcq %%rdx,%%r15\n" 55 /* c = a4 * b4 */ 56 "movq 32(%%rbx),%%rax\n" 57 "mulq %%r14\n" 58 "movq %%rax,%%r8\n" 59 "movq %%rdx,%%r9\n" 60 /* d += (c & M) * R */ 61 "movq $0xfffffffffffff,%%rdx\n" 62 "andq %%rdx,%%rax\n" 63 "movq $0x1000003d10,%%rdx\n" 64 "mulq %%rdx\n" 65 "addq %%rax,%%rcx\n" 66 "adcq %%rdx,%%r15\n" 67 /* c >>= 52 (%%r8 only) */ 68 "shrdq $52,%%r9,%%r8\n" 69 /* t3 (tmp1) = d & M */ 70 "movq %%rcx,%%rsi\n" 71 "movq $0xfffffffffffff,%%rdx\n" 72 "andq %%rdx,%%rsi\n" 73 "movq %%rsi,%q1\n" 74 /* d >>= 52 */ 75 "shrdq $52,%%r15,%%rcx\n" 76 "xorq %%r15,%%r15\n" 77 /* d += a4 * b0 */ 78 "movq 0(%%rbx),%%rax\n" 79 "mulq %%r14\n" 80 "addq %%rax,%%rcx\n" 81 "adcq %%rdx,%%r15\n" 82 /* d += a3 * b1 */ 83 "movq 8(%%rbx),%%rax\n" 84 "mulq %%r13\n" 85 "addq %%rax,%%rcx\n" 86 "adcq %%rdx,%%r15\n" 87 /* d += a2 * b2 */ 88 "movq 16(%%rbx),%%rax\n" 89 "mulq %%r12\n" 90 "addq %%rax,%%rcx\n" 91 "adcq %%rdx,%%r15\n" 92 /* d += a1 * b3 */ 93 "movq 24(%%rbx),%%rax\n" 94 "mulq %%r11\n" 95 "addq %%rax,%%rcx\n" 96 "adcq %%rdx,%%r15\n" 97 /* d += a0 * b4 */ 98 "movq 32(%%rbx),%%rax\n" 99 "mulq %%r10\n" 100 "addq %%rax,%%rcx\n" 101 "adcq %%rdx,%%r15\n" 102 /* d += c * R */ 103 "movq %%r8,%%rax\n" 104 "movq $0x1000003d10,%%rdx\n" 105 "mulq %%rdx\n" 106 "addq %%rax,%%rcx\n" 107 "adcq %%rdx,%%r15\n" 108 /* t4 = d & M (%%rsi) */ 109 "movq %%rcx,%%rsi\n" 110 "movq $0xfffffffffffff,%%rdx\n" 111 "andq %%rdx,%%rsi\n" 112 /* d >>= 52 */ 113 "shrdq $52,%%r15,%%rcx\n" 114 "xorq %%r15,%%r15\n" 115 /* tx = t4 >> 48 (tmp3) */ 116 "movq %%rsi,%%rax\n" 117 "shrq $48,%%rax\n" 118 "movq %%rax,%q3\n" 119 /* t4 &= (M >> 4) (tmp2) */ 120 "movq $0xffffffffffff,%%rax\n" 121 "andq %%rax,%%rsi\n" 122 "movq %%rsi,%q2\n" 123 /* c = a0 * b0 */ 124 "movq 0(%%rbx),%%rax\n" 125 "mulq %%r10\n" 126 "movq %%rax,%%r8\n" 127 "movq %%rdx,%%r9\n" 128 /* d += a4 * b1 */ 129 "movq 8(%%rbx),%%rax\n" 130 "mulq %%r14\n" 131 "addq %%rax,%%rcx\n" 132 "adcq %%rdx,%%r15\n" 133 /* d += a3 * b2 */ 134 "movq 16(%%rbx),%%rax\n" 135 "mulq %%r13\n" 136 "addq %%rax,%%rcx\n" 137 "adcq %%rdx,%%r15\n" 138 /* d += a2 * b3 */ 139 "movq 24(%%rbx),%%rax\n" 140 "mulq %%r12\n" 141 "addq %%rax,%%rcx\n" 142 "adcq %%rdx,%%r15\n" 143 /* d += a1 * b4 */ 144 "movq 32(%%rbx),%%rax\n" 145 "mulq %%r11\n" 146 "addq %%rax,%%rcx\n" 147 "adcq %%rdx,%%r15\n" 148 /* u0 = d & M (%%rsi) */ 149 "movq %%rcx,%%rsi\n" 150 "movq $0xfffffffffffff,%%rdx\n" 151 "andq %%rdx,%%rsi\n" 152 /* d >>= 52 */ 153 "shrdq $52,%%r15,%%rcx\n" 154 "xorq %%r15,%%r15\n" 155 /* u0 = (u0 << 4) | tx (%%rsi) */ 156 "shlq $4,%%rsi\n" 157 "movq %q3,%%rax\n" 158 "orq %%rax,%%rsi\n" 159 /* c += u0 * (R >> 4) */ 160 "movq $0x1000003d1,%%rax\n" 161 "mulq %%rsi\n" 162 "addq %%rax,%%r8\n" 163 "adcq %%rdx,%%r9\n" 164 /* r[0] = c & M */ 165 "movq %%r8,%%rax\n" 166 "movq $0xfffffffffffff,%%rdx\n" 167 "andq %%rdx,%%rax\n" 168 "movq %%rax,0(%%rdi)\n" 169 /* c >>= 52 */ 170 "shrdq $52,%%r9,%%r8\n" 171 "xorq %%r9,%%r9\n" 172 /* c += a1 * b0 */ 173 "movq 0(%%rbx),%%rax\n" 174 "mulq %%r11\n" 175 "addq %%rax,%%r8\n" 176 "adcq %%rdx,%%r9\n" 177 /* c += a0 * b1 */ 178 "movq 8(%%rbx),%%rax\n" 179 "mulq %%r10\n" 180 "addq %%rax,%%r8\n" 181 "adcq %%rdx,%%r9\n" 182 /* d += a4 * b2 */ 183 "movq 16(%%rbx),%%rax\n" 184 "mulq %%r14\n" 185 "addq %%rax,%%rcx\n" 186 "adcq %%rdx,%%r15\n" 187 /* d += a3 * b3 */ 188 "movq 24(%%rbx),%%rax\n" 189 "mulq %%r13\n" 190 "addq %%rax,%%rcx\n" 191 "adcq %%rdx,%%r15\n" 192 /* d += a2 * b4 */ 193 "movq 32(%%rbx),%%rax\n" 194 "mulq %%r12\n" 195 "addq %%rax,%%rcx\n" 196 "adcq %%rdx,%%r15\n" 197 /* c += (d & M) * R */ 198 "movq %%rcx,%%rax\n" 199 "movq $0xfffffffffffff,%%rdx\n" 200 "andq %%rdx,%%rax\n" 201 "movq $0x1000003d10,%%rdx\n" 202 "mulq %%rdx\n" 203 "addq %%rax,%%r8\n" 204 "adcq %%rdx,%%r9\n" 205 /* d >>= 52 */ 206 "shrdq $52,%%r15,%%rcx\n" 207 "xorq %%r15,%%r15\n" 208 /* r[1] = c & M */ 209 "movq %%r8,%%rax\n" 210 "movq $0xfffffffffffff,%%rdx\n" 211 "andq %%rdx,%%rax\n" 212 "movq %%rax,8(%%rdi)\n" 213 /* c >>= 52 */ 214 "shrdq $52,%%r9,%%r8\n" 215 "xorq %%r9,%%r9\n" 216 /* c += a2 * b0 */ 217 "movq 0(%%rbx),%%rax\n" 218 "mulq %%r12\n" 219 "addq %%rax,%%r8\n" 220 "adcq %%rdx,%%r9\n" 221 /* c += a1 * b1 */ 222 "movq 8(%%rbx),%%rax\n" 223 "mulq %%r11\n" 224 "addq %%rax,%%r8\n" 225 "adcq %%rdx,%%r9\n" 226 /* c += a0 * b2 (last use of %%r10 = a0) */ 227 "movq 16(%%rbx),%%rax\n" 228 "mulq %%r10\n" 229 "addq %%rax,%%r8\n" 230 "adcq %%rdx,%%r9\n" 231 /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */ 232 "movq %q2,%%rsi\n" 233 "movq %q1,%%r10\n" 234 /* d += a4 * b3 */ 235 "movq 24(%%rbx),%%rax\n" 236 "mulq %%r14\n" 237 "addq %%rax,%%rcx\n" 238 "adcq %%rdx,%%r15\n" 239 /* d += a3 * b4 */ 240 "movq 32(%%rbx),%%rax\n" 241 "mulq %%r13\n" 242 "addq %%rax,%%rcx\n" 243 "adcq %%rdx,%%r15\n" 244 /* c += (d & M) * R */ 245 "movq %%rcx,%%rax\n" 246 "movq $0xfffffffffffff,%%rdx\n" 247 "andq %%rdx,%%rax\n" 248 "movq $0x1000003d10,%%rdx\n" 249 "mulq %%rdx\n" 250 "addq %%rax,%%r8\n" 251 "adcq %%rdx,%%r9\n" 252 /* d >>= 52 (%%rcx only) */ 253 "shrdq $52,%%r15,%%rcx\n" 254 /* r[2] = c & M */ 255 "movq %%r8,%%rax\n" 256 "movq $0xfffffffffffff,%%rdx\n" 257 "andq %%rdx,%%rax\n" 258 "movq %%rax,16(%%rdi)\n" 259 /* c >>= 52 */ 260 "shrdq $52,%%r9,%%r8\n" 261 "xorq %%r9,%%r9\n" 262 /* c += t3 */ 263 "addq %%r10,%%r8\n" 264 /* c += d * R */ 265 "movq %%rcx,%%rax\n" 266 "movq $0x1000003d10,%%rdx\n" 267 "mulq %%rdx\n" 268 "addq %%rax,%%r8\n" 269 "adcq %%rdx,%%r9\n" 270 /* r[3] = c & M */ 271 "movq %%r8,%%rax\n" 272 "movq $0xfffffffffffff,%%rdx\n" 273 "andq %%rdx,%%rax\n" 274 "movq %%rax,24(%%rdi)\n" 275 /* c >>= 52 (%%r8 only) */ 276 "shrdq $52,%%r9,%%r8\n" 277 /* c += t4 (%%r8 only) */ 278 "addq %%rsi,%%r8\n" 279 /* r[4] = c */ 280 "movq %%r8,32(%%rdi)\n" 281 : "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3) 282 : "b"(b), "D"(r) 283 : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory" 284 ); 285 } 286 287 SECP256K1_INLINE static void vet_secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { 288 /** 289 * Registers: rdx:rax = multiplication accumulator 290 * r9:r8 = c 291 * rcx:rbx = d 292 * r10-r14 = a0-a4 293 * r15 = M (0xfffffffffffff) 294 * rdi = r 295 * rsi = a / t? 296 */ 297 uint64_t tmp1, tmp2, tmp3; 298 __asm__ __volatile__( 299 "movq 0(%%rsi),%%r10\n" 300 "movq 8(%%rsi),%%r11\n" 301 "movq 16(%%rsi),%%r12\n" 302 "movq 24(%%rsi),%%r13\n" 303 "movq 32(%%rsi),%%r14\n" 304 "movq $0xfffffffffffff,%%r15\n" 305 306 /* d = (a0*2) * a3 */ 307 "leaq (%%r10,%%r10,1),%%rax\n" 308 "mulq %%r13\n" 309 "movq %%rax,%%rbx\n" 310 "movq %%rdx,%%rcx\n" 311 /* d += (a1*2) * a2 */ 312 "leaq (%%r11,%%r11,1),%%rax\n" 313 "mulq %%r12\n" 314 "addq %%rax,%%rbx\n" 315 "adcq %%rdx,%%rcx\n" 316 /* c = a4 * a4 */ 317 "movq %%r14,%%rax\n" 318 "mulq %%r14\n" 319 "movq %%rax,%%r8\n" 320 "movq %%rdx,%%r9\n" 321 /* d += (c & M) * R */ 322 "andq %%r15,%%rax\n" 323 "movq $0x1000003d10,%%rdx\n" 324 "mulq %%rdx\n" 325 "addq %%rax,%%rbx\n" 326 "adcq %%rdx,%%rcx\n" 327 /* c >>= 52 (%%r8 only) */ 328 "shrdq $52,%%r9,%%r8\n" 329 /* t3 (tmp1) = d & M */ 330 "movq %%rbx,%%rsi\n" 331 "andq %%r15,%%rsi\n" 332 "movq %%rsi,%q1\n" 333 /* d >>= 52 */ 334 "shrdq $52,%%rcx,%%rbx\n" 335 "xorq %%rcx,%%rcx\n" 336 /* a4 *= 2 */ 337 "addq %%r14,%%r14\n" 338 /* d += a0 * a4 */ 339 "movq %%r10,%%rax\n" 340 "mulq %%r14\n" 341 "addq %%rax,%%rbx\n" 342 "adcq %%rdx,%%rcx\n" 343 /* d+= (a1*2) * a3 */ 344 "leaq (%%r11,%%r11,1),%%rax\n" 345 "mulq %%r13\n" 346 "addq %%rax,%%rbx\n" 347 "adcq %%rdx,%%rcx\n" 348 /* d += a2 * a2 */ 349 "movq %%r12,%%rax\n" 350 "mulq %%r12\n" 351 "addq %%rax,%%rbx\n" 352 "adcq %%rdx,%%rcx\n" 353 /* d += c * R */ 354 "movq %%r8,%%rax\n" 355 "movq $0x1000003d10,%%rdx\n" 356 "mulq %%rdx\n" 357 "addq %%rax,%%rbx\n" 358 "adcq %%rdx,%%rcx\n" 359 /* t4 = d & M (%%rsi) */ 360 "movq %%rbx,%%rsi\n" 361 "andq %%r15,%%rsi\n" 362 /* d >>= 52 */ 363 "shrdq $52,%%rcx,%%rbx\n" 364 "xorq %%rcx,%%rcx\n" 365 /* tx = t4 >> 48 (tmp3) */ 366 "movq %%rsi,%%rax\n" 367 "shrq $48,%%rax\n" 368 "movq %%rax,%q3\n" 369 /* t4 &= (M >> 4) (tmp2) */ 370 "movq $0xffffffffffff,%%rax\n" 371 "andq %%rax,%%rsi\n" 372 "movq %%rsi,%q2\n" 373 /* c = a0 * a0 */ 374 "movq %%r10,%%rax\n" 375 "mulq %%r10\n" 376 "movq %%rax,%%r8\n" 377 "movq %%rdx,%%r9\n" 378 /* d += a1 * a4 */ 379 "movq %%r11,%%rax\n" 380 "mulq %%r14\n" 381 "addq %%rax,%%rbx\n" 382 "adcq %%rdx,%%rcx\n" 383 /* d += (a2*2) * a3 */ 384 "leaq (%%r12,%%r12,1),%%rax\n" 385 "mulq %%r13\n" 386 "addq %%rax,%%rbx\n" 387 "adcq %%rdx,%%rcx\n" 388 /* u0 = d & M (%%rsi) */ 389 "movq %%rbx,%%rsi\n" 390 "andq %%r15,%%rsi\n" 391 /* d >>= 52 */ 392 "shrdq $52,%%rcx,%%rbx\n" 393 "xorq %%rcx,%%rcx\n" 394 /* u0 = (u0 << 4) | tx (%%rsi) */ 395 "shlq $4,%%rsi\n" 396 "movq %q3,%%rax\n" 397 "orq %%rax,%%rsi\n" 398 /* c += u0 * (R >> 4) */ 399 "movq $0x1000003d1,%%rax\n" 400 "mulq %%rsi\n" 401 "addq %%rax,%%r8\n" 402 "adcq %%rdx,%%r9\n" 403 /* r[0] = c & M */ 404 "movq %%r8,%%rax\n" 405 "andq %%r15,%%rax\n" 406 "movq %%rax,0(%%rdi)\n" 407 /* c >>= 52 */ 408 "shrdq $52,%%r9,%%r8\n" 409 "xorq %%r9,%%r9\n" 410 /* a0 *= 2 */ 411 "addq %%r10,%%r10\n" 412 /* c += a0 * a1 */ 413 "movq %%r10,%%rax\n" 414 "mulq %%r11\n" 415 "addq %%rax,%%r8\n" 416 "adcq %%rdx,%%r9\n" 417 /* d += a2 * a4 */ 418 "movq %%r12,%%rax\n" 419 "mulq %%r14\n" 420 "addq %%rax,%%rbx\n" 421 "adcq %%rdx,%%rcx\n" 422 /* d += a3 * a3 */ 423 "movq %%r13,%%rax\n" 424 "mulq %%r13\n" 425 "addq %%rax,%%rbx\n" 426 "adcq %%rdx,%%rcx\n" 427 /* c += (d & M) * R */ 428 "movq %%rbx,%%rax\n" 429 "andq %%r15,%%rax\n" 430 "movq $0x1000003d10,%%rdx\n" 431 "mulq %%rdx\n" 432 "addq %%rax,%%r8\n" 433 "adcq %%rdx,%%r9\n" 434 /* d >>= 52 */ 435 "shrdq $52,%%rcx,%%rbx\n" 436 "xorq %%rcx,%%rcx\n" 437 /* r[1] = c & M */ 438 "movq %%r8,%%rax\n" 439 "andq %%r15,%%rax\n" 440 "movq %%rax,8(%%rdi)\n" 441 /* c >>= 52 */ 442 "shrdq $52,%%r9,%%r8\n" 443 "xorq %%r9,%%r9\n" 444 /* c += a0 * a2 (last use of %%r10) */ 445 "movq %%r10,%%rax\n" 446 "mulq %%r12\n" 447 "addq %%rax,%%r8\n" 448 "adcq %%rdx,%%r9\n" 449 /* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */ 450 "movq %q2,%%rsi\n" 451 "movq %q1,%%r10\n" 452 /* c += a1 * a1 */ 453 "movq %%r11,%%rax\n" 454 "mulq %%r11\n" 455 "addq %%rax,%%r8\n" 456 "adcq %%rdx,%%r9\n" 457 /* d += a3 * a4 */ 458 "movq %%r13,%%rax\n" 459 "mulq %%r14\n" 460 "addq %%rax,%%rbx\n" 461 "adcq %%rdx,%%rcx\n" 462 /* c += (d & M) * R */ 463 "movq %%rbx,%%rax\n" 464 "andq %%r15,%%rax\n" 465 "movq $0x1000003d10,%%rdx\n" 466 "mulq %%rdx\n" 467 "addq %%rax,%%r8\n" 468 "adcq %%rdx,%%r9\n" 469 /* d >>= 52 (%%rbx only) */ 470 "shrdq $52,%%rcx,%%rbx\n" 471 /* r[2] = c & M */ 472 "movq %%r8,%%rax\n" 473 "andq %%r15,%%rax\n" 474 "movq %%rax,16(%%rdi)\n" 475 /* c >>= 52 */ 476 "shrdq $52,%%r9,%%r8\n" 477 "xorq %%r9,%%r9\n" 478 /* c += t3 */ 479 "addq %%r10,%%r8\n" 480 /* c += d * R */ 481 "movq %%rbx,%%rax\n" 482 "movq $0x1000003d10,%%rdx\n" 483 "mulq %%rdx\n" 484 "addq %%rax,%%r8\n" 485 "adcq %%rdx,%%r9\n" 486 /* r[3] = c & M */ 487 "movq %%r8,%%rax\n" 488 "andq %%r15,%%rax\n" 489 "movq %%rax,24(%%rdi)\n" 490 /* c >>= 52 (%%r8 only) */ 491 "shrdq $52,%%r9,%%r8\n" 492 /* c += t4 (%%r8 only) */ 493 "addq %%rsi,%%r8\n" 494 /* r[4] = c */ 495 "movq %%r8,32(%%rdi)\n" 496 : "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3) 497 : "D"(r) 498 : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory" 499 ); 500 } 501 502 #endif