github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/common.c (about) 1 /* Shared speed subroutines. 2 3 Copyright 1999-2006, 2008-2015 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20 or both in parallel, as here. 21 22 The GNU MP Library is distributed in the hope that it will be useful, but 23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 for more details. 26 27 You should have received copies of the GNU General Public License and the 28 GNU Lesser General Public License along with the GNU MP Library. If not, 29 see https://www.gnu.org/licenses/. */ 30 31 #define __GMP_NO_ATTRIBUTE_CONST_PURE 32 33 #include <errno.h> 34 #include <fcntl.h> 35 #include <math.h> 36 #include <stdio.h> 37 #include <stdlib.h> /* for qsort */ 38 #include <string.h> 39 #include <unistd.h> 40 #if 0 41 #include <sys/ioctl.h> 42 #endif 43 44 #include "gmp.h" 45 #include "gmp-impl.h" 46 #include "longlong.h" 47 48 #include "tests.h" 49 #include "speed.h" 50 51 52 int speed_option_addrs = 0; 53 int speed_option_verbose = 0; 54 int speed_option_cycles_broken = 0; 55 56 57 /* Provide __clz_tab even if it's not required, for the benefit of new code 58 being tested with many.pl. 
*/ 59 #ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB 60 #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB 61 #include "mp_clz_tab.c" 62 #undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB 63 #endif 64 65 66 void 67 pentium_wbinvd(void) 68 { 69 #if 0 70 { 71 static int fd = -2; 72 73 if (fd == -2) 74 { 75 fd = open ("/dev/wbinvd", O_RDWR); 76 if (fd == -1) 77 perror ("open /dev/wbinvd"); 78 } 79 80 if (fd != -1) 81 ioctl (fd, 0, 0); 82 } 83 #endif 84 85 #if 0 86 #define WBINVDSIZE 1024*1024*2 87 { 88 static char *p = NULL; 89 int i, sum; 90 91 if (p == NULL) 92 p = malloc (WBINVDSIZE); 93 94 #if 0 95 for (i = 0; i < WBINVDSIZE; i++) 96 p[i] = i & 0xFF; 97 #endif 98 99 sum = 0; 100 for (i = 0; i < WBINVDSIZE; i++) 101 sum += p[i]; 102 103 mpn_cache_fill_dummy (sum); 104 } 105 #endif 106 } 107 108 109 int 110 double_cmp_ptr (const double *p, const double *q) 111 { 112 if (*p > *q) return 1; 113 if (*p < *q) return -1; 114 return 0; 115 } 116 117 118 /* Measure the speed of a given routine. 119 120 The routine is run with enough repetitions to make it take at least 121 speed_precision * speed_unittime. This aims to minimize the effects of a 122 limited accuracy time base and the overhead of the measuring itself. 123 124 Measurements are made looking for 4 results within TOLERANCE of each 125 other (or 3 for routines taking longer than 2 seconds). This aims to get 126 an accurate reading even if some runs are bloated by interrupts or task 127 switches or whatever. 128 129 The given (*fun)() is expected to run its function "s->reps" many times 130 and return the total elapsed time measured using speed_starttime() and 131 speed_endtime(). If the function doesn't support the given s->size or 132 s->r, -1.0 should be returned. See the various base routines below. 
*/

/* Arguments:
     fun  timing routine, called repeatedly with s.  Its return value is
          taken as the total time for s->reps runs; 0.0 is treated as a
          failed measurement and retried (with reps doubled while below
          10000), and -1.0 is passed straight back to the caller.
     s    parameters handed to fun, or NULL to use a zero-filled dummy.
          s->reps, s->time_divisor, s->src_num and s->dst_num are
          overwritten here.

   Return value: time per repetition divided by s->time_divisor, or -1.0 if
   fun returned -1.0 or no group of consistent measurements was found in
   numberof(t) attempts (a table of the attempts is then printed to
   stderr).  When speed_precision is 0 the first measurement is returned
   immediately, without s->time_divisor being applied.

   Aborts if more than max_zeros measurements come back as 0.0, or if the
   computed repetition count falls outside 1 .. 2e9.  */
double
speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
{
#define TOLERANCE    1.01  /* 1% */
  const int max_zeros = 10;

  struct speed_params  s_dummy;
  int     i, j, e;
  double  t[30];
  double  t_unsorted[30];
  double  reps_d;
  int     zeros = 0;

  /* Use dummy parameters if caller doesn't provide any.  Only a few special
     "fun"s will cope with this, speed_noop() is one.  */
  if (s == NULL)
    {
      memset (&s_dummy, '\0', sizeof (s_dummy));
      s = &s_dummy;
    }

  s->reps = 1;
  s->time_divisor = 1.0;
  for (i = 0; i < numberof (t); i++)
    {
      /* Repeat the call, raising s->reps, until the total time is at least
         speed_unittime * speed_precision.  */
      for (;;)
        {
          s->src_num = 0;
          s->dst_num = 0;

          t[i] = (*fun) (s);

          if (speed_option_verbose >= 3)
            gmp_printf("size=%ld reps=%u r=%Md attempt=%d %.9f\n",
                       (long) s->size, s->reps, s->r, i, t[i]);

          if (t[i] == 0.0)
            {
              zeros++;
              if (zeros > max_zeros)
                {
                  fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
                  abort ();
                }
              if (s->reps < 10000)
                s->reps *= 2;

              continue;
            }

          if (t[i] == -1.0)
            return -1.0;

          if (t[i] >= speed_unittime * speed_precision)
            break;

          /* go to a value of reps to make t[i] >= precision */
          reps_d = ceil (1.1 * s->reps
                         * speed_unittime * speed_precision
                         / MAX (t[i], speed_unittime));
          if (reps_d > 2e9 || reps_d < 1.0)
            {
              fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
              fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
                       s->reps, speed_unittime, speed_precision, t[i]);
              abort ();
            }
          s->reps = (unsigned) reps_d;
        }
      t[i] /= s->reps;
      t_unsorted[i] = t[i];

      /* NOTE(review): this early return does not divide by
         s->time_divisor, unlike the return further down -- confirm that
         is intended.  */
      if (speed_precision == 0)
        return t[i];

      /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
      if (t[0] >= 2.0)
        e = 3;
      else
        e = 4;

      /* Look for e many t[]'s within TOLERANCE of each other to consider a
         valid measurement.  Return smallest among them.

         t[0..i] is sorted in place, so later attempts are appended to an
         already sorted prefix; t_unsorted[] keeps the original order for
         the failure report.  The scan stops at j < i, so the largest of
         the i+1 sorted values never closes a window.  */
      if (i >= e)
        {
          qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
          for (j = e-1; j < i; j++)
            if (t[j] <= t[j-e+1] * TOLERANCE)
              return t[j-e+1] / s->time_divisor;
        }
    }

  fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
           e, (TOLERANCE-1.0)*100.0);
  fprintf (stderr, " unsorted sorted\n");
  /* The width of the tolerance band, in seconds.  The percentage in the
     label is computed from TOLERANCE so it cannot disagree with the
     values printed next to it.  */
  fprintf (stderr, " %.12f %.12f is about %.1f%%\n",
           t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0),
           (TOLERANCE-1.0)*100.0);
  for (i = 0; i < numberof (t); i++)
    fprintf (stderr, " %.09f %.09f\n", t_unsorted[i], t[i]);

  return -1.0;
}


/* Read all of ptr,size to get it into the CPU memory cache.

   A call to mpn_cache_fill_dummy() is used to make sure the compiler
   doesn't optimize away the whole loop.  Using "volatile mp_limb_t sum"
   would work too, but the function call means we don't rely on every
   compiler actually implementing volatile properly.

   mpn_cache_fill_dummy() is in a separate source file to stop gcc thinking
   it can inline it.
*/ 247 248 void 249 mpn_cache_fill (mp_srcptr ptr, mp_size_t size) 250 { 251 mp_limb_t sum = 0; 252 mp_size_t i; 253 254 for (i = 0; i < size; i++) 255 sum += ptr[i]; 256 257 mpn_cache_fill_dummy(sum); 258 } 259 260 261 void 262 mpn_cache_fill_write (mp_ptr ptr, mp_size_t size) 263 { 264 mpn_cache_fill (ptr, size); 265 266 #if 0 267 mpn_random (ptr, size); 268 #endif 269 270 #if 0 271 mp_size_t i; 272 273 for (i = 0; i < size; i++) 274 ptr[i] = i; 275 #endif 276 } 277 278 279 void 280 speed_operand_src (struct speed_params *s, mp_ptr ptr, mp_size_t size) 281 { 282 if (s->src_num >= numberof (s->src)) 283 { 284 fprintf (stderr, "speed_operand_src: no room left in s->src[]\n"); 285 abort (); 286 } 287 s->src[s->src_num].ptr = ptr; 288 s->src[s->src_num].size = size; 289 s->src_num++; 290 } 291 292 293 void 294 speed_operand_dst (struct speed_params *s, mp_ptr ptr, mp_size_t size) 295 { 296 if (s->dst_num >= numberof (s->dst)) 297 { 298 fprintf (stderr, "speed_operand_dst: no room left in s->dst[]\n"); 299 abort (); 300 } 301 s->dst[s->dst_num].ptr = ptr; 302 s->dst[s->dst_num].size = size; 303 s->dst_num++; 304 } 305 306 307 void 308 speed_cache_fill (struct speed_params *s) 309 { 310 static struct speed_params prev; 311 int i; 312 313 /* FIXME: need a better way to get the format string for a pointer */ 314 315 if (speed_option_addrs) 316 { 317 int different; 318 319 different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num); 320 for (i = 0; i < s->dst_num; i++) 321 different |= (s->dst[i].ptr != prev.dst[i].ptr); 322 for (i = 0; i < s->src_num; i++) 323 different |= (s->src[i].ptr != prev.src[i].ptr); 324 325 if (different) 326 { 327 if (s->dst_num != 0) 328 { 329 printf ("dst"); 330 for (i = 0; i < s->dst_num; i++) 331 printf (" %08lX", (unsigned long) s->dst[i].ptr); 332 printf (" "); 333 } 334 335 if (s->src_num != 0) 336 { 337 printf ("src"); 338 for (i = 0; i < s->src_num; i++) 339 printf (" %08lX", (unsigned long) s->src[i].ptr); 340 printf (" "); 
341 } 342 printf (" (cf sp approx %08lX)\n", (unsigned long) &different); 343 344 } 345 346 memcpy (&prev, s, sizeof(prev)); 347 } 348 349 switch (s->cache) { 350 case 0: 351 for (i = 0; i < s->dst_num; i++) 352 mpn_cache_fill_write (s->dst[i].ptr, s->dst[i].size); 353 for (i = 0; i < s->src_num; i++) 354 mpn_cache_fill (s->src[i].ptr, s->src[i].size); 355 break; 356 case 1: 357 pentium_wbinvd(); 358 break; 359 } 360 } 361 362 363 /* Miscellaneous options accepted by tune and speed programs under -o. */ 364 365 void 366 speed_option_set (const char *s) 367 { 368 int n; 369 370 if (strcmp (s, "addrs") == 0) 371 { 372 speed_option_addrs = 1; 373 } 374 else if (strcmp (s, "verbose") == 0) 375 { 376 speed_option_verbose++; 377 } 378 else if (sscanf (s, "verbose=%d", &n) == 1) 379 { 380 speed_option_verbose = n; 381 } 382 else if (strcmp (s, "cycles-broken") == 0) 383 { 384 speed_option_cycles_broken = 1; 385 } 386 else 387 { 388 printf ("Unrecognised -o option: %s\n", s); 389 exit (1); 390 } 391 } 392 393 394 /* The following are basic speed running routines for various gmp functions. 395 Many are very similar and use speed.h macros. 396 397 Each routine allocates it's own destination space for the result of the 398 function, because only it can know what the function needs. 399 400 speed_starttime() and speed_endtime() are put tight around the code to be 401 measured. Any setups are done outside the timed portion. 402 403 Each routine is responsible for its own cache priming. 404 speed_cache_fill() is a good way to do this, see examples in speed.h. 405 One cache priming possibility, for CPUs with write-allocate cache, and 406 functions that don't take too long, is to do one dummy call before timing 407 so as to cache everything that gets used. But speed_measure() runs a 408 routine at least twice and will take the smaller time, so this might not 409 be necessary. 410 411 Data alignment will be important, for source, destination and temporary 412 workspace. 
A routine can align its destination and workspace.  Programs
   using the routines will ensure s->xp and s->yp are aligned.  Aligning
   onto a CACHE_LINE_SIZE boundary is suggested.  s->align_wp and
   s->align_wp2 should be respected where it makes sense to do so.
   SPEED_TMP_ALLOC_LIMBS is a good way to do this.

   A loop of the following form can be expected to turn into good assembler
   code on most CPUs, thereby minimizing overhead in the measurement.  It
   can always be assumed s->reps >= 1.

          i = s->reps
          do
            foo();
          while (--i != 0);

   Additional parameters might be added to "struct speed_params" in the
   future.  Routines should ignore anything they don't use.

   s->size can be used creatively, and s->xp and s->yp can be ignored.  For
   example, speed_mpz_fac_ui() uses s->size as n for the factorial.  s->r is
   just a user-supplied parameter.  speed_mpn_lshift() uses it as a shift,
   speed_mpn_mul_1() uses it as a multiplier.  */


/* Every wrapper from here on has the signature speed_measure() expects for
   its "fun" argument, and its whole body is a single SPEED_ROUTINE_* macro
   invocation.  The macros are not defined in this file (see speed.h).  */

/* MPN_COPY etc can be macros, so the _CALL forms are necessary */
double
speed_MPN_COPY (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (MPN_COPY);
}
double
speed_MPN_COPY_INCR (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR);
}
double
speed_MPN_COPY_DECR (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR);
}
/* mpn_copyi and mpn_copyd are only measured when the corresponding
   HAVE_NATIVE_ symbol is set.  */
#if HAVE_NATIVE_mpn_copyi
double
speed_mpn_copyi (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_copyi);
}
#endif
#if HAVE_NATIVE_mpn_copyd
double
speed_mpn_copyd (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_copyd);
}
#endif
double
speed_memcpy (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
}
/* mpn_com and mpn_neg go through the same SPEED_ROUTINE_MPN_COPY macro as
   the copy routines above.  */
double
speed_mpn_com (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_com);
}
double
speed_mpn_neg (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_neg);
}
double
speed_mpn_sec_tabselect (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TABSELECT (mpn_sec_tabselect);
}


/* mpn_addmul_1 and mpn_submul_1 are always built; each mpn_addmul_N for N
   from 2 to 8 is built only under its HAVE_NATIVE_mpn_addmul_N
   conditional.  */
double
speed_mpn_addmul_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_addmul_1);
}
double
speed_mpn_submul_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_submul_1);
}

#if HAVE_NATIVE_mpn_addmul_2
double
speed_mpn_addmul_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_2 (mpn_addmul_2);
}
#endif
#if HAVE_NATIVE_mpn_addmul_3
double
speed_mpn_addmul_3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_3 (mpn_addmul_3);
}
#endif
#if HAVE_NATIVE_mpn_addmul_4
double
speed_mpn_addmul_4 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_4 (mpn_addmul_4);
}
#endif
#if HAVE_NATIVE_mpn_addmul_5
double
speed_mpn_addmul_5 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_5 (mpn_addmul_5);
}
#endif
#if HAVE_NATIVE_mpn_addmul_6
double
speed_mpn_addmul_6 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_6 (mpn_addmul_6);
}
#endif
#if HAVE_NATIVE_mpn_addmul_7
double
speed_mpn_addmul_7 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_7 (mpn_addmul_7);
}
#endif
#if HAVE_NATIVE_mpn_addmul_8
double
speed_mpn_addmul_8 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_8 (mpn_addmul_8);
}
#endif

/* mpn_mul_1 is measured through both the plain and the _INPLACE macro;
   each mpn_mul_N for N from 2 to 6 is built only under its
   HAVE_NATIVE_mpn_mul_N conditional.  */
double
speed_mpn_mul_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);
}
double
speed_mpn_mul_1_inplace (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1);
}

#if HAVE_NATIVE_mpn_mul_2
double
speed_mpn_mul_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_2 (mpn_mul_2);
}
#endif
#if HAVE_NATIVE_mpn_mul_3
double
speed_mpn_mul_3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_3 (mpn_mul_3);
}
#endif
#if HAVE_NATIVE_mpn_mul_4
double
speed_mpn_mul_4 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
}
#endif
#if HAVE_NATIVE_mpn_mul_5
double
speed_mpn_mul_5 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
}
#endif
#if HAVE_NATIVE_mpn_mul_6
double
speed_mpn_mul_6 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
}
#endif


/* Shifts.  As the notes at the top of this section say, s->r is the shift
   for speed_mpn_lshift().  */
double
speed_mpn_lshift (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);
}
double
speed_mpn_lshiftc (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);
}
double
speed_mpn_rshift (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);
}


/* The carry-in variants (if available) are good for measuring because they
   won't skip a division if high<divisor.  Alternately, use -1 as a divisor
   with the plain _1 forms.
*/
/* mpn_divrem_1 and friends.  The "f" wrappers time the same function as
   their plain counterpart but through the corresponding ..._1F / ..._1CF
   macro.  The carry-in wrappers exist only under
   HAVE_NATIVE_mpn_divrem_1c.  */
double
speed_mpn_divrem_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1);
}
double
speed_mpn_divrem_1f (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1);
}
#if HAVE_NATIVE_mpn_divrem_1c
double
speed_mpn_divrem_1c (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1C (mpn_divrem_1c);
}
double
speed_mpn_divrem_1cf (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1CF (mpn_divrem_1c);
}
#endif

/* The _div and _inv variants of mpn_divrem_1 and mpn_mod_1, timed with the
   same macros as the plain functions.  */
double
speed_mpn_divrem_1_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div);
}
double
speed_mpn_divrem_1f_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div);
}
double
speed_mpn_divrem_1_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv);
}
double
speed_mpn_divrem_1f_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv);
}
double
speed_mpn_mod_1_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div);
}
double
speed_mpn_mod_1_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv);
}

double
speed_mpn_preinv_divrem_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1);
}
double
speed_mpn_preinv_divrem_1f (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1);
}

/* Only built when GMP_NUMB_BITS is a multiple of 4.  */
#if GMP_NUMB_BITS % 4 == 0
double
speed_mpn_mod_34lsub1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1);
}
#endif

double
speed_mpn_divrem_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);
}
double
speed_mpn_divrem_2_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div);
}
double
speed_mpn_divrem_2_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
}

double
speed_mpn_div_qr_1n_pi1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1);
}
double
speed_mpn_div_qr_1n_pi1_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_1);
}
double
speed_mpn_div_qr_1n_pi1_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_2);
}

double
speed_mpn_div_qr_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1);
}

/* The 2n and 2u wrappers time the same mpn_div_qr_2; they differ only in
   the second macro argument (1 and 0 respectively).  */
double
speed_mpn_div_qr_2n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
}
double
speed_mpn_div_qr_2u (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
}

double
speed_mpn_mod_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1);
}
#if HAVE_NATIVE_mpn_mod_1c
double
speed_mpn_mod_1c (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);
}
#endif
double
speed_mpn_preinv_mod_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
}
/* The mod_1_1 and mod_1_N macros take the function together with its
   matching _cps function; the _N form also takes N.  */
double
speed_mpn_mod_1_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
}
double
speed_mpn_mod_1_1_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
}
double
speed_mpn_mod_1_1_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
}
double
speed_mpn_mod_1_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
}
double
speed_mpn_mod_1_3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3);
}
double
speed_mpn_mod_1_4 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4);
}

double
speed_mpn_divexact_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1);
}

double
speed_mpn_divexact_by3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3);
}

double
speed_mpn_bdiv_dbm1c (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c);
}

double
speed_mpn_bdiv_q_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1);
}

double
speed_mpn_pi1_bdiv_q_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1);
}

#if HAVE_NATIVE_mpn_modexact_1_odd
double
speed_mpn_modexact_1_odd (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd);
}
#endif

double
speed_mpn_modexact_1c_odd (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
}

double
speed_mpz_mod (struct speed_params *s)
{
  SPEED_ROUTINE_MPZ_MOD (mpz_mod);
}

/* The sbpi1 and dcpi1 routines share SPEED_ROUTINE_MPN_PI1_DIV.  The sbpi1
   wrappers pass inv.inv32 with 2,0 and the dcpi1 wrappers pass &inv with
   6,3.  "inv" is not declared in these wrappers; presumably the macro
   expansion provides it -- confirm in speed.h.  */
double
speed_mpn_sbpi1_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
}
double
speed_mpn_dcpi1_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
}
double
speed_mpn_sbpi1_divappr_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);
}
double
speed_mpn_dcpi1_divappr_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3);
}
/* The mu routines are passed together with their _itch function.  */
double
speed_mpn_mu_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch);
}
double
speed_mpn_mu_divappr_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch);
}
double
speed_mpn_mu_div_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch);
}
double
speed_mpn_mupi_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch);
}

double
speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
}
double
speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
}
double
speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
}
double
speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
}
double
speed_mpn_mu_bdiv_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
}
double
speed_mpn_mu_bdiv_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
}

double
speed_mpn_broot (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BROOT (mpn_broot);
}
double
speed_mpn_broot_invm1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
}
/* The second macro argument is 5*s->size; presumably a scratch-space size
   in limbs -- confirm against SPEED_ROUTINE_MPN_BROOTINV.  */
double
speed_mpn_brootinv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
}

double
speed_mpn_binvert (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
}

double
speed_mpn_invert (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch);
}

double
speed_mpn_invertappr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch);
}

/* Uses mpn_invertappr_itch, the same itch function as
   speed_mpn_invertappr() above.  */
double
speed_mpn_ni_invertappr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch);
}

double
speed_mpn_sec_invert (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SEC_INVERT (mpn_sec_invert, mpn_sec_invert_itch);
}

double
speed_mpn_redc_1 (struct speed_params *s)
{
  SPEED_ROUTINE_REDC_1 (mpn_redc_1);
}
double
speed_mpn_redc_2 (struct speed_params *s)
{
  SPEED_ROUTINE_REDC_2 (mpn_redc_2);
}
double
speed_mpn_redc_n (struct speed_params *s)
{
  SPEED_ROUTINE_REDC_N (mpn_redc_n);
}


double
speed_mpn_popcount (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);
}
double
speed_mpn_hamdist (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_HAMDIST (mpn_hamdist);
}


/* Addition and subtraction.  The _1 forms are measured through both the
   plain and the _INPLACE macro.  */
double
speed_mpn_add_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_add_n);
}
double
speed_mpn_sub_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
}
double
speed_mpn_add_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_add_1);
}
double
speed_mpn_add_1_inplace (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_add_1);
}
double
speed_mpn_sub_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_sub_1);
}
double
speed_mpn_sub_1_inplace (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_sub_1);
}

double
speed_mpn_add_err1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
}
double
speed_mpn_sub_err1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
}
double
speed_mpn_add_err2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
}
double
speed_mpn_sub_err2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
}
double
speed_mpn_add_err3_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
}
double
speed_mpn_sub_err3_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
}


#if HAVE_NATIVE_mpn_add_n_sub_n
double
speed_mpn_add_n_sub_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size));
}
#endif

/* Combined shift-and-add/subtract routines, each built only under its own
   HAVE_NATIVE_ conditional.  Note that some of the conditionals test the
   symbol for being exactly 1 while others only test it for being
   non-zero.  */
#if HAVE_NATIVE_mpn_addlsh1_n == 1
double
speed_mpn_addlsh1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n);
}
#endif
#if HAVE_NATIVE_mpn_sublsh1_n == 1
double
speed_mpn_sublsh1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
}
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
double
speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
}
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
double
speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
}
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
double
speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
}
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n == 1
double
speed_mpn_rsblsh1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n);
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n == 1
double
speed_mpn_addlsh2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n);
}
#endif
#if HAVE_NATIVE_mpn_sublsh2_n == 1
double
speed_mpn_sublsh2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
double
speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
}
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
double
speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
}
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
double
speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
}
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n == 1
double
speed_mpn_rsblsh2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
}
#endif
/* The general forms below are all called with a fixed final argument
   of 7.  */
#if HAVE_NATIVE_mpn_addlsh_n
double
speed_mpn_addlsh_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
}
#endif
#if HAVE_NATIVE_mpn_sublsh_n
double
speed_mpn_sublsh_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
}
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
double
speed_mpn_addlsh_n_ip1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
}
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
double
speed_mpn_addlsh_n_ip2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
}
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
double
speed_mpn_sublsh_n_ip1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
}
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
double
speed_mpn_rsblsh_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
}
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
double
speed_mpn_rsh1add_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1add_n);
}
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
double
speed_mpn_rsh1sub_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1sub_n);
}
#endif

/* The conditional add and subtract are called with a first argument
   of 1.  */
double
speed_mpn_cnd_add_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_add_n (1, wp, xp, yp, s->size));
}
double
speed_mpn_cnd_sub_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_sub_n (1, wp, xp, yp, s->size));
}

/* mpn_and_n etc can be macros and so have to be handled with
   SPEED_ROUTINE_MPN_BINARY_N_CALL forms */
double
speed_mpn_and_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
}
double
speed_mpn_andn_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
}
double
speed_mpn_nand_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
}
double
speed_mpn_ior_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
}
double
speed_mpn_iorn_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
}
double
speed_mpn_nior_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
}
double
speed_mpn_xor_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
}
double
speed_mpn_xnor_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
}


/* Multiplication and squaring.  speed_mpn_mul_n_sqr() times mpn_mul_n with
   s->xp given as both source operands.  */
double
speed_mpn_mul_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);
}
double
speed_mpn_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SQR (mpn_sqr);
}
double
speed_mpn_mul_n_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SQR_CALL (mpn_mul_n (wp, s->xp, s->xp, s->size));
}

double
speed_mpn_mul_basecase (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUL(mpn_mul_basecase);
}
double
speed_mpn_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUL(mpn_mul);
}
double
speed_mpn_sqr_basecase (struct speed_params *s)
{
  /* FIXME: size restrictions on some versions of sqr_basecase */
  SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);
}

#if HAVE_NATIVE_mpn_sqr_diagonal
double
speed_mpn_sqr_diagonal (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal);
}
#endif

#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
double
speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
}
#endif

/* Toom squaring and multiplication; each function has its own
   SPEED_ROUTINE_MPN_TOOM* macro.  */
double
speed_mpn_toom2_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr);
}
double
speed_mpn_toom3_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr);
}
double
speed_mpn_toom4_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
}
double
speed_mpn_toom6_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
}
double
speed_mpn_toom8_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr);
}
double
speed_mpn_toom22_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
}
double
speed_mpn_toom33_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
}
double
speed_mpn_toom44_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
}
double
speed_mpn_toom6h_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
}
double
speed_mpn_toom8h_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
}

double
speed_mpn_toom32_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul);
}
double
speed_mpn_toom42_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
}
double
speed_mpn_toom43_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
}
double
speed_mpn_toom63_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
}
/* The toomXX_for_toomYY wrappers each time mpn_toomXX_mul through a
   dedicated ..._FOR_... macro.  Presumably the macro picks operand sizes
   suited to comparing the two algorithms -- confirm in speed.h.  */
double
speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
}
double
speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
}
double
speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
}
double
speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
}
double
speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
}
double
speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
}
double
speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
}
double
speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
}

/* The _sqr wrappers below call the same multiply function with s->xp as
   both source operands.  */
double
speed_mpn_nussbaumer_mul (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUL_N_CALL
    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
}
double
speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SQR_CALL
    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
}

#if WANT_OLD_FFT_FULL
double
speed_mpn_mul_fft_full (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUL_N_CALL
    (mpn_mul_fft_full (wp, s->xp, s->size, s->yp, s->size));
}
double
speed_mpn_mul_fft_full_sqr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SQR_CALL
    (mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
}
#endif

/* These are mod 2^N+1 multiplies and squares.  If s->r is supplied it's
   used as k, otherwise the best k for the size is used.  If s->size isn't a
   multiple of 2^k it's rounded up to make the effective operation size.
*/ 1443 1444 #define SPEED_ROUTINE_MPN_MUL_FFT_CALL(call, sqr) \ 1445 { \ 1446 mp_ptr wp; \ 1447 mp_size_t pl; \ 1448 int k; \ 1449 unsigned i; \ 1450 double t; \ 1451 TMP_DECL; \ 1452 \ 1453 SPEED_RESTRICT_COND (s->size >= 1); \ 1454 \ 1455 if (s->r != 0) \ 1456 k = s->r; \ 1457 else \ 1458 k = mpn_fft_best_k (s->size, sqr); \ 1459 \ 1460 TMP_MARK; \ 1461 pl = mpn_fft_next_size (s->size, k); \ 1462 SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp); \ 1463 \ 1464 speed_operand_src (s, s->xp, s->size); \ 1465 if (!sqr) \ 1466 speed_operand_src (s, s->yp, s->size); \ 1467 speed_operand_dst (s, wp, pl+1); \ 1468 speed_cache_fill (s); \ 1469 \ 1470 speed_starttime (); \ 1471 i = s->reps; \ 1472 do \ 1473 call; \ 1474 while (--i != 0); \ 1475 t = speed_endtime (); \ 1476 \ 1477 TMP_FREE; \ 1478 return t; \ 1479 } 1480 1481 double 1482 speed_mpn_mul_fft (struct speed_params *s) 1483 { 1484 SPEED_ROUTINE_MPN_MUL_FFT_CALL 1485 (mpn_mul_fft (wp, pl, s->xp, s->size, s->yp, s->size, k), 0); 1486 } 1487 1488 double 1489 speed_mpn_mul_fft_sqr (struct speed_params *s) 1490 { 1491 SPEED_ROUTINE_MPN_MUL_FFT_CALL 1492 (mpn_mul_fft (wp, pl, s->xp, s->size, s->xp, s->size, k), 1); 1493 } 1494 1495 double 1496 speed_mpn_fft_mul (struct speed_params *s) 1497 { 1498 SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size)); 1499 } 1500 1501 double 1502 speed_mpn_fft_sqr (struct speed_params *s) 1503 { 1504 SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size)); 1505 } 1506 1507 double 1508 speed_mpn_sqrlo (struct speed_params *s) 1509 { 1510 SPEED_ROUTINE_MPN_SQRLO (mpn_sqrlo); 1511 } 1512 double 1513 speed_mpn_sqrlo_basecase (struct speed_params *s) 1514 { 1515 SPEED_ROUTINE_MPN_SQRLO (mpn_sqrlo_basecase); 1516 } 1517 double 1518 speed_mpn_mullo_n (struct speed_params *s) 1519 { 1520 SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n); 1521 } 1522 double 1523 speed_mpn_mullo_basecase (struct speed_params *s) 1524 { 1525 SPEED_ROUTINE_MPN_MULLO_BASECASE 
(mpn_mullo_basecase); 1526 } 1527 1528 double 1529 speed_mpn_mulmid_basecase (struct speed_params *s) 1530 { 1531 SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase); 1532 } 1533 1534 double 1535 speed_mpn_mulmid (struct speed_params *s) 1536 { 1537 SPEED_ROUTINE_MPN_MULMID (mpn_mulmid); 1538 } 1539 1540 double 1541 speed_mpn_mulmid_n (struct speed_params *s) 1542 { 1543 SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n); 1544 } 1545 1546 double 1547 speed_mpn_toom42_mulmid (struct speed_params *s) 1548 { 1549 SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid); 1550 } 1551 1552 double 1553 speed_mpn_mulmod_bnm1 (struct speed_params *s) 1554 { 1555 SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp)); 1556 } 1557 1558 double 1559 speed_mpn_bc_mulmod_bnm1 (struct speed_params *s) 1560 { 1561 SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp)); 1562 } 1563 1564 double 1565 speed_mpn_mulmod_bnm1_rounded (struct speed_params *s) 1566 { 1567 SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1); 1568 } 1569 1570 double 1571 speed_mpn_sqrmod_bnm1 (struct speed_params *s) 1572 { 1573 SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp)); 1574 } 1575 1576 double 1577 speed_mpn_matrix22_mul (struct speed_params *s) 1578 { 1579 /* Speed params only includes 2 inputs, so we have to invent the 1580 other 6. 
*/ 1581 1582 mp_ptr a; 1583 mp_ptr r; 1584 mp_ptr b; 1585 mp_ptr tp; 1586 mp_size_t itch; 1587 unsigned i; 1588 double t; 1589 TMP_DECL; 1590 1591 TMP_MARK; 1592 SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp); 1593 SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp); 1594 SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp); 1595 1596 MPN_COPY (a, s->xp, s->size); 1597 mpn_random (a + s->size, 3 * s->size); 1598 MPN_COPY (b, s->yp, s->size); 1599 mpn_random (b + s->size, 3 * s->size); 1600 1601 itch = mpn_matrix22_mul_itch (s->size, s->size); 1602 SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); 1603 1604 speed_operand_src (s, a, 4 * s->size); 1605 speed_operand_src (s, b, 4 * s->size); 1606 speed_operand_dst (s, r, 8 * s->size + 4); 1607 speed_operand_dst (s, tp, itch); 1608 speed_cache_fill (s); 1609 1610 speed_starttime (); 1611 i = s->reps; 1612 do 1613 { 1614 mp_size_t sz = s->size; 1615 MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz); 1616 MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz); 1617 MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz); 1618 MPN_COPY (r + 6 * sz + 3, a + 3 * sz, sz); 1619 mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz, 1620 b, b + 1 * sz, b + 2 * sz, b + 3 * sz, sz, 1621 tp); 1622 } 1623 while (--i != 0); 1624 t = speed_endtime(); 1625 TMP_FREE; 1626 return t; 1627 } 1628 1629 double 1630 speed_mpn_hgcd (struct speed_params *s) 1631 { 1632 SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch); 1633 } 1634 1635 double 1636 speed_mpn_hgcd_lehmer (struct speed_params *s) 1637 { 1638 SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch); 1639 } 1640 1641 double 1642 speed_mpn_hgcd_appr (struct speed_params *s) 1643 { 1644 SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch); 1645 } 1646 1647 double 1648 speed_mpn_hgcd_appr_lehmer (struct speed_params *s) 1649 { 1650 SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch); 1651 } 1652 1653 double 1654 speed_mpn_hgcd_reduce (struct 
speed_params *s) 1655 { 1656 SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch); 1657 } 1658 double 1659 speed_mpn_hgcd_reduce_1 (struct speed_params *s) 1660 { 1661 SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch); 1662 } 1663 double 1664 speed_mpn_hgcd_reduce_2 (struct speed_params *s) 1665 { 1666 SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch); 1667 } 1668 1669 double 1670 speed_mpn_gcd (struct speed_params *s) 1671 { 1672 SPEED_ROUTINE_MPN_GCD (mpn_gcd); 1673 } 1674 1675 double 1676 speed_mpn_gcdext (struct speed_params *s) 1677 { 1678 SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext); 1679 } 1680 #if 0 1681 double 1682 speed_mpn_gcdext_lehmer (struct speed_params *s) 1683 { 1684 SPEED_ROUTINE_MPN_GCDEXT (__gmpn_gcdext_lehmer); 1685 } 1686 #endif 1687 double 1688 speed_mpn_gcdext_single (struct speed_params *s) 1689 { 1690 SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single); 1691 } 1692 double 1693 speed_mpn_gcdext_double (struct speed_params *s) 1694 { 1695 SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double); 1696 } 1697 double 1698 speed_mpn_gcdext_one_single (struct speed_params *s) 1699 { 1700 SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single); 1701 } 1702 double 1703 speed_mpn_gcdext_one_double (struct speed_params *s) 1704 { 1705 SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double); 1706 } 1707 double 1708 speed_mpn_gcd_1 (struct speed_params *s) 1709 { 1710 SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1); 1711 } 1712 double 1713 speed_mpn_gcd_1N (struct speed_params *s) 1714 { 1715 SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1); 1716 } 1717 1718 1719 double 1720 speed_mpz_jacobi (struct speed_params *s) 1721 { 1722 SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi); 1723 } 1724 double 1725 speed_mpn_jacobi_base (struct speed_params *s) 1726 { 1727 SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base); 1728 } 1729 double 1730 speed_mpn_jacobi_base_1 (struct speed_params *s) 1731 { 1732 SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1); 1733 } 
1734 double 1735 speed_mpn_jacobi_base_2 (struct speed_params *s) 1736 { 1737 SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2); 1738 } 1739 double 1740 speed_mpn_jacobi_base_3 (struct speed_params *s) 1741 { 1742 SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3); 1743 } 1744 double 1745 speed_mpn_jacobi_base_4 (struct speed_params *s) 1746 { 1747 SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4); 1748 } 1749 1750 1751 double 1752 speed_mpn_sqrtrem (struct speed_params *s) 1753 { 1754 SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_sqrtrem (wp, wp2, s->xp, s->size)); 1755 } 1756 1757 double 1758 speed_mpn_sqrt (struct speed_params *s) 1759 { 1760 SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_sqrtrem (wp, NULL, s->xp, s->size)); 1761 } 1762 1763 double 1764 speed_mpn_rootrem (struct speed_params *s) 1765 { 1766 SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_rootrem (wp, wp2, s->xp, s->size, s->r)); 1767 } 1768 1769 double 1770 speed_mpn_root (struct speed_params *s) 1771 { 1772 SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_rootrem (wp, NULL, s->xp, s->size, s->r)); 1773 } 1774 1775 1776 double 1777 speed_mpz_fac_ui (struct speed_params *s) 1778 { 1779 SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui); 1780 } 1781 1782 double 1783 speed_mpz_2fac_ui (struct speed_params *s) 1784 { 1785 SPEED_ROUTINE_MPZ_UI (mpz_2fac_ui); 1786 } 1787 1788 1789 double 1790 speed_mpn_fib2_ui (struct speed_params *s) 1791 { 1792 SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui); 1793 } 1794 double 1795 speed_mpz_fib_ui (struct speed_params *s) 1796 { 1797 SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui); 1798 } 1799 double 1800 speed_mpz_fib2_ui (struct speed_params *s) 1801 { 1802 SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui); 1803 } 1804 double 1805 speed_mpz_lucnum_ui (struct speed_params *s) 1806 { 1807 SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui); 1808 } 1809 double 1810 speed_mpz_lucnum2_ui (struct speed_params *s) 1811 { 1812 SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui); 1813 } 1814 1815 1816 double 1817 speed_mpz_powm (struct speed_params *s) 1818 { 1819 
SPEED_ROUTINE_MPZ_POWM (mpz_powm); 1820 } 1821 double 1822 speed_mpz_powm_mod (struct speed_params *s) 1823 { 1824 SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod); 1825 } 1826 double 1827 speed_mpz_powm_redc (struct speed_params *s) 1828 { 1829 SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc); 1830 } 1831 double 1832 speed_mpz_powm_sec (struct speed_params *s) 1833 { 1834 SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec); 1835 } 1836 double 1837 speed_mpz_powm_ui (struct speed_params *s) 1838 { 1839 SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui); 1840 } 1841 1842 1843 double 1844 speed_binvert_limb (struct speed_params *s) 1845 { 1846 SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb); 1847 } 1848 1849 1850 double 1851 speed_noop (struct speed_params *s) 1852 { 1853 unsigned i; 1854 1855 speed_starttime (); 1856 i = s->reps; 1857 do 1858 noop (); 1859 while (--i != 0); 1860 return speed_endtime (); 1861 } 1862 1863 double 1864 speed_noop_wxs (struct speed_params *s) 1865 { 1866 mp_ptr wp; 1867 unsigned i; 1868 double t; 1869 TMP_DECL; 1870 1871 TMP_MARK; 1872 wp = TMP_ALLOC_LIMBS (1); 1873 1874 speed_starttime (); 1875 i = s->reps; 1876 do 1877 noop_wxs (wp, s->xp, s->size); 1878 while (--i != 0); 1879 t = speed_endtime (); 1880 1881 TMP_FREE; 1882 return t; 1883 } 1884 1885 double 1886 speed_noop_wxys (struct speed_params *s) 1887 { 1888 mp_ptr wp; 1889 unsigned i; 1890 double t; 1891 TMP_DECL; 1892 1893 TMP_MARK; 1894 wp = TMP_ALLOC_LIMBS (1); 1895 1896 speed_starttime (); 1897 i = s->reps; 1898 do 1899 noop_wxys (wp, s->xp, s->yp, s->size); 1900 while (--i != 0); 1901 t = speed_endtime (); 1902 1903 TMP_FREE; 1904 return t; 1905 } 1906 1907 1908 #define SPEED_ROUTINE_ALLOC_FREE(variables, calls) \ 1909 { \ 1910 unsigned i; \ 1911 variables; \ 1912 \ 1913 speed_starttime (); \ 1914 i = s->reps; \ 1915 do \ 1916 { \ 1917 calls; \ 1918 } \ 1919 while (--i != 0); \ 1920 return speed_endtime (); \ 1921 } 1922 1923 1924 /* Compare these to see how much malloc/free costs and then how much 1925 
__gmp_default_allocate/free and mpz_init/clear add. mpz_init/clear or 1926 mpq_init/clear will be doing a 1 limb allocate, so use that as the size 1927 when including them in comparisons. */ 1928 1929 double 1930 speed_malloc_free (struct speed_params *s) 1931 { 1932 size_t bytes = s->size * GMP_LIMB_BYTES; 1933 SPEED_ROUTINE_ALLOC_FREE (void *p, 1934 p = malloc (bytes); 1935 free (p)); 1936 } 1937 1938 double 1939 speed_malloc_realloc_free (struct speed_params *s) 1940 { 1941 size_t bytes = s->size * GMP_LIMB_BYTES; 1942 SPEED_ROUTINE_ALLOC_FREE (void *p, 1943 p = malloc (GMP_LIMB_BYTES); 1944 p = realloc (p, bytes); 1945 free (p)); 1946 } 1947 1948 double 1949 speed_gmp_allocate_free (struct speed_params *s) 1950 { 1951 size_t bytes = s->size * GMP_LIMB_BYTES; 1952 SPEED_ROUTINE_ALLOC_FREE (void *p, 1953 p = (*__gmp_allocate_func) (bytes); 1954 (*__gmp_free_func) (p, bytes)); 1955 } 1956 1957 double 1958 speed_gmp_allocate_reallocate_free (struct speed_params *s) 1959 { 1960 size_t bytes = s->size * GMP_LIMB_BYTES; 1961 SPEED_ROUTINE_ALLOC_FREE 1962 (void *p, 1963 p = (*__gmp_allocate_func) (GMP_LIMB_BYTES); 1964 p = (*__gmp_reallocate_func) (p, bytes, GMP_LIMB_BYTES); 1965 (*__gmp_free_func) (p, bytes)); 1966 } 1967 1968 double 1969 speed_mpz_init_clear (struct speed_params *s) 1970 { 1971 SPEED_ROUTINE_ALLOC_FREE (mpz_t z, 1972 mpz_init (z); 1973 mpz_clear (z)); 1974 } 1975 1976 double 1977 speed_mpz_init_realloc_clear (struct speed_params *s) 1978 { 1979 SPEED_ROUTINE_ALLOC_FREE (mpz_t z, 1980 mpz_init (z); 1981 _mpz_realloc (z, s->size); 1982 mpz_clear (z)); 1983 } 1984 1985 double 1986 speed_mpq_init_clear (struct speed_params *s) 1987 { 1988 SPEED_ROUTINE_ALLOC_FREE (mpq_t q, 1989 mpq_init (q); 1990 mpq_clear (q)); 1991 } 1992 1993 double 1994 speed_mpf_init_clear (struct speed_params *s) 1995 { 1996 SPEED_ROUTINE_ALLOC_FREE (mpf_t f, 1997 mpf_init (f); 1998 mpf_clear (f)); 1999 } 2000 2001 2002 /* Compare this to mpn_add_n to see how much overhead mpz_add 
adds. Note 2003 that repeatedly calling mpz_add with the same data gives branch prediction 2004 in it an advantage. */ 2005 2006 double 2007 speed_mpz_add (struct speed_params *s) 2008 { 2009 mpz_t w, x, y; 2010 unsigned i; 2011 double t; 2012 2013 mpz_init (w); 2014 mpz_init (x); 2015 mpz_init (y); 2016 2017 mpz_set_n (x, s->xp, s->size); 2018 mpz_set_n (y, s->yp, s->size); 2019 mpz_add (w, x, y); 2020 2021 speed_starttime (); 2022 i = s->reps; 2023 do 2024 { 2025 mpz_add (w, x, y); 2026 } 2027 while (--i != 0); 2028 t = speed_endtime (); 2029 2030 mpz_clear (w); 2031 mpz_clear (x); 2032 mpz_clear (y); 2033 return t; 2034 } 2035 2036 2037 /* If r==0, calculate (size,size/2), 2038 otherwise calculate (size,r). */ 2039 2040 double 2041 speed_mpz_bin_uiui (struct speed_params *s) 2042 { 2043 mpz_t w; 2044 unsigned long k; 2045 unsigned i; 2046 double t; 2047 2048 mpz_init (w); 2049 if (s->r != 0) 2050 k = s->r; 2051 else 2052 k = s->size/2; 2053 2054 speed_starttime (); 2055 i = s->reps; 2056 do 2057 { 2058 mpz_bin_uiui (w, s->size, k); 2059 } 2060 while (--i != 0); 2061 t = speed_endtime (); 2062 2063 mpz_clear (w); 2064 return t; 2065 } 2066 2067 /* If r==0, calculate binomial(2^size,size), 2068 otherwise calculate binomial(2^size,r). */ 2069 2070 double 2071 speed_mpz_bin_ui (struct speed_params *s) 2072 { 2073 mpz_t w, x; 2074 unsigned long k; 2075 unsigned i; 2076 double t; 2077 2078 mpz_init (w); 2079 mpz_init_set_ui (x, 0); 2080 2081 mpz_setbit (x, s->size); 2082 2083 if (s->r != 0) 2084 k = s->r; 2085 else 2086 k = s->size; 2087 2088 speed_starttime (); 2089 i = s->reps; 2090 do 2091 { 2092 mpz_bin_ui (w, x, k); 2093 } 2094 while (--i != 0); 2095 t = speed_endtime (); 2096 2097 mpz_clear (w); 2098 mpz_clear (x); 2099 return t; 2100 } 2101 2102 /* The multiplies are successively dependent so the latency is measured, not 2103 the issue rate. There's only 10 per loop so the code doesn't get too big 2104 since umul_ppmm is several instructions on some cpus. 
2105 2106 Putting the arguments as "h,l,l,h" gets slightly better code from gcc 2107 2.95.2 on x86, it puts only one mov between each mul, not two. That mov 2108 though will probably show up as a bogus extra cycle though. 2109 2110 The measuring function macros are into three parts to avoid overflowing 2111 preprocessor expansion space if umul_ppmm is big. 2112 2113 Limitations: 2114 2115 Don't blindly use this to set UMUL_TIME in gmp-mparam.h, check the code 2116 generated first, especially on CPUs with low latency multipliers. 2117 2118 The default umul_ppmm doing h*l will be getting increasing numbers of 2119 high zero bits in the calculation. CPUs with data-dependent multipliers 2120 will want to use umul_ppmm.1 to get some randomization into the 2121 calculation. The extra xors and fetches will be a slowdown of course. */ 2122 2123 #define SPEED_MACRO_UMUL_PPMM_A \ 2124 { \ 2125 mp_limb_t h, l; \ 2126 unsigned i; \ 2127 double t; \ 2128 \ 2129 s->time_divisor = 10; \ 2130 \ 2131 h = s->xp[0]; \ 2132 l = s->yp[0]; \ 2133 \ 2134 if (s->r == 1) \ 2135 { \ 2136 speed_starttime (); \ 2137 i = s->reps; \ 2138 do \ 2139 { 2140 2141 #define SPEED_MACRO_UMUL_PPMM_B \ 2142 } \ 2143 while (--i != 0); \ 2144 t = speed_endtime (); \ 2145 } \ 2146 else \ 2147 { \ 2148 speed_starttime (); \ 2149 i = s->reps; \ 2150 do \ 2151 { 2152 2153 #define SPEED_MACRO_UMUL_PPMM_C \ 2154 } \ 2155 while (--i != 0); \ 2156 t = speed_endtime (); \ 2157 } \ 2158 \ 2159 /* stop the compiler optimizing away the whole calculation! 
*/ \ 2160 noop_1 (h); \ 2161 noop_1 (l); \ 2162 \ 2163 return t; \ 2164 } 2165 2166 2167 double 2168 speed_umul_ppmm (struct speed_params *s) 2169 { 2170 SPEED_MACRO_UMUL_PPMM_A; 2171 { 2172 umul_ppmm (h, l, l, h); h ^= s->xp_block[0]; l ^= s->yp_block[0]; 2173 umul_ppmm (h, l, l, h); h ^= s->xp_block[1]; l ^= s->yp_block[1]; 2174 umul_ppmm (h, l, l, h); h ^= s->xp_block[2]; l ^= s->yp_block[2]; 2175 umul_ppmm (h, l, l, h); h ^= s->xp_block[3]; l ^= s->yp_block[3]; 2176 umul_ppmm (h, l, l, h); h ^= s->xp_block[4]; l ^= s->yp_block[4]; 2177 umul_ppmm (h, l, l, h); h ^= s->xp_block[5]; l ^= s->yp_block[5]; 2178 umul_ppmm (h, l, l, h); h ^= s->xp_block[6]; l ^= s->yp_block[6]; 2179 umul_ppmm (h, l, l, h); h ^= s->xp_block[7]; l ^= s->yp_block[7]; 2180 umul_ppmm (h, l, l, h); h ^= s->xp_block[8]; l ^= s->yp_block[8]; 2181 umul_ppmm (h, l, l, h); h ^= s->xp_block[9]; l ^= s->yp_block[9]; 2182 } 2183 SPEED_MACRO_UMUL_PPMM_B; 2184 { 2185 umul_ppmm (h, l, l, h); 2186 umul_ppmm (h, l, l, h); 2187 umul_ppmm (h, l, l, h); 2188 umul_ppmm (h, l, l, h); 2189 umul_ppmm (h, l, l, h); 2190 umul_ppmm (h, l, l, h); 2191 umul_ppmm (h, l, l, h); 2192 umul_ppmm (h, l, l, h); 2193 umul_ppmm (h, l, l, h); 2194 umul_ppmm (h, l, l, h); 2195 } 2196 SPEED_MACRO_UMUL_PPMM_C; 2197 } 2198 2199 2200 #if HAVE_NATIVE_mpn_umul_ppmm 2201 double 2202 speed_mpn_umul_ppmm (struct speed_params *s) 2203 { 2204 SPEED_MACRO_UMUL_PPMM_A; 2205 { 2206 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[0]; l ^= s->yp_block[0]; 2207 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[1]; l ^= s->yp_block[1]; 2208 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[2]; l ^= s->yp_block[2]; 2209 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[3]; l ^= s->yp_block[3]; 2210 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[4]; l ^= s->yp_block[4]; 2211 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[5]; l ^= s->yp_block[5]; 2212 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[6]; l ^= s->yp_block[6]; 2213 h = mpn_umul_ppmm (&l, h, l); 
h ^= s->xp_block[7]; l ^= s->yp_block[7]; 2214 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[8]; l ^= s->yp_block[8]; 2215 h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[9]; l ^= s->yp_block[9]; 2216 } 2217 SPEED_MACRO_UMUL_PPMM_B; 2218 { 2219 h = mpn_umul_ppmm (&l, h, l); 2220 h = mpn_umul_ppmm (&l, h, l); 2221 h = mpn_umul_ppmm (&l, h, l); 2222 h = mpn_umul_ppmm (&l, h, l); 2223 h = mpn_umul_ppmm (&l, h, l); 2224 h = mpn_umul_ppmm (&l, h, l); 2225 h = mpn_umul_ppmm (&l, h, l); 2226 h = mpn_umul_ppmm (&l, h, l); 2227 h = mpn_umul_ppmm (&l, h, l); 2228 h = mpn_umul_ppmm (&l, h, l); 2229 } 2230 SPEED_MACRO_UMUL_PPMM_C; 2231 } 2232 #endif 2233 2234 #if HAVE_NATIVE_mpn_umul_ppmm_r 2235 double 2236 speed_mpn_umul_ppmm_r (struct speed_params *s) 2237 { 2238 SPEED_MACRO_UMUL_PPMM_A; 2239 { 2240 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[0]; l ^= s->yp_block[0]; 2241 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[1]; l ^= s->yp_block[1]; 2242 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[2]; l ^= s->yp_block[2]; 2243 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[3]; l ^= s->yp_block[3]; 2244 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[4]; l ^= s->yp_block[4]; 2245 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[5]; l ^= s->yp_block[5]; 2246 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[6]; l ^= s->yp_block[6]; 2247 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[7]; l ^= s->yp_block[7]; 2248 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[8]; l ^= s->yp_block[8]; 2249 h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[9]; l ^= s->yp_block[9]; 2250 } 2251 SPEED_MACRO_UMUL_PPMM_B; 2252 { 2253 h = mpn_umul_ppmm_r (h, l, &l); 2254 h = mpn_umul_ppmm_r (h, l, &l); 2255 h = mpn_umul_ppmm_r (h, l, &l); 2256 h = mpn_umul_ppmm_r (h, l, &l); 2257 h = mpn_umul_ppmm_r (h, l, &l); 2258 h = mpn_umul_ppmm_r (h, l, &l); 2259 h = mpn_umul_ppmm_r (h, l, &l); 2260 h = mpn_umul_ppmm_r (h, l, &l); 2261 h = mpn_umul_ppmm_r (h, l, &l); 2262 h = mpn_umul_ppmm_r (h, l, &l); 
2263 } 2264 SPEED_MACRO_UMUL_PPMM_C; 2265 } 2266 #endif 2267 2268 2269 /* The divisions are successively dependent so latency is measured, not 2270 issue rate. There's only 10 per loop so the code doesn't get too big, 2271 especially for udiv_qrnnd_preinv and preinv2norm, which are several 2272 instructions each. 2273 2274 Note that it's only the division which is measured here, there's no data 2275 fetching and no shifting if the divisor gets normalized. 2276 2277 In speed_udiv_qrnnd with gcc 2.95.2 on x86 the parameters "q,r,r,q,d" 2278 generate x86 div instructions with nothing in between. 2279 2280 The measuring function macros are in two parts to avoid overflowing 2281 preprocessor expansion space if udiv_qrnnd etc are big. 2282 2283 Limitations: 2284 2285 Don't blindly use this to set UDIV_TIME in gmp-mparam.h, check the code 2286 generated first. 2287 2288 CPUs with data-dependent divisions may want more attention paid to the 2289 randomness of the data used. Probably the measurement wanted is over 2290 uniformly distributed numbers, but what's here might not be giving that. */ 2291 2292 #define SPEED_ROUTINE_UDIV_QRNND_A(normalize) \ 2293 { \ 2294 double t; \ 2295 unsigned i; \ 2296 mp_limb_t q, r, d; \ 2297 mp_limb_t dinv; \ 2298 \ 2299 s->time_divisor = 10; \ 2300 \ 2301 /* divisor from "r" parameter, or a default */ \ 2302 d = s->r; \ 2303 if (d == 0) \ 2304 d = mp_bases[10].big_base; \ 2305 \ 2306 if (normalize) \ 2307 { \ 2308 unsigned norm; \ 2309 count_leading_zeros (norm, d); \ 2310 d <<= norm; \ 2311 invert_limb (dinv, d); \ 2312 } \ 2313 \ 2314 q = s->xp[0]; \ 2315 r = s->yp[0] % d; \ 2316 \ 2317 speed_starttime (); \ 2318 i = s->reps; \ 2319 do \ 2320 { 2321 2322 #define SPEED_ROUTINE_UDIV_QRNND_B \ 2323 } \ 2324 while (--i != 0); \ 2325 t = speed_endtime (); \ 2326 \ 2327 /* stop the compiler optimizing away the whole calculation! 
*/ \ 2328 noop_1 (q); \ 2329 noop_1 (r); \ 2330 \ 2331 return t; \ 2332 } 2333 2334 double 2335 speed_udiv_qrnnd (struct speed_params *s) 2336 { 2337 SPEED_ROUTINE_UDIV_QRNND_A (UDIV_NEEDS_NORMALIZATION); 2338 { 2339 udiv_qrnnd (q, r, r, q, d); 2340 udiv_qrnnd (q, r, r, q, d); 2341 udiv_qrnnd (q, r, r, q, d); 2342 udiv_qrnnd (q, r, r, q, d); 2343 udiv_qrnnd (q, r, r, q, d); 2344 udiv_qrnnd (q, r, r, q, d); 2345 udiv_qrnnd (q, r, r, q, d); 2346 udiv_qrnnd (q, r, r, q, d); 2347 udiv_qrnnd (q, r, r, q, d); 2348 udiv_qrnnd (q, r, r, q, d); 2349 } 2350 SPEED_ROUTINE_UDIV_QRNND_B; 2351 } 2352 2353 double 2354 speed_udiv_qrnnd_c (struct speed_params *s) 2355 { 2356 SPEED_ROUTINE_UDIV_QRNND_A (1); 2357 { 2358 __udiv_qrnnd_c (q, r, r, q, d); 2359 __udiv_qrnnd_c (q, r, r, q, d); 2360 __udiv_qrnnd_c (q, r, r, q, d); 2361 __udiv_qrnnd_c (q, r, r, q, d); 2362 __udiv_qrnnd_c (q, r, r, q, d); 2363 __udiv_qrnnd_c (q, r, r, q, d); 2364 __udiv_qrnnd_c (q, r, r, q, d); 2365 __udiv_qrnnd_c (q, r, r, q, d); 2366 __udiv_qrnnd_c (q, r, r, q, d); 2367 __udiv_qrnnd_c (q, r, r, q, d); 2368 } 2369 SPEED_ROUTINE_UDIV_QRNND_B; 2370 } 2371 2372 #if HAVE_NATIVE_mpn_udiv_qrnnd 2373 double 2374 speed_mpn_udiv_qrnnd (struct speed_params *s) 2375 { 2376 SPEED_ROUTINE_UDIV_QRNND_A (1); 2377 { 2378 q = mpn_udiv_qrnnd (&r, r, q, d); 2379 q = mpn_udiv_qrnnd (&r, r, q, d); 2380 q = mpn_udiv_qrnnd (&r, r, q, d); 2381 q = mpn_udiv_qrnnd (&r, r, q, d); 2382 q = mpn_udiv_qrnnd (&r, r, q, d); 2383 q = mpn_udiv_qrnnd (&r, r, q, d); 2384 q = mpn_udiv_qrnnd (&r, r, q, d); 2385 q = mpn_udiv_qrnnd (&r, r, q, d); 2386 q = mpn_udiv_qrnnd (&r, r, q, d); 2387 q = mpn_udiv_qrnnd (&r, r, q, d); 2388 } 2389 SPEED_ROUTINE_UDIV_QRNND_B; 2390 } 2391 #endif 2392 2393 #if HAVE_NATIVE_mpn_udiv_qrnnd_r 2394 double 2395 speed_mpn_udiv_qrnnd_r (struct speed_params *s) 2396 { 2397 SPEED_ROUTINE_UDIV_QRNND_A (1); 2398 { 2399 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2400 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2401 q = mpn_udiv_qrnnd_r (r, 
q, d, &r); 2402 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2403 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2404 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2405 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2406 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2407 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2408 q = mpn_udiv_qrnnd_r (r, q, d, &r); 2409 } 2410 SPEED_ROUTINE_UDIV_QRNND_B; 2411 } 2412 #endif 2413 2414 2415 double 2416 speed_invert_limb (struct speed_params *s) 2417 { 2418 SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, d)); 2419 } 2420 2421 2422 /* xp[0] might not be particularly random, but should give an indication how 2423 "/" runs. Same for speed_operator_mod below. */ 2424 double 2425 speed_operator_div (struct speed_params *s) 2426 { 2427 double t; 2428 unsigned i; 2429 mp_limb_t x, q, d; 2430 2431 s->time_divisor = 10; 2432 2433 /* divisor from "r" parameter, or a default */ 2434 d = s->r; 2435 if (d == 0) 2436 d = mp_bases[10].big_base; 2437 2438 x = s->xp[0]; 2439 q = 0; 2440 2441 speed_starttime (); 2442 i = s->reps; 2443 do 2444 { 2445 q ^= x; q /= d; 2446 q ^= x; q /= d; 2447 q ^= x; q /= d; 2448 q ^= x; q /= d; 2449 q ^= x; q /= d; 2450 q ^= x; q /= d; 2451 q ^= x; q /= d; 2452 q ^= x; q /= d; 2453 q ^= x; q /= d; 2454 q ^= x; q /= d; 2455 } 2456 while (--i != 0); 2457 t = speed_endtime (); 2458 2459 /* stop the compiler optimizing away the whole calculation! 
*/ 2460 noop_1 (q); 2461 2462 return t; 2463 } 2464 2465 double 2466 speed_operator_mod (struct speed_params *s) 2467 { 2468 double t; 2469 unsigned i; 2470 mp_limb_t x, r, d; 2471 2472 s->time_divisor = 10; 2473 2474 /* divisor from "r" parameter, or a default */ 2475 d = s->r; 2476 if (d == 0) 2477 d = mp_bases[10].big_base; 2478 2479 x = s->xp[0]; 2480 r = 0; 2481 2482 speed_starttime (); 2483 i = s->reps; 2484 do 2485 { 2486 r ^= x; r %= d; 2487 r ^= x; r %= d; 2488 r ^= x; r %= d; 2489 r ^= x; r %= d; 2490 r ^= x; r %= d; 2491 r ^= x; r %= d; 2492 r ^= x; r %= d; 2493 r ^= x; r %= d; 2494 r ^= x; r %= d; 2495 r ^= x; r %= d; 2496 } 2497 while (--i != 0); 2498 t = speed_endtime (); 2499 2500 /* stop the compiler optimizing away the whole calculation! */ 2501 noop_1 (r); 2502 2503 return t; 2504 } 2505 2506 2507 /* r==0 measures on data with the values uniformly distributed. This will 2508 be typical for count_trailing_zeros in a GCD etc. 2509 2510 r==1 measures on data with the resultant count uniformly distributed 2511 between 0 and GMP_LIMB_BITS-1. This is probably sensible for 2512 count_leading_zeros on the high limbs of divisors. */ 2513 2514 int 2515 speed_routine_count_zeros_setup (struct speed_params *s, 2516 mp_ptr xp, int leading, int zero) 2517 { 2518 int i, c; 2519 mp_limb_t n; 2520 2521 if (s->r == 0) 2522 { 2523 /* Make uniformly distributed data. If zero isn't allowed then change 2524 it to 1 for leading, or 0x800..00 for trailing. */ 2525 MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE); 2526 if (! zero) 2527 for (i = 0; i < SPEED_BLOCK_SIZE; i++) 2528 if (xp[i] == 0) 2529 xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT; 2530 } 2531 else if (s->r == 1) 2532 { 2533 /* Make counts uniformly distributed. A randomly chosen bit is set, and 2534 for leading the rest above it are cleared, or for trailing then the 2535 rest below. 
*/ 2536 for (i = 0; i < SPEED_BLOCK_SIZE; i++) 2537 { 2538 mp_limb_t set = CNST_LIMB(1) << (s->yp_block[i] % GMP_LIMB_BITS); 2539 mp_limb_t keep_below = set-1; 2540 mp_limb_t keep_above = MP_LIMB_T_MAX ^ keep_below; 2541 mp_limb_t keep = (leading ? keep_below : keep_above); 2542 xp[i] = (s->xp_block[i] & keep) | set; 2543 } 2544 } 2545 else 2546 { 2547 return 0; 2548 } 2549 2550 /* Account for the effect of n^=c. */ 2551 c = 0; 2552 for (i = 0; i < SPEED_BLOCK_SIZE; i++) 2553 { 2554 n = xp[i]; 2555 xp[i] ^= c; 2556 2557 if (leading) 2558 count_leading_zeros (c, n); 2559 else 2560 count_trailing_zeros (c, n); 2561 } 2562 2563 return 1; 2564 } 2565 2566 double 2567 speed_count_leading_zeros (struct speed_params *s) 2568 { 2569 #ifdef COUNT_LEADING_ZEROS_0 2570 #define COUNT_LEADING_ZEROS_0_ALLOWED 1 2571 #else 2572 #define COUNT_LEADING_ZEROS_0_ALLOWED 0 2573 #endif 2574 2575 SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED); 2576 count_leading_zeros (c, n); 2577 SPEED_ROUTINE_COUNT_ZEROS_B (); 2578 } 2579 double 2580 speed_count_trailing_zeros (struct speed_params *s) 2581 { 2582 SPEED_ROUTINE_COUNT_ZEROS_A (0, 0); 2583 count_trailing_zeros (c, n); 2584 SPEED_ROUTINE_COUNT_ZEROS_B (); 2585 } 2586 2587 2588 double 2589 speed_mpn_get_str (struct speed_params *s) 2590 { 2591 SPEED_ROUTINE_MPN_GET_STR (mpn_get_str); 2592 } 2593 2594 double 2595 speed_mpn_set_str (struct speed_params *s) 2596 { 2597 SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_set_str (wp, xp, s->size, base)); 2598 } 2599 double 2600 speed_mpn_bc_set_str (struct speed_params *s) 2601 { 2602 SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_bc_set_str (wp, xp, s->size, base)); 2603 } 2604 2605 double 2606 speed_MPN_ZERO (struct speed_params *s) 2607 { 2608 SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size)); 2609 } 2610 2611 2612 int 2613 speed_randinit (struct speed_params *s, gmp_randstate_ptr rstate) 2614 { 2615 if (s->r == 0) 2616 gmp_randinit_default (rstate); 2617 else if (s->r == 1) 2618 gmp_randinit_mt 
(rstate); 2619 else 2620 { 2621 return gmp_randinit_lc_2exp_size (rstate, s->r); 2622 } 2623 return 1; 2624 } 2625 2626 double 2627 speed_gmp_randseed (struct speed_params *s) 2628 { 2629 gmp_randstate_t rstate; 2630 unsigned i; 2631 double t; 2632 mpz_t x; 2633 2634 SPEED_RESTRICT_COND (s->size >= 1); 2635 SPEED_RESTRICT_COND (speed_randinit (s, rstate)); 2636 2637 /* s->size bits of seed */ 2638 mpz_init_set_n (x, s->xp, s->size); 2639 mpz_fdiv_r_2exp (x, x, (unsigned long) s->size); 2640 2641 /* cache priming */ 2642 gmp_randseed (rstate, x); 2643 2644 speed_starttime (); 2645 i = s->reps; 2646 do 2647 gmp_randseed (rstate, x); 2648 while (--i != 0); 2649 t = speed_endtime (); 2650 2651 gmp_randclear (rstate); 2652 mpz_clear (x); 2653 return t; 2654 } 2655 2656 double 2657 speed_gmp_randseed_ui (struct speed_params *s) 2658 { 2659 gmp_randstate_t rstate; 2660 unsigned i, j; 2661 double t; 2662 2663 SPEED_RESTRICT_COND (speed_randinit (s, rstate)); 2664 2665 /* cache priming */ 2666 gmp_randseed_ui (rstate, 123L); 2667 2668 speed_starttime (); 2669 i = s->reps; 2670 j = 0; 2671 do 2672 { 2673 gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]); 2674 j++; 2675 if (j >= SPEED_BLOCK_SIZE) 2676 j = 0; 2677 } 2678 while (--i != 0); 2679 t = speed_endtime (); 2680 2681 gmp_randclear (rstate); 2682 return t; 2683 } 2684 2685 double 2686 speed_mpz_urandomb (struct speed_params *s) 2687 { 2688 gmp_randstate_t rstate; 2689 mpz_t z; 2690 unsigned i; 2691 double t; 2692 2693 SPEED_RESTRICT_COND (s->size >= 0); 2694 SPEED_RESTRICT_COND (speed_randinit (s, rstate)); 2695 2696 mpz_init (z); 2697 2698 /* cache priming */ 2699 mpz_urandomb (z, rstate, (unsigned long) s->size); 2700 mpz_urandomb (z, rstate, (unsigned long) s->size); 2701 2702 speed_starttime (); 2703 i = s->reps; 2704 do 2705 mpz_urandomb (z, rstate, (unsigned long) s->size); 2706 while (--i != 0); 2707 t = speed_endtime (); 2708 2709 mpz_clear (z); 2710 gmp_randclear (rstate); 2711 return t; 2712 }