github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/common.c (about)

     1  /* Shared speed subroutines.
     2  
     3  Copyright 1999-2006, 2008-2015 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  #define __GMP_NO_ATTRIBUTE_CONST_PURE
    32  
    33  #include <errno.h>
    34  #include <fcntl.h>
    35  #include <math.h>
    36  #include <stdio.h>
    37  #include <stdlib.h> /* for qsort */
    38  #include <string.h>
    39  #include <unistd.h>
    40  #if 0
    41  #include <sys/ioctl.h>
    42  #endif
    43  
    44  #include "gmp.h"
    45  #include "gmp-impl.h"
    46  #include "longlong.h"
    47  
    48  #include "tests.h"
    49  #include "speed.h"
    50  
    51  
/* Set to 1 by the "addrs" -o option (see speed_option_set).  When non-zero,
   speed_cache_fill() prints the operand addresses each time they change.  */
int   speed_option_addrs = 0;
/* Verbosity level, raised by "verbose" or set by "verbose=N".
   speed_measure() prints every individual attempt when this is >= 3.  */
int   speed_option_verbose = 0;
/* Set to 1 by the "cycles-broken" -o option.  It is not read anywhere in
   this part of the file; consumers live elsewhere.  */
int   speed_option_cycles_broken = 0;
    55  
    56  
    57  /* Provide __clz_tab even if it's not required, for the benefit of new code
    58     being tested with many.pl. */
    59  #ifndef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
    60  #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
    61  #include "mp_clz_tab.c"
    62  #undef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
    63  #endif
    64  
    65  
/* Cache-invalidation hook, called by speed_cache_fill() when s->cache == 1.

   Both candidate implementations below are compiled out with "#if 0", so as
   built this function has an empty body and does nothing.  The disabled code
   is kept for reference:

     1. open /dev/wbinvd once (the descriptor is cached in a static) and
	issue an ioctl on it for every call;
     2. read through a 2 MiB heap buffer, summing its bytes, and hand the sum
	to mpn_cache_fill_dummy() so the loop cannot be optimized away.

   NOTE(review): variant 1 needs <sys/ioctl.h>, which is itself under "#if 0"
   at the top of the file, and variant 2 does not check malloc() for NULL;
   both would need attention before being re-enabled.  */
void
pentium_wbinvd(void)
{
#if 0
  {
    /* -2 means "not yet opened", -1 means "open failed, don't retry".  */
    static int  fd = -2;

    if (fd == -2)
      {
	fd = open ("/dev/wbinvd", O_RDWR);
	if (fd == -1)
	  perror ("open /dev/wbinvd");
      }

    if (fd != -1)
      ioctl (fd, 0, 0);
  }
#endif

#if 0
#define WBINVDSIZE  1024*1024*2
  {
    /* Buffer is allocated on first use and never freed.  */
    static char  *p = NULL;
    int   i, sum;

    if (p == NULL)
      p = malloc (WBINVDSIZE);

#if 0
    for (i = 0; i < WBINVDSIZE; i++)
      p[i] = i & 0xFF;
#endif

    sum = 0;
    for (i = 0; i < WBINVDSIZE; i++)
      sum += p[i];

    mpn_cache_fill_dummy (sum);
  }
#endif
}
   107  
   108  
   109  int
   110  double_cmp_ptr (const double *p, const double *q)
   111  {
   112    if (*p > *q)  return 1;
   113    if (*p < *q)  return -1;
   114    return 0;
   115  }
   116  
   117  
   118  /* Measure the speed of a given routine.
   119  
   120     The routine is run with enough repetitions to make it take at least
   121     speed_precision * speed_unittime.  This aims to minimize the effects of a
   122     limited accuracy time base and the overhead of the measuring itself.
   123  
   124     Measurements are made looking for 4 results within TOLERANCE of each
   125     other (or 3 for routines taking longer than 2 seconds).  This aims to get
   126     an accurate reading even if some runs are bloated by interrupts or task
   127     switches or whatever.
   128  
   129     The given (*fun)() is expected to run its function "s->reps" many times
   130     and return the total elapsed time measured using speed_starttime() and
   131     speed_endtime().  If the function doesn't support the given s->size or
   132     s->r, -1.0 should be returned.  See the various base routines below.  */
   133  
   134  double
   135  speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
   136  {
   137  #define TOLERANCE    1.01  /* 1% */
   138    const int max_zeros = 10;
   139  
   140    struct speed_params  s_dummy;
   141    int     i, j, e;
   142    double  t[30];
   143    double  t_unsorted[30];
   144    double  reps_d;
   145    int     zeros = 0;
   146  
   147    /* Use dummy parameters if caller doesn't provide any.  Only a few special
   148       "fun"s will cope with this, speed_noop() is one.  */
   149    if (s == NULL)
   150      {
   151        memset (&s_dummy, '\0', sizeof (s_dummy));
   152        s = &s_dummy;
   153      }
   154  
   155    s->reps = 1;
   156    s->time_divisor = 1.0;
   157    for (i = 0; i < numberof (t); i++)
   158      {
   159        for (;;)
   160  	{
   161  	  s->src_num = 0;
   162  	  s->dst_num = 0;
   163  
   164  	  t[i] = (*fun) (s);
   165  
   166  	  if (speed_option_verbose >= 3)
   167  	    gmp_printf("size=%ld reps=%u r=%Md attempt=%d  %.9f\n",
   168  		       (long) s->size, s->reps, s->r, i, t[i]);
   169  
   170  	  if (t[i] == 0.0)
   171  	    {
   172  	      zeros++;
   173  	      if (zeros > max_zeros)
   174  		{
   175  		  fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
   176  		  abort ();
   177  		}
   178  	     if (s->reps < 10000)
   179  	       s->reps *= 2;
   180  
   181  	      continue;
   182  	    }
   183  
   184  	  if (t[i] == -1.0)
   185  	    return -1.0;
   186  
   187  	  if (t[i] >= speed_unittime * speed_precision)
   188  	    break;
   189  
   190  	  /* go to a value of reps to make t[i] >= precision */
   191  	  reps_d = ceil (1.1 * s->reps
   192  			 * speed_unittime * speed_precision
   193  			 / MAX (t[i], speed_unittime));
   194  	  if (reps_d > 2e9 || reps_d < 1.0)
   195  	    {
   196  	      fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
   197  	      fprintf (stderr, "  (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
   198  		       s->reps, speed_unittime, speed_precision, t[i]);
   199  	      abort ();
   200  	    }
   201  	  s->reps = (unsigned) reps_d;
   202  	}
   203        t[i] /= s->reps;
   204        t_unsorted[i] = t[i];
   205  
   206        if (speed_precision == 0)
   207  	return t[i];
   208  
   209        /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
   210        if (t[0] >= 2.0)
   211  	e = 3;
   212        else
   213  	e = 4;
   214  
   215        /* Look for e many t[]'s within TOLERANCE of each other to consider a
   216  	 valid measurement.  Return smallest among them.  */
   217        if (i >= e)
   218  	{
   219  	  qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
   220  	  for (j = e-1; j < i; j++)
   221  	    if (t[j] <= t[j-e+1] * TOLERANCE)
   222  	      return t[j-e+1] / s->time_divisor;
   223  	}
   224      }
   225  
   226    fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
   227  	   e, (TOLERANCE-1.0)*100.0);
   228    fprintf (stderr, "    unsorted         sorted\n");
   229    fprintf (stderr, "  %.12f    %.12f    is about 0.5%%\n",
   230  	   t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
   231    for (i = 0; i < numberof (t); i++)
   232      fprintf (stderr, "  %.09f       %.09f\n", t_unsorted[i], t[i]);
   233  
   234    return -1.0;
   235  }
   236  
   237  
   238  /* Read all of ptr,size to get it into the CPU memory cache.
   239  
   240     A call to mpn_cache_fill_dummy() is used to make sure the compiler
   241     doesn't optimize away the whole loop.  Using "volatile mp_limb_t sum"
   242     would work too, but the function call means we don't rely on every
   243     compiler actually implementing volatile properly.
   244  
   245     mpn_cache_fill_dummy() is in a separate source file to stop gcc thinking
   246     it can inline it.  */
   247  
   248  void
   249  mpn_cache_fill (mp_srcptr ptr, mp_size_t size)
   250  {
   251    mp_limb_t  sum = 0;
   252    mp_size_t  i;
   253  
   254    for (i = 0; i < size; i++)
   255      sum += ptr[i];
   256  
   257    mpn_cache_fill_dummy(sum);
   258  }
   259  
   260  
/* Prime the cache for a destination operand {ptr,size}.

   As built this only reads the operand, via mpn_cache_fill(); nothing is
   written.  The two "#if 0" sections are disabled alternatives that would
   actually store to the operand (random data, or the limb index).  */
void
mpn_cache_fill_write (mp_ptr ptr, mp_size_t size)
{
  mpn_cache_fill (ptr, size);

#if 0
  mpn_random (ptr, size);
#endif

#if 0
  mp_size_t  i;

  for (i = 0; i < size; i++)
    ptr[i] = i;
#endif
}
   277  
   278  
   279  void
   280  speed_operand_src (struct speed_params *s, mp_ptr ptr, mp_size_t size)
   281  {
   282    if (s->src_num >= numberof (s->src))
   283      {
   284        fprintf (stderr, "speed_operand_src: no room left in s->src[]\n");
   285        abort ();
   286      }
   287    s->src[s->src_num].ptr = ptr;
   288    s->src[s->src_num].size = size;
   289    s->src_num++;
   290  }
   291  
   292  
   293  void
   294  speed_operand_dst (struct speed_params *s, mp_ptr ptr, mp_size_t size)
   295  {
   296    if (s->dst_num >= numberof (s->dst))
   297      {
   298        fprintf (stderr, "speed_operand_dst: no room left in s->dst[]\n");
   299        abort ();
   300      }
   301    s->dst[s->dst_num].ptr = ptr;
   302    s->dst[s->dst_num].size = size;
   303    s->dst_num++;
   304  }
   305  
   306  
   307  void
   308  speed_cache_fill (struct speed_params *s)
   309  {
   310    static struct speed_params  prev;
   311    int  i;
   312  
   313    /* FIXME: need a better way to get the format string for a pointer */
   314  
   315    if (speed_option_addrs)
   316      {
   317        int  different;
   318  
   319        different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num);
   320        for (i = 0; i < s->dst_num; i++)
   321  	different |= (s->dst[i].ptr != prev.dst[i].ptr);
   322        for (i = 0; i < s->src_num; i++)
   323  	different |= (s->src[i].ptr != prev.src[i].ptr);
   324  
   325        if (different)
   326  	{
   327  	  if (s->dst_num != 0)
   328  	    {
   329  	      printf ("dst");
   330  	      for (i = 0; i < s->dst_num; i++)
   331  		printf (" %08lX", (unsigned long) s->dst[i].ptr);
   332  	      printf (" ");
   333  	    }
   334  
   335  	  if (s->src_num != 0)
   336  	    {
   337  	      printf ("src");
   338  	      for (i = 0; i < s->src_num; i++)
   339  		printf (" %08lX", (unsigned long) s->src[i].ptr);
   340  	      printf (" ");
   341  	    }
   342  	  printf ("  (cf sp approx %08lX)\n", (unsigned long) &different);
   343  
   344  	}
   345  
   346        memcpy (&prev, s, sizeof(prev));
   347      }
   348  
   349    switch (s->cache) {
   350    case 0:
   351      for (i = 0; i < s->dst_num; i++)
   352        mpn_cache_fill_write (s->dst[i].ptr, s->dst[i].size);
   353      for (i = 0; i < s->src_num; i++)
   354        mpn_cache_fill (s->src[i].ptr, s->src[i].size);
   355      break;
   356    case 1:
   357      pentium_wbinvd();
   358      break;
   359    }
   360  }
   361  
   362  
   363  /* Miscellaneous options accepted by tune and speed programs under -o. */
   364  
   365  void
   366  speed_option_set (const char *s)
   367  {
   368    int  n;
   369  
   370    if (strcmp (s, "addrs") == 0)
   371      {
   372        speed_option_addrs = 1;
   373      }
   374    else if (strcmp (s, "verbose") == 0)
   375      {
   376        speed_option_verbose++;
   377      }
   378    else if (sscanf (s, "verbose=%d", &n) == 1)
   379      {
   380        speed_option_verbose = n;
   381      }
   382    else if (strcmp (s, "cycles-broken") == 0)
   383      {
   384        speed_option_cycles_broken = 1;
   385      }
   386    else
   387      {
   388        printf ("Unrecognised -o option: %s\n", s);
   389        exit (1);
   390      }
   391  }
   392  
   393  
   394  /* The following are basic speed running routines for various gmp functions.
   395     Many are very similar and use speed.h macros.
   396  
   Each routine allocates its own destination space for the result of the
   398     function, because only it can know what the function needs.
   399  
   400     speed_starttime() and speed_endtime() are put tight around the code to be
   401     measured.  Any setups are done outside the timed portion.
   402  
   403     Each routine is responsible for its own cache priming.
   404     speed_cache_fill() is a good way to do this, see examples in speed.h.
   405     One cache priming possibility, for CPUs with write-allocate cache, and
   406     functions that don't take too long, is to do one dummy call before timing
   407     so as to cache everything that gets used.  But speed_measure() runs a
   408     routine at least twice and will take the smaller time, so this might not
   409     be necessary.
   410  
   411     Data alignment will be important, for source, destination and temporary
   412     workspace.  A routine can align its destination and workspace.  Programs
   413     using the routines will ensure s->xp and s->yp are aligned.  Aligning
   414     onto a CACHE_LINE_SIZE boundary is suggested.  s->align_wp and
   415     s->align_wp2 should be respected where it makes sense to do so.
   416     SPEED_TMP_ALLOC_LIMBS is a good way to do this.
   417  
   418     A loop of the following form can be expected to turn into good assembler
   419     code on most CPUs, thereby minimizing overhead in the measurement.  It
   420     can always be assumed s->reps >= 1.
   421  
   422  	  i = s->reps
   423  	  do
   424  	    foo();
   425  	  while (--i != 0);
   426  
   427     Additional parameters might be added to "struct speed_params" in the
   428     future.  Routines should ignore anything they don't use.
   429  
   430     s->size can be used creatively, and s->xp and s->yp can be ignored.  For
   431     example, speed_mpz_fac_ui() uses s->size as n for the factorial.  s->r is
   432     just a user-supplied parameter.  speed_mpn_lshift() uses it as a shift,
   433     speed_mpn_mul_1() uses it as a multiplier.  */
   434  
   435  
/* MPN_COPY etc can be macros, so the _CALL forms are necessary */

/* Timing wrappers for copy-like routines.  Each function body consists
   solely of a SPEED_ROUTINE_* macro from speed.h, instantiated with the
   routine to time.  The macro is expected to supply the whole measurement,
   including the returned elapsed time, since no other return statement
   exists.  */
double
speed_MPN_COPY (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (MPN_COPY);
}
double
speed_MPN_COPY_INCR (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (MPN_COPY_INCR);
}
double
speed_MPN_COPY_DECR (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (MPN_COPY_DECR);
}
/* Only compiled when HAVE_NATIVE_mpn_copyi is set.  */
#if HAVE_NATIVE_mpn_copyi
double
speed_mpn_copyi (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_copyi);
}
#endif
/* Only compiled when HAVE_NATIVE_mpn_copyd is set.  */
#if HAVE_NATIVE_mpn_copyd
double
speed_mpn_copyd (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_copyd);
}
#endif
/* libc memcpy, timed through the byte-count flavour of the copy macro.  */
double
speed_memcpy (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
}
/* mpn_com and mpn_neg reuse the copy harness.  */
double
speed_mpn_com (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_com);
}
double
speed_mpn_neg (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_neg);
}
double
speed_mpn_sec_tabselect (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_TABSELECT (mpn_sec_tabselect);
}
   486  
   487  
/* Timing wrappers for mpn_addmul_N / mpn_submul_1.  Each body is a single
   SPEED_ROUTINE_MPN_UNARY_N macro from speed.h, which is expected to supply
   the whole measurement and its return value.  The _2 .. _8 variants are
   only compiled when the matching HAVE_NATIVE_mpn_addmul_N is set.  */
double
speed_mpn_addmul_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_addmul_1);
}
double
speed_mpn_submul_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_submul_1);
}

#if HAVE_NATIVE_mpn_addmul_2
double
speed_mpn_addmul_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_2 (mpn_addmul_2);
}
#endif
#if HAVE_NATIVE_mpn_addmul_3
double
speed_mpn_addmul_3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_3 (mpn_addmul_3);
}
#endif
#if HAVE_NATIVE_mpn_addmul_4
double
speed_mpn_addmul_4 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_4 (mpn_addmul_4);
}
#endif
#if HAVE_NATIVE_mpn_addmul_5
double
speed_mpn_addmul_5 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_5 (mpn_addmul_5);
}
#endif
#if HAVE_NATIVE_mpn_addmul_6
double
speed_mpn_addmul_6 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_6 (mpn_addmul_6);
}
#endif
#if HAVE_NATIVE_mpn_addmul_7
double
speed_mpn_addmul_7 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_7 (mpn_addmul_7);
}
#endif
#if HAVE_NATIVE_mpn_addmul_8
double
speed_mpn_addmul_8 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_8 (mpn_addmul_8);
}
#endif
   548  
/* Timing wrappers for mpn_mul_N.  Each body is a single SPEED_ROUTINE_*
   macro from speed.h, which is expected to supply the whole measurement and
   its return value.  mpn_mul_1 is timed twice, through the plain and the
   _INPLACE harness.  The _2 .. _6 variants are only compiled when the
   matching HAVE_NATIVE_mpn_mul_N is set.  */
double
speed_mpn_mul_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_mul_1);
}
double
speed_mpn_mul_1_inplace (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_mul_1);
}

#if HAVE_NATIVE_mpn_mul_2
double
speed_mpn_mul_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_2 (mpn_mul_2);
}
#endif
#if HAVE_NATIVE_mpn_mul_3
double
speed_mpn_mul_3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_3 (mpn_mul_3);
}
#endif
#if HAVE_NATIVE_mpn_mul_4
double
speed_mpn_mul_4 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
}
#endif
#if HAVE_NATIVE_mpn_mul_5
double
speed_mpn_mul_5 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
}
#endif
#if HAVE_NATIVE_mpn_mul_6
double
speed_mpn_mul_6 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
}
#endif
   595  
   596  
/* Timing wrappers for the shift routines, all through the
   SPEED_ROUTINE_MPN_UNARY_1 macro from speed.h (expected to supply the whole
   measurement and its return value).  */
double
speed_mpn_lshift (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);
}
double
speed_mpn_lshiftc (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);
}
double
speed_mpn_rshift (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);
}
   612  
   613  
/* The carry-in variants (if available) are good for measuring because they
   won't skip a division if high<divisor.  Alternately, use -1 as a divisor
   with the plain _1 forms. */

/* Timing wrappers for mpn_divrem_1 and its relatives.  Each body is a single
   SPEED_ROUTINE_* macro from speed.h, expected to supply the whole
   measurement and its return value.  Several routines are timed twice,
   through a plain harness and an "F" harness (_1 / _1F, _1C / _1CF); what
   the F harness changes is defined in speed.h.  */
double
speed_mpn_divrem_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1);
}
double
speed_mpn_divrem_1f (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1);
}
/* Carry-in forms, only compiled when HAVE_NATIVE_mpn_divrem_1c is set.  */
#if HAVE_NATIVE_mpn_divrem_1c
double
speed_mpn_divrem_1c (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1C (mpn_divrem_1c);
}
double
speed_mpn_divrem_1cf (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1CF (mpn_divrem_1c);
}
#endif

/* The _div and _inv routines are separately named implementations that
   share the harnesses used above.  */
double
speed_mpn_divrem_1_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_div);
}
double
speed_mpn_divrem_1f_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_div);
}
double
speed_mpn_divrem_1_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_inv);
}
double
speed_mpn_divrem_1f_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_1F (mpn_divrem_1_inv);
}
double
speed_mpn_mod_1_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_div);
}
double
speed_mpn_mod_1_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1_inv);
}
   670  
/* Timing wrappers for mpn_preinv_divrem_1 (plain and "F" harness) and
   mpn_mod_34lsub1.  Each body is a single SPEED_ROUTINE_* macro from speed.h,
   expected to supply the whole measurement and its return value.  */
double
speed_mpn_preinv_divrem_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1 (mpn_preinv_divrem_1);
}
double
speed_mpn_preinv_divrem_1f (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PREINV_DIVREM_1F (mpn_preinv_divrem_1);
}

/* Only compiled when GMP_NUMB_BITS is a multiple of 4.  */
#if GMP_NUMB_BITS % 4 == 0
double
speed_mpn_mod_34lsub1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_34LSUB1 (mpn_mod_34lsub1);
}
#endif
   689  
/* Timing wrappers for mpn_divrem_2 and its _div / _inv implementations, all
   through the SPEED_ROUTINE_MPN_DIVREM_2 macro from speed.h (expected to
   supply the whole measurement and its return value).  */
double
speed_mpn_divrem_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2);
}
double
speed_mpn_divrem_2_div (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_div);
}
double
speed_mpn_divrem_2_inv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
}
   705  
/* Timing wrappers for the mpn_div_qr_* routines.  Each body is a single
   SPEED_ROUTINE_* macro from speed.h, expected to supply the whole
   measurement and its return value.  */
double
speed_mpn_div_qr_1n_pi1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1);
}
double
speed_mpn_div_qr_1n_pi1_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_1);
}
double
speed_mpn_div_qr_1n_pi1_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_2);
}

double
speed_mpn_div_qr_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1);
}

/* The "2n" and "2u" entries time the same mpn_div_qr_2; they differ only in
   the second macro argument (1 vs 0).  Presumably this selects a normalized
   vs. unnormalized divisor -- confirm against the macro in speed.h.  */
double
speed_mpn_div_qr_2n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
}
double
speed_mpn_div_qr_2u (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
}
   738  
/* Timing wrappers for the mpn_mod_1 family.  Each body is a single
   SPEED_ROUTINE_* macro from speed.h, expected to supply the whole
   measurement and its return value.  */
double
speed_mpn_mod_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1);
}
/* Only compiled when HAVE_NATIVE_mpn_mod_1c is set.  */
#if HAVE_NATIVE_mpn_mod_1c
double
speed_mpn_mod_1c (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1C (mpn_mod_1c);
}
#endif
double
speed_mpn_preinv_mod_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
}
/* The mod_1_1 / mod_1_N harnesses take a routine together with its matching
   *_cps companion; the _N form additionally takes the number N.  */
double
speed_mpn_mod_1_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
}
double
speed_mpn_mod_1_1_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
}
double
speed_mpn_mod_1_1_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
}
double
speed_mpn_mod_1_2 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
}
double
speed_mpn_mod_1_3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3);
}
double
speed_mpn_mod_1_4 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4);
}
   786  
/* Timing wrappers for mpn_divexact_*, mpn_bdiv_* by one limb and
   mpn_modexact_*.  Each body is a single SPEED_ROUTINE_* macro from speed.h,
   expected to supply the whole measurement and its return value.  */
double
speed_mpn_divexact_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_DIVEXACT_1 (mpn_divexact_1);
}

/* mpn_divexact_by3 is timed through the generic copy harness.  */
double
speed_mpn_divexact_by3 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_COPY (mpn_divexact_by3);
}

double
speed_mpn_bdiv_dbm1c (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c);
}

double
speed_mpn_bdiv_q_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1);
}

double
speed_mpn_pi1_bdiv_q_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1);
}

/* Only compiled when HAVE_NATIVE_mpn_modexact_1_odd is set.  */
#if HAVE_NATIVE_mpn_modexact_1_odd
double
speed_mpn_modexact_1_odd (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MODEXACT_1_ODD (mpn_modexact_1_odd);
}
#endif

double
speed_mpn_modexact_1c_odd (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
}
   830  
/* Timing wrapper for mpz_mod.  The body is the SPEED_ROUTINE_MPZ_MOD macro
   from speed.h, expected to supply the whole measurement and its return
   value.  */
double
speed_mpz_mod (struct speed_params *s)
{
  SPEED_ROUTINE_MPZ_MOD (mpz_mod);
}
   836  
/* Timing wrappers for the sbpi1 / dcpi1 / mu division routines.  Each body
   is a single SPEED_ROUTINE_* macro from speed.h, expected to supply the
   whole measurement and its return value.

   The PI1_DIV harness takes, after the routine, an inverse argument
   (inv.inv32 or &inv, naming an "inv" object that is not declared in these
   functions and so presumably comes from the macro) and two numeric
   arguments whose meaning is defined in speed.h.  The MU harnesses take the
   routine and its matching *_itch function.  */
double
speed_mpn_sbpi1_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
}
double
speed_mpn_dcpi1_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
}
double
speed_mpn_sbpi1_divappr_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);
}
double
speed_mpn_dcpi1_divappr_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3);
}
double
speed_mpn_mu_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch);
}
double
speed_mpn_mu_divappr_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch);
}
double
speed_mpn_mu_div_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch);
}
/* Note the name mapping: the "mupi" entry times mpn_preinv_mu_div_qr.  */
double
speed_mpn_mupi_div_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch);
}
   877  
/* Timing wrappers for the sbpi1 / dcpi1 / mu bdiv routines.  Each body is a
   single SPEED_ROUTINE_* macro from speed.h, expected to supply the whole
   measurement and its return value.  The MU harnesses additionally take the
   routine's matching *_itch function.  */
double
speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
}
double
speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
}
double
speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
}
double
speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
}
double
speed_mpn_mu_bdiv_q (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
}
double
speed_mpn_mu_bdiv_qr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
}
   908  
/* Timing wrappers for mpn_broot, mpn_broot_invm1 and mpn_brootinv.  Each
   body is a single SPEED_ROUTINE_* macro from speed.h, expected to supply
   the whole measurement and its return value.  */
double
speed_mpn_broot (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BROOT (mpn_broot);
}
double
speed_mpn_broot_invm1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
}
/* The second argument, 5*s->size, is presumably the scratch space in limbs
   handed to the macro -- confirm against speed.h.  */
double
speed_mpn_brootinv (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
}
   924  
/* Timing wrappers for the inversion routines.  Each body is a single
   SPEED_ROUTINE_* macro from speed.h, expected to supply the whole
   measurement and its return value.  Every harness takes the routine plus an
   *_itch function.  */
double
speed_mpn_binvert (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
}

double
speed_mpn_invert (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch);
}

double
speed_mpn_invertappr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch);
}

/* mpn_ni_invertappr is paired with mpn_invertappr_itch, the same itch
   function used for mpn_invertappr above.  */
double
speed_mpn_ni_invertappr (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch);
}

double
speed_mpn_sec_invert (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_SEC_INVERT (mpn_sec_invert, mpn_sec_invert_itch);
}
   954  
/* Timing wrappers for mpn_redc_1, mpn_redc_2 and mpn_redc_n.  Each body is a
   single SPEED_ROUTINE_REDC_* macro from speed.h, expected to supply the
   whole measurement and its return value.  */
double
speed_mpn_redc_1 (struct speed_params *s)
{
  SPEED_ROUTINE_REDC_1 (mpn_redc_1);
}
double
speed_mpn_redc_2 (struct speed_params *s)
{
  SPEED_ROUTINE_REDC_2 (mpn_redc_2);
}
double
speed_mpn_redc_n (struct speed_params *s)
{
  SPEED_ROUTINE_REDC_N (mpn_redc_n);
}
   970  
   971  
/* Timing wrappers for mpn_popcount and mpn_hamdist.  Each body is a single
   SPEED_ROUTINE_* macro from speed.h, expected to supply the whole
   measurement and its return value.  */
double
speed_mpn_popcount (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_POPCOUNT (mpn_popcount);
}
double
speed_mpn_hamdist (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_HAMDIST (mpn_hamdist);
}
   982  
   983  
/* Timing wrappers for mpn_add_n / mpn_sub_n and mpn_add_1 / mpn_sub_1.  Each
   body is a single SPEED_ROUTINE_* macro from speed.h, expected to supply
   the whole measurement and its return value.  The _1 routines are timed
   twice, through the plain and the _INPLACE harness.  */
double
speed_mpn_add_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_N (mpn_add_n);
}
double
speed_mpn_sub_n (struct speed_params *s)
{
SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
}
double
speed_mpn_add_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_add_1);
}
double
speed_mpn_add_1_inplace (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_add_1);
}
double
speed_mpn_sub_1 (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1 (mpn_sub_1);
}
double
speed_mpn_sub_1_inplace (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_UNARY_1_INPLACE (mpn_sub_1);
}
  1014  
/* Timing wrappers for the mpn_add_errK_n / mpn_sub_errK_n routines
   (K = 1, 2, 3).  Each body is the SPEED_ROUTINE_MPN_BINARY_ERRK_N macro
   from speed.h matching K, expected to supply the whole measurement and its
   return value.  */
double
speed_mpn_add_err1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
}
double
speed_mpn_sub_err1_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
}
double
speed_mpn_add_err2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
}
double
speed_mpn_sub_err2_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
}
double
speed_mpn_add_err3_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
}
double
speed_mpn_sub_err3_n (struct speed_params *s)
{
  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
}
  1045  
  1046  
  1047  #if HAVE_NATIVE_mpn_add_n_sub_n
  1048  double
  1049  speed_mpn_add_n_sub_n (struct speed_params *s)
  1050  {
  1051    SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size));
  1052  }
  1053  #endif
  1054  
  /* Timing entry points for the shift-by-1 combined routines (addlsh1,
     sublsh1, rsblsh1 and their _ip1/_ip2 forms).  Each is compiled only when
     its HAVE_NATIVE_* symbol allows it.  The three-operand forms use
     SPEED_ROUTINE_MPN_BINARY_N and the _ip forms use SPEED_ROUTINE_MPN_COPY;
     both macros are defined elsewhere and supply the whole function body.
     NOTE(review): some guards test "== 1" rather than plain truth, presumably
     because other nonzero values carry a different meaning -- confirm against
     the configure output.  */
  1055  #if HAVE_NATIVE_mpn_addlsh1_n == 1
  1056  double
  1057  speed_mpn_addlsh1_n (struct speed_params *s)
  1058  {
  1059    SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n);
  1060  }
  1061  #endif
  1062  #if HAVE_NATIVE_mpn_sublsh1_n == 1
  1063  double
  1064  speed_mpn_sublsh1_n (struct speed_params *s)
  1065  {
  1066    SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
  1067  }
  1068  #endif
  1069  #if HAVE_NATIVE_mpn_addlsh1_n_ip1
  1070  double
  1071  speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
  1072  {
  1073    SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
  1074  }
  1075  #endif
  1076  #if HAVE_NATIVE_mpn_addlsh1_n_ip2
  1077  double
  1078  speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
  1079  {
  1080    SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
  1081  }
  1082  #endif
  1083  #if HAVE_NATIVE_mpn_sublsh1_n_ip1
  1084  double
  1085  speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
  1086  {
  1087    SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
  1088  }
  1089  #endif
  1090  #if HAVE_NATIVE_mpn_rsblsh1_n == 1
  1091  double
  1092  speed_mpn_rsblsh1_n (struct speed_params *s)
  1093  {
  1094    SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n);
  1095  }
  1096  #endif
  /* Timing entry points for the shift-by-2 combined routines (addlsh2,
     sublsh2, rsblsh2 and their _ip1/_ip2 forms), laid out exactly like the
     shift-by-1 set: each is guarded by its HAVE_NATIVE_* symbol, the
     three-operand forms use SPEED_ROUTINE_MPN_BINARY_N and the _ip forms use
     SPEED_ROUTINE_MPN_COPY (both macros defined elsewhere).  */
  1097  #if HAVE_NATIVE_mpn_addlsh2_n == 1
  1098  double
  1099  speed_mpn_addlsh2_n (struct speed_params *s)
  1100  {
  1101    SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n);
  1102  }
  1103  #endif
  1104  #if HAVE_NATIVE_mpn_sublsh2_n == 1
  1105  double
  1106  speed_mpn_sublsh2_n (struct speed_params *s)
  1107  {
  1108    SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
  1109  }
  1110  #endif
  1111  #if HAVE_NATIVE_mpn_addlsh2_n_ip1
  1112  double
  1113  speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
  1114  {
  1115    SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
  1116  }
  1117  #endif
  1118  #if HAVE_NATIVE_mpn_addlsh2_n_ip2
  1119  double
  1120  speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
  1121  {
  1122    SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
  1123  }
  1124  #endif
  1125  #if HAVE_NATIVE_mpn_sublsh2_n_ip1
  1126  double
  1127  speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
  1128  {
  1129    SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
  1130  }
  1131  #endif
  1132  #if HAVE_NATIVE_mpn_rsblsh2_n == 1
  1133  double
  1134  speed_mpn_rsblsh2_n (struct speed_params *s)
  1135  {
  1136    SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
  1137  }
  1138  #endif
  /* Timing entry points for the variable-shift combined routines, each
     compiled only when the corresponding HAVE_NATIVE_* symbol is set.  Every
     call is measured with a fixed shift count of 7.  The wp, xp and yp names
     in the calls are not declared here, so they must be provided by the
     expansion of the *_CALL macros (defined elsewhere).  */
  1139  #if HAVE_NATIVE_mpn_addlsh_n
  1140  double
  1141  speed_mpn_addlsh_n (struct speed_params *s)
  1142  {
  1143    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
  1144  }
  1145  #endif
  1146  #if HAVE_NATIVE_mpn_sublsh_n
  1147  double
  1148  speed_mpn_sublsh_n (struct speed_params *s)
  1149  {
  1150    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
  1151  }
  1152  #endif
  1153  #if HAVE_NATIVE_mpn_addlsh_n_ip1
  1154  double
  1155  speed_mpn_addlsh_n_ip1 (struct speed_params *s)
  1156  {
  1157    SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
  1158  }
  1159  #endif
  1160  #if HAVE_NATIVE_mpn_addlsh_n_ip2
  1161  double
  1162  speed_mpn_addlsh_n_ip2 (struct speed_params *s)
  1163  {
  1164    SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
  1165  }
  1166  #endif
  1167  #if HAVE_NATIVE_mpn_sublsh_n_ip1
  1168  double
  1169  speed_mpn_sublsh_n_ip1 (struct speed_params *s)
  1170  {
  1171    SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
  1172  }
  1173  #endif
  1174  #if HAVE_NATIVE_mpn_rsblsh_n
  1175  double
  1176  speed_mpn_rsblsh_n (struct speed_params *s)
  1177  {
  1178    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
  1179  }
  1180  #endif
  /* Timing entry points for mpn_rsh1add_n and mpn_rsh1sub_n, each compiled
     only when its HAVE_NATIVE_* symbol is set.  The body comes from
     SPEED_ROUTINE_MPN_BINARY_N (defined elsewhere).  */
  1181  #if HAVE_NATIVE_mpn_rsh1add_n
  1182  double
  1183  speed_mpn_rsh1add_n (struct speed_params *s)
  1184  {
  1185    SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1add_n);
  1186  }
  1187  #endif
  1188  #if HAVE_NATIVE_mpn_rsh1sub_n
  1189  double
  1190  speed_mpn_rsh1sub_n (struct speed_params *s)
  1191  {
  1192    SPEED_ROUTINE_MPN_BINARY_N (mpn_rsh1sub_n);
  1193  }
  1194  #endif
  1195  
  /* Timing entry points for mpn_cnd_add_n and mpn_cnd_sub_n.  The condition
     argument is fixed at 1 in both measurements.  wp, xp and yp are supplied
     by the SPEED_ROUTINE_MPN_BINARY_N_CALL expansion (defined elsewhere).  */
  1196  double
  1197  speed_mpn_cnd_add_n (struct speed_params *s)
  1198  {
  1199    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_add_n (1, wp, xp, yp, s->size));
  1200  }
  1201  double
  1202  speed_mpn_cnd_sub_n (struct speed_params *s)
  1203  {
  1204    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_sub_n (1, wp, xp, yp, s->size));
  1205  }
  1206  
  1207  /* mpn_and_n etc can be macros and so have to be handled with
  1208     SPEED_ROUTINE_MPN_BINARY_N_CALL forms, which take a complete call
           expression rather than a function name.  The eight routines below
           cover and, andn, nand, ior, iorn, nior, xor and xnor; each call
           uses wp, xp and yp from the macro expansion and s->size limbs.  */
  1209  double
  1210  speed_mpn_and_n (struct speed_params *s)
  1211  {
  1212    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
  1213  }
  1214  double
  1215  speed_mpn_andn_n (struct speed_params *s)
  1216  {
  1217    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
  1218  }
  1219  double
  1220  speed_mpn_nand_n (struct speed_params *s)
  1221  {
  1222    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
  1223  }
  1224  double
  1225  speed_mpn_ior_n (struct speed_params *s)
  1226  {
  1227    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
  1228  }
  1229  double
  1230  speed_mpn_iorn_n (struct speed_params *s)
  1231  {
  1232    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
  1233  }
  1234  double
  1235  speed_mpn_nior_n (struct speed_params *s)
  1236  {
  1237    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
  1238  }
  1239  double
  1240  speed_mpn_xor_n (struct speed_params *s)
  1241  {
  1242    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
  1243  }
  1244  double
  1245  speed_mpn_xnor_n (struct speed_params *s)
  1246  {
  1247    SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
  1248  }
  1249  
  1250  
  /* Timing entry points for mpn_mul_n and mpn_sqr.  speed_mpn_mul_n_sqr
     measures mpn_mul_n used as a squaring: it passes s->xp for both source
     operands under the SQR_CALL macro.  The macros are defined elsewhere and
     supply the function bodies (and the wp destination).  */
  1251  double
  1252  speed_mpn_mul_n (struct speed_params *s)
  1253  {
  1254    SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);
  1255  }
  1256  double
  1257  speed_mpn_sqr (struct speed_params *s)
  1258  {
  1259    SPEED_ROUTINE_MPN_SQR (mpn_sqr);
  1260  }
  1261  double
  1262  speed_mpn_mul_n_sqr (struct speed_params *s)
  1263  {
  1264    SPEED_ROUTINE_MPN_SQR_CALL (mpn_mul_n (wp, s->xp, s->xp, s->size));
  1265  }
  1266  
  /* Timing entry points for mpn_mul_basecase, mpn_mul and mpn_sqr_basecase.
     The first two go through SPEED_ROUTINE_MPN_MUL and the last through
     SPEED_ROUTINE_MPN_SQR; both macros are defined elsewhere.  */
  1267  double
  1268  speed_mpn_mul_basecase (struct speed_params *s)
  1269  {
  1270    SPEED_ROUTINE_MPN_MUL(mpn_mul_basecase);
  1271  }
  1272  double
  1273  speed_mpn_mul (struct speed_params *s)
  1274  {
  1275    SPEED_ROUTINE_MPN_MUL(mpn_mul);
  1276  }
  1277  double
  1278  speed_mpn_sqr_basecase (struct speed_params *s)
  1279  {
  1280    /* FIXME: size restrictions on some versions of sqr_basecase */
  1281    SPEED_ROUTINE_MPN_SQR (mpn_sqr_basecase);
  1282  }
  1283  
  /* Timing entry point for mpn_sqr_diagonal, compiled only when
     HAVE_NATIVE_mpn_sqr_diagonal is set.  Body from SPEED_ROUTINE_MPN_SQR
     (defined elsewhere).  */
  1284  #if HAVE_NATIVE_mpn_sqr_diagonal
  1285  double
  1286  speed_mpn_sqr_diagonal (struct speed_params *s)
  1287  {
  1288    SPEED_ROUTINE_MPN_SQR (mpn_sqr_diagonal);
  1289  }
  1290  #endif
  1291  
  /* Timing entry point for mpn_sqr_diag_addlsh1, compiled only when
     HAVE_NATIVE_mpn_sqr_diag_addlsh1 is set.  wp and tp are not declared
     here; they must come from the SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL
     expansion (defined elsewhere).  */
  1292  #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
  1293  double
  1294  speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
  1295  {
  1296    SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
  1297  }
  1298  #endif
  1299  
  /* Timing entry points for the Toom squaring routines mpn_toom2_sqr,
     toom3, toom4, toom6 and toom8.  Each has its own
     SPEED_ROUTINE_MPN_TOOMn_SQR macro (defined elsewhere) that supplies the
     whole function body.  */
  1300  double
  1301  speed_mpn_toom2_sqr (struct speed_params *s)
  1302  {
  1303    SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr);
  1304  }
  1305  double
  1306  speed_mpn_toom3_sqr (struct speed_params *s)
  1307  {
  1308    SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr);
  1309  }
  1310  double
  1311  speed_mpn_toom4_sqr (struct speed_params *s)
  1312  {
  1313    SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
  1314  }
  1315  double
  1316  speed_mpn_toom6_sqr (struct speed_params *s)
  1317  {
  1318    SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
  1319  }
  1320  double
  1321  speed_mpn_toom8_sqr (struct speed_params *s)
  1322  {
  1323    SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr);
  1324  }
  /* Timing entry points for mpn_toom22_mul, toom33, toom44, toom6h and
     toom8h, each measured through its own SPEED_ROUTINE_MPN_TOOMxx_MUL_N
     macro (defined elsewhere).  */
  1325  double
  1326  speed_mpn_toom22_mul (struct speed_params *s)
  1327  {
  1328    SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
  1329  }
  1330  double
  1331  speed_mpn_toom33_mul (struct speed_params *s)
  1332  {
  1333    SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
  1334  }
  1335  double
  1336  speed_mpn_toom44_mul (struct speed_params *s)
  1337  {
  1338    SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
  1339  }
  1340  double
  1341  speed_mpn_toom6h_mul (struct speed_params *s)
  1342  {
  1343    SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
  1344  }
  1345  double
  1346  speed_mpn_toom8h_mul (struct speed_params *s)
  1347  {
  1348    SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
  1349  }
  1350  
  /* Timing entry points for mpn_toom32_mul, toom42, toom43 and toom63, each
     measured through its own SPEED_ROUTINE_MPN_TOOMxy_MUL macro (defined
     elsewhere; note these are the _MUL forms, not _MUL_N).  */
  1351  double
  1352  speed_mpn_toom32_mul (struct speed_params *s)
  1353  {
  1354    SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul);
  1355  }
  1356  double
  1357  speed_mpn_toom42_mul (struct speed_params *s)
  1358  {
  1359    SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
  1360  }
  1361  double
  1362  speed_mpn_toom43_mul (struct speed_params *s)
  1363  {
  1364    SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
  1365  }
  1366  double
  1367  speed_mpn_toom63_mul (struct speed_params *s)
  1368  {
  1369    SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
  1370  }
  /* Paired timing entry points named speed_mpn_toomXX_for_toomYY_mul.  Each
     one measures mpn_toomXX_mul through the macro
     SPEED_ROUTINE_MPN_TOOMXX_FOR_TOOMYY_MUL (defined elsewhere), and every
     XX/YY pair appears in both directions.  NOTE(review): presumably each
     pair is measured under comparable operand shapes so the two algorithms
     can be compared directly -- confirm in the macro definitions.  */
  1371  double
  1372  speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
  1373  {
  1374    SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
  1375  }
  1376  double
  1377  speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
  1378  {
  1379    SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
  1380  }
  1381  double
  1382  speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
  1383  {
  1384    SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
  1385  }
  1386  double
  1387  speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
  1388  {
  1389    SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
  1390  }
  1391  double
  1392  speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
  1393  {
  1394    SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
  1395  }
  1396  double
  1397  speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
  1398  {
  1399    SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
  1400  }
  1401  double
  1402  speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
  1403  {
  1404    SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
  1405  }
  1406  double
  1407  speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
  1408  {
  1409    SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
  1410  }
  1411  
  /* Timing entry points for mpn_nussbaumer_mul, as a multiply (s->xp times
     s->yp) and as a squaring (s->xp for both operands).  Both operands are
     s->size limbs; wp is supplied by the *_CALL macro expansion (defined
     elsewhere).  */
  1412  double
  1413  speed_mpn_nussbaumer_mul (struct speed_params *s)
  1414  {
  1415    SPEED_ROUTINE_MPN_MUL_N_CALL
  1416      (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
  1417  }
  1418  double
  1419  speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
  1420  {
  1421    SPEED_ROUTINE_MPN_SQR_CALL
  1422      (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
  1423  }
  1424  
  /* Timing entry points for mpn_mul_fft_full, as a multiply and as a
     squaring (s->xp for both operands).  Compiled only when
     WANT_OLD_FFT_FULL is set.  wp is supplied by the *_CALL macro expansion
     (defined elsewhere).  */
  1425  #if WANT_OLD_FFT_FULL
  1426  double
  1427  speed_mpn_mul_fft_full (struct speed_params *s)
  1428  {
  1429    SPEED_ROUTINE_MPN_MUL_N_CALL
  1430      (mpn_mul_fft_full (wp, s->xp, s->size, s->yp, s->size));
  1431  }
  1432  double
  1433  speed_mpn_mul_fft_full_sqr (struct speed_params *s)
  1434  {
  1435    SPEED_ROUTINE_MPN_SQR_CALL
  1436      (mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
  1437  }
  1438  #endif
  1439  
  1440  /* These are mod 2^N+1 multiplies and squares.  If s->r is supplied it's
  1441     used as k, otherwise the best k for the size is used.  If s->size isn't a
  1442     multiple of 2^k it's rounded up to make the effective operation size.

           SPEED_ROUTINE_MPN_MUL_FFT_CALL (call, sqr) expands to a complete
           function body for a routine taking "struct speed_params *s".  It
           applies SPEED_RESTRICT_COND (s->size >= 1), picks k (s->r if
           nonzero, else mpn_fft_best_k), sets pl = mpn_fft_next_size
           (s->size, k) and allocates a pl+1 limb destination wp.  s->xp is
           registered as a source operand, and s->yp too when SQR is zero.
           CALL is then executed s->reps times between speed_starttime and
           speed_endtime, and that end time is returned.  CALL may refer to
           the locals wp, pl and k.  The do/while form always runs CALL at
           least once.  */
  1443  
  1444  #define SPEED_ROUTINE_MPN_MUL_FFT_CALL(call, sqr)       \
  1445    {                                                     \
  1446      mp_ptr     wp;                                      \
  1447      mp_size_t  pl;                                      \
  1448      int        k;                                       \
  1449      unsigned   i;                                       \
  1450      double     t;                                       \
  1451      TMP_DECL;                                           \
  1452  							\
  1453      SPEED_RESTRICT_COND (s->size >= 1);                 \
  1454  							\
  1455      if (s->r != 0)                                      \
  1456        k = s->r;                                         \
  1457      else                                                \
  1458        k = mpn_fft_best_k (s->size, sqr);                \
  1459  							\
  1460      TMP_MARK;                                           \
  1461      pl = mpn_fft_next_size (s->size, k);                \
  1462      SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp);      \
  1463  							\
  1464      speed_operand_src (s, s->xp, s->size);              \
  1465      if (!sqr)                                           \
  1466        speed_operand_src (s, s->yp, s->size);            \
  1467      speed_operand_dst (s, wp, pl+1);                    \
  1468      speed_cache_fill (s);                               \
  1469  							\
  1470      speed_starttime ();                                 \
  1471      i = s->reps;                                        \
  1472      do                                                  \
  1473        call;                                             \
  1474      while (--i != 0);                                   \
  1475      t = speed_endtime ();                               \
  1476  							\
  1477      TMP_FREE;                                           \
  1478      return t;                                           \
  1479    }
  1480  
  /* Time mpn_mul_fft multiplying s->xp by s->yp (s->size limbs each).  wp,
     pl and k are locals of the SPEED_ROUTINE_MPN_MUL_FFT_CALL expansion; the
     second macro argument 0 selects the non-squaring setup.  */
  1481  double
  1482  speed_mpn_mul_fft (struct speed_params *s)
  1483  {
  1484    SPEED_ROUTINE_MPN_MUL_FFT_CALL
  1485      (mpn_mul_fft (wp, pl, s->xp, s->size, s->yp, s->size, k), 0);
  1486  }
  1487  
  /* Time mpn_mul_fft used as a squaring: s->xp is passed for both operands.
     wp, pl and k are locals of the SPEED_ROUTINE_MPN_MUL_FFT_CALL expansion;
     the second macro argument 1 selects the squaring setup (best k chosen
     for squaring, s->yp not registered as an operand).  */
  1488  double
  1489  speed_mpn_mul_fft_sqr (struct speed_params *s)
  1490  {
  1491    SPEED_ROUTINE_MPN_MUL_FFT_CALL
  1492      (mpn_mul_fft (wp, pl, s->xp, s->size, s->xp, s->size, k), 1);
  1493  }
  1494  
  /* Time mpn_fft_mul multiplying s->xp by s->yp, s->size limbs each.  wp is
     supplied by the SPEED_ROUTINE_MPN_MUL_N_CALL expansion (defined
     elsewhere).  */
  1495  double
  1496  speed_mpn_fft_mul (struct speed_params *s)
  1497  {
  1498    SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size));
  1499  }
  1500  
  /* Time mpn_fft_mul used as a squaring: s->xp is passed for both operands.
     wp is supplied by the SPEED_ROUTINE_MPN_SQR_CALL expansion (defined
     elsewhere).  */
  1501  double
  1502  speed_mpn_fft_sqr (struct speed_params *s)
  1503  {
  1504    SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size));
  1505  }
  1506  
  /* Timing entry points for mpn_sqrlo, mpn_sqrlo_basecase, mpn_mullo_n and
     mpn_mullo_basecase.  Each body comes from the SPEED_ROUTINE_MPN_SQRLO,
     _MULLO_N or _MULLO_BASECASE macro named in it (all defined
     elsewhere).  */
  1507  double
  1508  speed_mpn_sqrlo (struct speed_params *s)
  1509  {
  1510    SPEED_ROUTINE_MPN_SQRLO (mpn_sqrlo);
  1511  }
  1512  double
  1513  speed_mpn_sqrlo_basecase (struct speed_params *s)
  1514  {
  1515    SPEED_ROUTINE_MPN_SQRLO (mpn_sqrlo_basecase);
  1516  }
  1517  double
  1518  speed_mpn_mullo_n (struct speed_params *s)
  1519  {
  1520    SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n);
  1521  }
  1522  double
  1523  speed_mpn_mullo_basecase (struct speed_params *s)
  1524  {
  1525    SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
  1526  }
  1527  
  /* Timing entry points for the middle-product routines
     mpn_mulmid_basecase, mpn_mulmid, mpn_mulmid_n and mpn_toom42_mulmid.
     Each body comes from the SPEED_ROUTINE_MPN_*MULMID* macro named in it
     (all defined elsewhere).  */
  1528  double
  1529  speed_mpn_mulmid_basecase (struct speed_params *s)
  1530  {
  1531    SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
  1532  }
  1533  
  1534  double
  1535  speed_mpn_mulmid (struct speed_params *s)
  1536  {
  1537    SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
  1538  }
  1539  
  1540  double
  1541  speed_mpn_mulmid_n (struct speed_params *s)
  1542  {
  1543    SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
  1544  }
  1545  
  1546  double
  1547  speed_mpn_toom42_mulmid (struct speed_params *s)
  1548  {
  1549    SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
  1550  }
  1551  
  /* Timing entry points for mpn_mulmod_bnm1, mpn_bc_mulmod_bnm1 and
     mpn_sqrmod_bnm1.  In the *_CALL forms the modulus size and every operand
     size passed are s->size; wp and tp are not declared here and must come
     from the SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL expansion (defined
     elsewhere).  speed_mpn_mulmod_bnm1_rounded instead hands mpn_mulmod_bnm1
     to the separate _ROUNDED macro.  */
  1552  double
  1553  speed_mpn_mulmod_bnm1 (struct speed_params *s)
  1554  {
  1555    SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp));
  1556  }
  1557  
  1558  double
  1559  speed_mpn_bc_mulmod_bnm1 (struct speed_params *s)
  1560  {
  1561    SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp));
  1562  }
  1563  
  1564  double
  1565  speed_mpn_mulmod_bnm1_rounded (struct speed_params *s)
  1566  {
  1567    SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1);
  1568  }
  1569  
  1570  double
  1571  speed_mpn_sqrmod_bnm1 (struct speed_params *s)
  1572  {
  1573    SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp));
  1574  }
  1575  
  /* Time mpn_matrix22_mul on two 2x2 matrices whose entries are s->size
     limbs each.  Returns the speed_endtime() result for s->reps iterations.
     Note the timed loop also includes four MPN_COPY calls per iteration that
     reload the result area from the saved input matrix.  */
  1576  double
  1577  speed_mpn_matrix22_mul (struct speed_params *s)
  1578  {
  1579    /* Speed params only includes 2 inputs, so we have to invent the
  1580       other 6. */
  1581  
  1582    mp_ptr a;
  1583    mp_ptr r;
  1584    mp_ptr b;
  1585    mp_ptr tp;
  1586    mp_size_t itch;
  1587    unsigned i;
  1588    double t;
  1589    TMP_DECL;
  1590  
  1591    TMP_MARK;
          /* a and b hold four size-limb entries each; r leaves 2*size+1 limbs
             per entry (see the offsets used in the loop below).  */
  1592    SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp);
  1593    SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp);
  1594    SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp);
  1595  
          /* First entry of each matrix is the caller's operand, the other
             three are filled by mpn_random.  */
  1596    MPN_COPY (a, s->xp, s->size);
  1597    mpn_random (a + s->size, 3 * s->size);
  1598    MPN_COPY (b, s->yp, s->size);
  1599    mpn_random (b + s->size, 3 * s->size);
  1600  
  1601    itch = mpn_matrix22_mul_itch (s->size, s->size);
  1602    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);
  1603  
  1604    speed_operand_src (s, a, 4 * s->size);
  1605    speed_operand_src (s, b, 4 * s->size);
  1606    speed_operand_dst (s, r, 8 * s->size + 4);
  1607    speed_operand_dst (s, tp, itch);
  1608    speed_cache_fill (s);
  1609  
  1610    speed_starttime ();
  1611    i = s->reps;
  1612    do
  1613      {
  1614        mp_size_t sz = s->size;
              /* Reload r from a before every call; presumably required
                 because mpn_matrix22_mul overwrites r in place -- TODO
                 confirm against its definition.  */
  1615        MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz);
  1616        MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz);
  1617        MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz);
  1618        MPN_COPY (r + 6 * sz + 3, a + 3 * sz, sz);
  1619        mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz,
  1620  			b, b + 1 * sz,     b + 2 * sz,     b + 3 * sz,     sz,
  1621  			tp);
  1622      }
  1623    while (--i != 0);
  1624    t = speed_endtime();
  1625    TMP_FREE;
  1626    return t;
  1627  }
  1628  
  /* Timing entry points for mpn_hgcd, mpn_hgcd_lehmer, mpn_hgcd_appr and
     mpn_hgcd_appr_lehmer.  Each passes the routine together with its
     matching *_itch function to SPEED_ROUTINE_MPN_HGCD_CALL (defined
     elsewhere), which supplies the function body.  */
  1629  double
  1630  speed_mpn_hgcd (struct speed_params *s)
  1631  {
  1632    SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
  1633  }
  1634  
  1635  double
  1636  speed_mpn_hgcd_lehmer (struct speed_params *s)
  1637  {
  1638    SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
  1639  }
  1640  
  1641  double
  1642  speed_mpn_hgcd_appr (struct speed_params *s)
  1643  {
  1644    SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
  1645  }
  1646  
  1647  double
  1648  speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
  1649  {
  1650    SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
  1651  }
  1652  
  /* Timing entry points for mpn_hgcd_reduce and its _1 and _2 variants.
     Each passes the routine together with its matching *_itch function to
     SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (defined elsewhere).  */
  1653  double
  1654  speed_mpn_hgcd_reduce (struct speed_params *s)
  1655  {
  1656    SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
  1657  }
  1658  double
  1659  speed_mpn_hgcd_reduce_1 (struct speed_params *s)
  1660  {
  1661    SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
  1662  }
  1663  double
  1664  speed_mpn_hgcd_reduce_2 (struct speed_params *s)
  1665  {
  1666    SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
  1667  }
  1668  
  /* Timing entry point for mpn_gcd; the body comes from
     SPEED_ROUTINE_MPN_GCD (defined elsewhere).  */
  1669  double
  1670  speed_mpn_gcd (struct speed_params *s)
  1671  {
  1672    SPEED_ROUTINE_MPN_GCD (mpn_gcd);
  1673  }
  1674  
  /* Timing entry points for the extended-gcd routines.  mpn_gcdext,
     mpn_gcdext_single and mpn_gcdext_double go through
     SPEED_ROUTINE_MPN_GCDEXT; the gcdext_one_* pair goes through
     SPEED_ROUTINE_MPN_GCDEXT_ONE (both macros defined elsewhere).  The
     __gmpn_gcdext_lehmer variant is disabled with "#if 0".  */
  1675  double
  1676  speed_mpn_gcdext (struct speed_params *s)
  1677  {
  1678    SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext);
  1679  }
  1680  #if 0
  1681  double
  1682  speed_mpn_gcdext_lehmer (struct speed_params *s)
  1683  {
  1684    SPEED_ROUTINE_MPN_GCDEXT (__gmpn_gcdext_lehmer);
  1685  }
  1686  #endif
  1687  double
  1688  speed_mpn_gcdext_single (struct speed_params *s)
  1689  {
  1690    SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_single);
  1691  }
  1692  double
  1693  speed_mpn_gcdext_double (struct speed_params *s)
  1694  {
  1695    SPEED_ROUTINE_MPN_GCDEXT (mpn_gcdext_double);
  1696  }
  1697  double
  1698  speed_mpn_gcdext_one_single (struct speed_params *s)
  1699  {
  1700    SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_single);
  1701  }
  1702  double
  1703  speed_mpn_gcdext_one_double (struct speed_params *s)
  1704  {
  1705    SPEED_ROUTINE_MPN_GCDEXT_ONE (mpn_gcdext_one_double);
  1706  }
  /* Two timing entry points for the same routine, mpn_gcd_1, measured under
     two different macros: SPEED_ROUTINE_MPN_GCD_1 and
     SPEED_ROUTINE_MPN_GCD_1N (both defined elsewhere; how their operand
     setups differ is not visible here).  */
  1707  double
  1708  speed_mpn_gcd_1 (struct speed_params *s)
  1709  {
  1710    SPEED_ROUTINE_MPN_GCD_1 (mpn_gcd_1);
  1711  }
  1712  double
  1713  speed_mpn_gcd_1N (struct speed_params *s)
  1714  {
  1715    SPEED_ROUTINE_MPN_GCD_1N (mpn_gcd_1);
  1716  }
  1717  
  1718  
  /* Timing entry points for mpz_jacobi (via SPEED_ROUTINE_MPZ_JACOBI) and
     for mpn_jacobi_base plus its numbered alternatives _1 to _4 (all via
     SPEED_ROUTINE_MPN_JACBASE).  Both macros are defined elsewhere and
     supply the function bodies.  */
  1719  double
  1720  speed_mpz_jacobi (struct speed_params *s)
  1721  {
  1722    SPEED_ROUTINE_MPZ_JACOBI (mpz_jacobi);
  1723  }
  1724  double
  1725  speed_mpn_jacobi_base (struct speed_params *s)
  1726  {
  1727    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base);
  1728  }
  1729  double
  1730  speed_mpn_jacobi_base_1 (struct speed_params *s)
  1731  {
  1732    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_1);
  1733  }
  1734  double
  1735  speed_mpn_jacobi_base_2 (struct speed_params *s)
  1736  {
  1737    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_2);
  1738  }
  1739  double
  1740  speed_mpn_jacobi_base_3 (struct speed_params *s)
  1741  {
  1742    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
  1743  }
  1744  double
  1745  speed_mpn_jacobi_base_4 (struct speed_params *s)
  1746  {
  1747    SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);
  1748  }
  1749  
  1750  
  /* Timing entry points for square roots and k-th roots of s->xp (s->size
     limbs).  The *rem forms pass wp2 as the remainder destination; the
     speed_mpn_sqrt and speed_mpn_root forms call the same mpn_sqrtrem /
     mpn_rootrem routines with NULL there instead.  The root routines take
     their degree from s->r.  wp and wp2 come from the
     SPEED_ROUTINE_MPN_SQRTROOT_CALL expansion (defined elsewhere).  */
  1751  double
  1752  speed_mpn_sqrtrem (struct speed_params *s)
  1753  {
  1754    SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_sqrtrem (wp, wp2, s->xp, s->size));
  1755  }
  1756  
  1757  double
  1758  speed_mpn_sqrt (struct speed_params *s)
  1759  {
  1760    SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_sqrtrem (wp, NULL, s->xp, s->size));
  1761  }
  1762  
  1763  double
  1764  speed_mpn_rootrem (struct speed_params *s)
  1765  {
  1766    SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_rootrem (wp, wp2, s->xp, s->size, s->r));
  1767  }
  1768  
  1769  double
  1770  speed_mpn_root (struct speed_params *s)
  1771  {
  1772    SPEED_ROUTINE_MPN_SQRTROOT_CALL (mpn_rootrem (wp, NULL, s->xp, s->size, s->r));
  1773  }
  1774  
  1775  
  /* Timing entry points for mpz_fac_ui (via SPEED_ROUTINE_MPZ_FAC_UI) and
     mpz_2fac_ui (via the more general SPEED_ROUTINE_MPZ_UI).  Both macros
     are defined elsewhere.  */
  1776  double
  1777  speed_mpz_fac_ui (struct speed_params *s)
  1778  {
  1779    SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui);
  1780  }
  1781  
  1782  double
  1783  speed_mpz_2fac_ui (struct speed_params *s)
  1784  {
  1785    SPEED_ROUTINE_MPZ_UI (mpz_2fac_ui);
  1786  }
  1787  
  1788  
  /* Timing entry points for the Fibonacci and Lucas number routines:
     mpn_fib2_ui, mpz_fib_ui, mpz_fib2_ui, mpz_lucnum_ui and mpz_lucnum2_ui.
     Each has its own SPEED_ROUTINE_* macro (defined elsewhere) that supplies
     the function body.  */
  1789  double
  1790  speed_mpn_fib2_ui (struct speed_params *s)
  1791  {
  1792    SPEED_ROUTINE_MPN_FIB2_UI (mpn_fib2_ui);
  1793  }
  1794  double
  1795  speed_mpz_fib_ui (struct speed_params *s)
  1796  {
  1797    SPEED_ROUTINE_MPZ_FIB_UI (mpz_fib_ui);
  1798  }
  1799  double
  1800  speed_mpz_fib2_ui (struct speed_params *s)
  1801  {
  1802    SPEED_ROUTINE_MPZ_FIB2_UI (mpz_fib2_ui);
  1803  }
  1804  double
  1805  speed_mpz_lucnum_ui (struct speed_params *s)
  1806  {
  1807    SPEED_ROUTINE_MPZ_LUCNUM_UI (mpz_lucnum_ui);
  1808  }
  1809  double
  1810  speed_mpz_lucnum2_ui (struct speed_params *s)
  1811  {
  1812    SPEED_ROUTINE_MPZ_LUCNUM2_UI (mpz_lucnum2_ui);
  1813  }
  1814  
  1815  
  /* Timing entry points for modular exponentiation: mpz_powm and its
     mpz_powm_mod, mpz_powm_redc and mpz_powm_sec variants share
     SPEED_ROUTINE_MPZ_POWM, while mpz_powm_ui uses
     SPEED_ROUTINE_MPZ_POWM_UI.  Both macros are defined elsewhere.  */
  1816  double
  1817  speed_mpz_powm (struct speed_params *s)
  1818  {
  1819    SPEED_ROUTINE_MPZ_POWM (mpz_powm);
  1820  }
  1821  double
  1822  speed_mpz_powm_mod (struct speed_params *s)
  1823  {
  1824    SPEED_ROUTINE_MPZ_POWM (mpz_powm_mod);
  1825  }
  1826  double
  1827  speed_mpz_powm_redc (struct speed_params *s)
  1828  {
  1829    SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
  1830  }
  1831  double
  1832  speed_mpz_powm_sec (struct speed_params *s)
  1833  {
  1834    SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec);
  1835  }
  1836  double
  1837  speed_mpz_powm_ui (struct speed_params *s)
  1838  {
  1839    SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
  1840  }
  1841  
  1842  
  /* Timing entry point for binvert_limb; the body comes from
     SPEED_ROUTINE_MODLIMB_INVERT (defined elsewhere).  */
  1843  double
  1844  speed_binvert_limb (struct speed_params *s)
  1845  {
  1846    SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb);
  1847  }
  1848  
  1849  
  /* Time s->reps calls to noop () and return the speed_endtime() result.
     NOTE(review): presumably a baseline for call/loop overhead -- confirm
     against how callers use it.  The do/while form means noop () is called
     at least once.  */
  1850  double
  1851  speed_noop (struct speed_params *s)
  1852  {
  1853    unsigned  i;
  1854  
  1855    speed_starttime ();
  1856    i = s->reps;
  1857    do
  1858      noop ();
  1859    while (--i != 0);
  1860    return speed_endtime ();
  1861  }
  1862  
  /* Time s->reps calls to noop_wxs (wp, s->xp, s->size), where wp is a
     one-limb temporary allocated just for the call.  Returns the
     speed_endtime() result.  The temporary is released with TMP_FREE after
     the timing has been read.  */
  1863  double
  1864  speed_noop_wxs (struct speed_params *s)
  1865  {
  1866    mp_ptr   wp;
  1867    unsigned i;
  1868    double   t;
  1869    TMP_DECL;
  1870  
  1871    TMP_MARK;
  1872    wp = TMP_ALLOC_LIMBS (1);
  1873  
  1874    speed_starttime ();
  1875    i = s->reps;
  1876    do
  1877      noop_wxs (wp, s->xp, s->size);
  1878    while (--i != 0);
  1879    t = speed_endtime ();
  1880  
  1881    TMP_FREE;
  1882    return t;
  1883  }
  1884  
  /* Time s->reps calls to noop_wxys (wp, s->xp, s->yp, s->size), where wp is
     a one-limb temporary allocated just for the call.  Returns the
     speed_endtime() result.  Same shape as speed_noop_wxs but with a second
     source operand.  */
  1885  double
  1886  speed_noop_wxys (struct speed_params *s)
  1887  {
  1888    mp_ptr   wp;
  1889    unsigned i;
  1890    double   t;
  1891    TMP_DECL;
  1892  
  1893    TMP_MARK;
  1894    wp = TMP_ALLOC_LIMBS (1);
  1895  
  1896    speed_starttime ();
  1897    i = s->reps;
  1898    do
  1899      noop_wxys (wp, s->xp, s->yp, s->size);
  1900    while (--i != 0);
  1901    t = speed_endtime ();
  1902  
  1903    TMP_FREE;
  1904    return t;
  1905  }
  1906  
  1907  
  /* SPEED_ROUTINE_ALLOC_FREE (variables, calls) expands to a block that
     declares VARIABLES, executes CALLS s->reps times between
     speed_starttime and speed_endtime, and returns that end time.  It
     expects a "struct speed_params *s" in scope and is meant to be the last
     statement of a function returning double.  CALLS runs at least once
     because of the do/while form.  */
  1908  #define SPEED_ROUTINE_ALLOC_FREE(variables, calls)      \
  1909    {                                                     \
  1910      unsigned  i;                                        \
  1911      variables;                                          \
  1912  							\
  1913      speed_starttime ();                                 \
  1914      i = s->reps;                                        \
  1915      do                                                  \
  1916        {                                                 \
  1917  	calls;                                          \
  1918        }                                                 \
  1919      while (--i != 0);                                   \
  1920      return speed_endtime ();                            \
  1921    }
  1922  
  1923  
  1924  /* Compare these to see how much malloc/free costs and then how much
  1925     __gmp_default_allocate/free and mpz_init/clear add.  mpz_init/clear or
  1926     mpq_init/clear will be doing a 1 limb allocate, so use that as the size
  1927     when including them in comparisons.  */
  1928  
  /* Time a malloc of s->size limbs (s->size * GMP_LIMB_BYTES bytes)
     immediately followed by free, repeated s->reps times.  The malloc result
     is not checked before being freed.  */
  1929  double
  1930  speed_malloc_free (struct speed_params *s)
  1931  {
  1932    size_t  bytes = s->size * GMP_LIMB_BYTES;
  1933    SPEED_ROUTINE_ALLOC_FREE (void *p,
  1934  			    p = malloc (bytes);
  1935  			    free (p));
  1936  }
  1937  
  /* Time a one-limb malloc, a realloc of that block to s->size limbs, and a
     free, repeated s->reps times.  Neither the malloc nor the realloc result
     is checked; the realloc result is assigned straight back to p.  */
  1938  double
  1939  speed_malloc_realloc_free (struct speed_params *s)
  1940  {
  1941    size_t  bytes = s->size * GMP_LIMB_BYTES;
  1942    SPEED_ROUTINE_ALLOC_FREE (void *p,
  1943  			    p = malloc (GMP_LIMB_BYTES);
  1944  			    p = realloc (p, bytes);
  1945  			    free (p));
  1946  }
  1947  
  /* Time an allocation of s->size limbs through the __gmp_allocate_func
     pointer followed by a release through __gmp_free_func (which is given
     the same byte count), repeated s->reps times.  */
  1948  double
  1949  speed_gmp_allocate_free (struct speed_params *s)
  1950  {
  1951    size_t  bytes = s->size * GMP_LIMB_BYTES;
  1952    SPEED_ROUTINE_ALLOC_FREE (void *p,
  1953  			    p = (*__gmp_allocate_func) (bytes);
  1954  			    (*__gmp_free_func) (p, bytes));
  1955  }
  1956  
  /* Time a one-limb allocation through __gmp_allocate_func, a resize of that
     block to s->size limbs through __gmp_reallocate_func, and a release
     through __gmp_free_func, repeated s->reps times.

     The GMP reallocate hook takes (ptr, old_size, new_size).  The block
     starts out GMP_LIMB_BYTES long and is grown to BYTES, so those are the
     old and new sizes in that order; the free hook is then given BYTES, the
     size the block has after the resize.  */
  1957  double
  1958  speed_gmp_allocate_reallocate_free (struct speed_params *s)
  1959  {
  1960    size_t  bytes = s->size * GMP_LIMB_BYTES;
  1961    SPEED_ROUTINE_ALLOC_FREE
  1962      (void *p,
  1963       p = (*__gmp_allocate_func) (GMP_LIMB_BYTES);
  1964       p = (*__gmp_reallocate_func) (p, GMP_LIMB_BYTES, bytes);
  1965       (*__gmp_free_func) (p, bytes));
  1966  }
  1967  
  /* Time an mpz_init immediately followed by mpz_clear on a local mpz_t,
     repeated s->reps times.  s->size is not used.  */
  1968  double
  1969  speed_mpz_init_clear (struct speed_params *s)
  1970  {
  1971    SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
  1972  			    mpz_init (z);
  1973  			    mpz_clear (z));
  1974  }
  1975  
  /* Time mpz_init, a _mpz_realloc of the value to s->size limbs, and
     mpz_clear on a local mpz_t, repeated s->reps times.  */
  1976  double
  1977  speed_mpz_init_realloc_clear (struct speed_params *s)
  1978  {
  1979    SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
  1980  			    mpz_init (z);
  1981  			    _mpz_realloc (z, s->size);
  1982  			    mpz_clear (z));
  1983  }
  1984  
  /* Time an mpq_init immediately followed by mpq_clear on a local mpq_t,
     repeated s->reps times.  s->size is not used.  */
  1985  double
  1986  speed_mpq_init_clear (struct speed_params *s)
  1987  {
  1988    SPEED_ROUTINE_ALLOC_FREE (mpq_t q,
  1989  			    mpq_init (q);
  1990  			    mpq_clear (q));
  1991  }
  1992  
  /* Time an mpf_init immediately followed by mpf_clear on a local mpf_t,
     repeated s->reps times.  s->size is not used.  */
  1993  double
  1994  speed_mpf_init_clear (struct speed_params *s)
  1995  {
  1996    SPEED_ROUTINE_ALLOC_FREE (mpf_t f,
  1997  			    mpf_init (f);
  1998  			    mpf_clear (f));
  1999  }
  2000  
  2001  
  2002  /* Compare this to mpn_add_n to see how much overhead mpz_add adds.  Note
  2003     that repeatedly calling mpz_add with the same data gives branch prediction
  2004     in it an advantage.  */
  2005  
  /* Time s->reps calls of mpz_add (w, x, y), with x and y set from the
     s->size limb operands s->xp and s->yp.  Returns the speed_endtime()
     result; the three mpz_t values are cleared before returning.  */
  2006  double
  2007  speed_mpz_add (struct speed_params *s)
  2008  {
  2009    mpz_t     w, x, y;
  2010    unsigned  i;
  2011    double    t;
  2012  
  2013    mpz_init (w);
  2014    mpz_init (x);
  2015    mpz_init (y);
  2016  
  2017    mpz_set_n (x, s->xp, s->size);
  2018    mpz_set_n (y, s->yp, s->size);
          /* One untimed call before the clock starts; presumably so that w
             is already grown to its final size and no reallocation is
             measured -- TODO confirm.  */
  2019    mpz_add (w, x, y);
  2020  
  2021    speed_starttime ();
  2022    i = s->reps;
  2023    do
  2024      {
  2025        mpz_add (w, x, y);
  2026      }
  2027    while (--i != 0);
  2028    t = speed_endtime ();
  2029  
  2030    mpz_clear (w);
  2031    mpz_clear (x);
  2032    mpz_clear (y);
  2033    return t;
  2034  }
  2035  
  2036  
  2037  /* Time mpz_bin_uiui.  If r==0, calculate binomial(size,size/2),
  2038     otherwise calculate binomial(size,r).  Here s->size is used as the
           value n itself, not as a limb count.  Returns the speed_endtime()
           result for s->reps calls.  */
  2039  
  2040  double
  2041  speed_mpz_bin_uiui (struct speed_params *s)
  2042  {
  2043    mpz_t          w;
  2044    unsigned long  k;
  2045    unsigned  i;
  2046    double    t;
  2047  
  2048    mpz_init (w);
  2049    if (s->r != 0)
  2050      k = s->r;
  2051    else
  2052      k = s->size/2;
  2053  
  2054    speed_starttime ();
  2055    i = s->reps;
  2056    do
  2057      {
  2058        mpz_bin_uiui (w, s->size, k);
  2059      }
  2060    while (--i != 0);
  2061    t = speed_endtime ();
  2062  
  2063    mpz_clear (w);
  2064    return t;
  2065  }
  2066  
  2067  /* Time mpz_bin_ui.  If r==0, calculate binomial(2^size,size),
  2068     otherwise calculate binomial(2^size,r).  The value 2^size is built by
           setting bit s->size in a zero mpz_t.  Returns the speed_endtime()
           result for s->reps calls.  */
  2069  
  2070  double
  2071  speed_mpz_bin_ui (struct speed_params *s)
  2072  {
  2073    mpz_t          w, x;
  2074    unsigned long  k;
  2075    unsigned  i;
  2076    double    t;
  2077  
  2078    mpz_init (w);
  2079    mpz_init_set_ui (x, 0);
  2080  
  2081    mpz_setbit (x, s->size);
  2082  
  2083    if (s->r != 0)
  2084      k = s->r;
  2085    else
  2086      k = s->size;
  2087  
  2088    speed_starttime ();
  2089    i = s->reps;
  2090    do
  2091      {
  2092        mpz_bin_ui (w, x, k);
  2093      }
  2094    while (--i != 0);
  2095    t = speed_endtime ();
  2096  
  2097    mpz_clear (w);
  2098    mpz_clear (x);
  2099    return t;
  2100  }
  2101  
  2102  /* The multiplies are successively dependent so the latency is measured, not
  2103     the issue rate.  There's only 10 per loop so the code doesn't get too big
  2104     since umul_ppmm is several instructions on some cpus.
  2105  
  2106     Putting the arguments as "h,l,l,h" gets slightly better code from gcc
  2107     2.95.2 on x86: it puts only one mov between each mul, not two.  That mov
  2108     will probably still show up as a bogus extra cycle, though.
  2109  
  2110     The measuring function macros are split into three parts to avoid
  2111     overflowing preprocessor expansion space if umul_ppmm is big.
  2112  
  2113     Limitations:
  2114  
  2115     Don't blindly use this to set UMUL_TIME in gmp-mparam.h, check the code
  2116     generated first, especially on CPUs with low latency multipliers.
  2117  
  2118     The default umul_ppmm doing h*l will be getting increasing numbers of
  2119     high zero bits in the calculation.  CPUs with data-dependent multipliers
  2120     will want to use umul_ppmm.1 to get some randomization into the
  2121     calculation.  The extra xors and fetches will be a slowdown of course.  */
  2122  
  2123  #define SPEED_MACRO_UMUL_PPMM_A \
  2124    {                             \
  2125      mp_limb_t  h, l;            \
  2126      unsigned   i;               \
  2127      double     t;               \
  2128  				\
  2129      s->time_divisor = 10;       \
  2130  				\
  2131      h = s->xp[0];               \
  2132      l = s->yp[0];               \
  2133  				\
  2134      if (s->r == 1)              \
  2135        {                         \
  2136  	speed_starttime ();     \
  2137  	i = s->reps;            \
  2138  	do                      \
  2139  	  {
  2140  
  2141  #define SPEED_MACRO_UMUL_PPMM_B \
  2142  	  }                     \
  2143  	while (--i != 0);       \
  2144  	t = speed_endtime ();   \
  2145        }                         \
  2146      else                        \
  2147        {                         \
  2148  	speed_starttime ();     \
  2149  	i = s->reps;            \
  2150  	do                      \
  2151  	  {
  2152  
  2153  #define SPEED_MACRO_UMUL_PPMM_C                                         \
  2154  	  }                                                             \
  2155  	while (--i != 0);                                               \
  2156  	t = speed_endtime ();                                           \
  2157        }                                                                 \
  2158  									\
  2159      /* stop the compiler optimizing away the whole calculation! */      \
  2160      noop_1 (h);                                                         \
  2161      noop_1 (l);                                                         \
  2162  									\
  2163      return t;                                                           \
  2164    }
  2165  
  2166  
  2167  double
  2168  speed_umul_ppmm (struct speed_params *s)
  2169  {
          /* Time ten successively dependent umul_ppmm operations per loop
             iteration.  Setup, the two timing loops and the return come from
             SPEED_MACRO_UMUL_PPMM_A/B/C; only the loop bodies are written
             here.  */
  2170    SPEED_MACRO_UMUL_PPMM_A;
  2171    {
            /* Body of the s->r == 1 loop: after every multiply, limbs from
               s->xp_block and s->yp_block are xored into h and l.  */
  2172      umul_ppmm (h, l, l, h);  h ^= s->xp_block[0]; l ^= s->yp_block[0];
  2173       umul_ppmm (h, l, l, h); h ^= s->xp_block[1]; l ^= s->yp_block[1];
  2174       umul_ppmm (h, l, l, h); h ^= s->xp_block[2]; l ^= s->yp_block[2];
  2175      umul_ppmm (h, l, l, h);  h ^= s->xp_block[3]; l ^= s->yp_block[3];
  2176       umul_ppmm (h, l, l, h); h ^= s->xp_block[4]; l ^= s->yp_block[4];
  2177       umul_ppmm (h, l, l, h); h ^= s->xp_block[5]; l ^= s->yp_block[5];
  2178      umul_ppmm (h, l, l, h);  h ^= s->xp_block[6]; l ^= s->yp_block[6];
  2179       umul_ppmm (h, l, l, h); h ^= s->xp_block[7]; l ^= s->yp_block[7];
  2180       umul_ppmm (h, l, l, h); h ^= s->xp_block[8]; l ^= s->yp_block[8];
  2181      umul_ppmm (h, l, l, h);  h ^= s->xp_block[9]; l ^= s->yp_block[9];
  2182    }
  2183    SPEED_MACRO_UMUL_PPMM_B;
  2184    {
            /* Body of the loop for any other s->r: the bare dependent chain,
               each multiply taking the previous h and l as its operands.  */
  2185      umul_ppmm (h, l, l, h);
  2186       umul_ppmm (h, l, l, h);
  2187       umul_ppmm (h, l, l, h);
  2188      umul_ppmm (h, l, l, h);
  2189       umul_ppmm (h, l, l, h);
  2190       umul_ppmm (h, l, l, h);
  2191      umul_ppmm (h, l, l, h);
  2192       umul_ppmm (h, l, l, h);
  2193       umul_ppmm (h, l, l, h);
  2194      umul_ppmm (h, l, l, h);
  2195    }
  2196    SPEED_MACRO_UMUL_PPMM_C;
  2197  }
  2198  
  2199  
  2200  #if HAVE_NATIVE_mpn_umul_ppmm
  2201  double
  2202  speed_mpn_umul_ppmm (struct speed_params *s)
  2203  {
          /* Time ten successively dependent mpn_umul_ppmm calls per loop
             iteration, using the SPEED_MACRO_UMUL_PPMM_A/B/C scaffolding.  The
             return value becomes the next h and &l receives the other result
             limb (presumably the low half -- confirm against the
             mpn_umul_ppmm declaration).  */
  2204    SPEED_MACRO_UMUL_PPMM_A;
  2205    {
            /* Body of the s->r == 1 loop: limbs from s->xp_block and
               s->yp_block are xored into h and l after every call.  */
  2206      h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[0]; l ^= s->yp_block[0];
  2207       h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[1]; l ^= s->yp_block[1];
  2208       h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[2]; l ^= s->yp_block[2];
  2209      h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[3]; l ^= s->yp_block[3];
  2210       h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[4]; l ^= s->yp_block[4];
  2211       h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[5]; l ^= s->yp_block[5];
  2212      h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[6]; l ^= s->yp_block[6];
  2213       h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[7]; l ^= s->yp_block[7];
  2214       h = mpn_umul_ppmm (&l, h, l); h ^= s->xp_block[8]; l ^= s->yp_block[8];
  2215      h = mpn_umul_ppmm (&l, h, l);  h ^= s->xp_block[9]; l ^= s->yp_block[9];
  2216    }
  2217    SPEED_MACRO_UMUL_PPMM_B;
  2218    {
            /* Body of the loop for any other s->r: the bare dependent chain.  */
  2219      h = mpn_umul_ppmm (&l, h, l);
  2220       h = mpn_umul_ppmm (&l, h, l);
  2221       h = mpn_umul_ppmm (&l, h, l);
  2222      h = mpn_umul_ppmm (&l, h, l);
  2223       h = mpn_umul_ppmm (&l, h, l);
  2224       h = mpn_umul_ppmm (&l, h, l);
  2225      h = mpn_umul_ppmm (&l, h, l);
  2226       h = mpn_umul_ppmm (&l, h, l);
  2227       h = mpn_umul_ppmm (&l, h, l);
  2228      h = mpn_umul_ppmm (&l, h, l);
  2229    }
  2230    SPEED_MACRO_UMUL_PPMM_C;
  2231  }
  2232  #endif
  2233  
  2234  #if HAVE_NATIVE_mpn_umul_ppmm_r
  2235  double
  2236  speed_mpn_umul_ppmm_r (struct speed_params *s)
  2237  {
          /* Time ten successively dependent mpn_umul_ppmm_r calls per loop
             iteration, using the SPEED_MACRO_UMUL_PPMM_A/B/C scaffolding.  Here
             the result pointer &l is the last argument rather than the first;
             the return value again becomes the next h.  */
  2238    SPEED_MACRO_UMUL_PPMM_A;
  2239    {
            /* Body of the s->r == 1 loop: limbs from s->xp_block and
               s->yp_block are xored into h and l after every call.  */
  2240      h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[0]; l ^= s->yp_block[0];
  2241       h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[1]; l ^= s->yp_block[1];
  2242       h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[2]; l ^= s->yp_block[2];
  2243      h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[3]; l ^= s->yp_block[3];
  2244       h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[4]; l ^= s->yp_block[4];
  2245       h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[5]; l ^= s->yp_block[5];
  2246      h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[6]; l ^= s->yp_block[6];
  2247       h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[7]; l ^= s->yp_block[7];
  2248       h = mpn_umul_ppmm_r (h, l, &l); h ^= s->xp_block[8]; l ^= s->yp_block[8];
  2249      h = mpn_umul_ppmm_r (h, l, &l);  h ^= s->xp_block[9]; l ^= s->yp_block[9];
  2250    }
  2251    SPEED_MACRO_UMUL_PPMM_B;
  2252    {
            /* Body of the loop for any other s->r: the bare dependent chain.  */
  2253      h = mpn_umul_ppmm_r (h, l, &l);
  2254       h = mpn_umul_ppmm_r (h, l, &l);
  2255       h = mpn_umul_ppmm_r (h, l, &l);
  2256      h = mpn_umul_ppmm_r (h, l, &l);
  2257       h = mpn_umul_ppmm_r (h, l, &l);
  2258       h = mpn_umul_ppmm_r (h, l, &l);
  2259      h = mpn_umul_ppmm_r (h, l, &l);
  2260       h = mpn_umul_ppmm_r (h, l, &l);
  2261       h = mpn_umul_ppmm_r (h, l, &l);
  2262      h = mpn_umul_ppmm_r (h, l, &l);
  2263    }
  2264    SPEED_MACRO_UMUL_PPMM_C;
  2265  }
  2266  #endif
  2267  
  2268  
  2269  /* The divisions are successively dependent so latency is measured, not
  2270     issue rate.  There's only 10 per loop so the code doesn't get too big,
  2271     especially for udiv_qrnnd_preinv and preinv2norm, which are several
  2272     instructions each.
  2273  
  2274     Note that it's only the division which is measured here, there's no data
  2275     fetching and no shifting if the divisor gets normalized.
  2276  
  2277     In speed_udiv_qrnnd with gcc 2.95.2 on x86 the parameters "q,r,r,q,d"
  2278     generate x86 div instructions with nothing in between.
  2279  
  2280     The measuring function macros are in two parts to avoid overflowing
  2281     preprocessor expansion space if udiv_qrnnd etc are big.
        
           SPEED_ROUTINE_UDIV_QRNND_A(normalize) opens a compound statement,
           declares t, i, q, r, d and dinv, sets s->time_divisor to 10, and takes
           the divisor d from s->r, or mp_bases[10].big_base when s->r is zero.
           If "normalize" is non-zero, d is shifted left by its leading zero
           count and dinv is set from it with invert_limb; otherwise dinv is left
           unset.  q starts as s->xp[0] and r as s->yp[0] % d, then the timed
           loop is opened.  SPEED_ROUTINE_UDIV_QRNND_B closes the loop, passes q
           and r to noop_1, returns t and closes the compound statement.  The
           user writes the loop body between the macros, referring to q, r, d
           and dinv by name.
  2282  
  2283     Limitations:
  2284  
  2285     Don't blindly use this to set UDIV_TIME in gmp-mparam.h, check the code
  2286     generated first.
  2287  
  2288     CPUs with data-dependent divisions may want more attention paid to the
  2289     randomness of the data used.  Probably the measurement wanted is over
  2290     uniformly distributed numbers, but what's here might not be giving that.  */
  2291  
  2292  #define SPEED_ROUTINE_UDIV_QRNND_A(normalize)           \
  2293    {                                                     \
  2294      double     t;                                       \
  2295      unsigned   i;                                       \
  2296      mp_limb_t  q, r, d;                                 \
  2297      mp_limb_t  dinv;                                    \
  2298  							\
  2299      s->time_divisor = 10;                               \
  2300  							\
  2301      /* divisor from "r" parameter, or a default */      \
  2302      d = s->r;                                           \
  2303      if (d == 0)                                         \
  2304        d = mp_bases[10].big_base;                        \
  2305  							\
  2306      if (normalize)                                      \
  2307        {                                                 \
  2308  	unsigned  norm;                                 \
  2309  	count_leading_zeros (norm, d);                  \
  2310  	d <<= norm;                                     \
  2311  	invert_limb (dinv, d);                          \
  2312        }                                                 \
  2313  							\
  2314      q = s->xp[0];                                       \
  2315      r = s->yp[0] % d;                                   \
  2316  							\
  2317      speed_starttime ();                                 \
  2318      i = s->reps;                                        \
  2319      do                                                  \
  2320        {
  2321  
  2322  #define SPEED_ROUTINE_UDIV_QRNND_B                                      \
  2323        }                                                                 \
  2324      while (--i != 0);                                                   \
  2325      t = speed_endtime ();                                               \
  2326  									\
  2327      /* stop the compiler optimizing away the whole calculation! */      \
  2328      noop_1 (q);                                                         \
  2329      noop_1 (r);                                                         \
  2330  									\
  2331      return t;                                                           \
  2332    }
  2333  
  2334  double
  2335  speed_udiv_qrnnd (struct speed_params *s)
  2336  {
          /* Time ten successively dependent udiv_qrnnd operations per loop
             iteration.  The setup macro normalizes the divisor only when
             UDIV_NEEDS_NORMALIZATION is non-zero.  */
  2337    SPEED_ROUTINE_UDIV_QRNND_A (UDIV_NEEDS_NORMALIZATION);
  2338    {
            /* Argument order q,r,r,q,d: the q and r produced by one step are
               passed back in as inputs to the next.  */
  2339      udiv_qrnnd (q, r, r, q, d);
  2340       udiv_qrnnd (q, r, r, q, d);
  2341       udiv_qrnnd (q, r, r, q, d);
  2342      udiv_qrnnd (q, r, r, q, d);
  2343       udiv_qrnnd (q, r, r, q, d);
  2344       udiv_qrnnd (q, r, r, q, d);
  2345      udiv_qrnnd (q, r, r, q, d);
  2346       udiv_qrnnd (q, r, r, q, d);
  2347       udiv_qrnnd (q, r, r, q, d);
  2348      udiv_qrnnd (q, r, r, q, d);
  2349    }
  2350    SPEED_ROUTINE_UDIV_QRNND_B;
  2351  }
  2352  
  2353  double
  2354  speed_udiv_qrnnd_c (struct speed_params *s)
  2355  {
          /* Time ten successively dependent __udiv_qrnnd_c operations per loop
             iteration.  The divisor is always normalized (macro argument 1).  */
  2356    SPEED_ROUTINE_UDIV_QRNND_A (1);
  2357    {
            /* The q and r produced by one step are the inputs to the next.  */
  2358      __udiv_qrnnd_c (q, r, r, q, d);
  2359       __udiv_qrnnd_c (q, r, r, q, d);
  2360       __udiv_qrnnd_c (q, r, r, q, d);
  2361      __udiv_qrnnd_c (q, r, r, q, d);
  2362       __udiv_qrnnd_c (q, r, r, q, d);
  2363       __udiv_qrnnd_c (q, r, r, q, d);
  2364      __udiv_qrnnd_c (q, r, r, q, d);
  2365       __udiv_qrnnd_c (q, r, r, q, d);
  2366       __udiv_qrnnd_c (q, r, r, q, d);
  2367      __udiv_qrnnd_c (q, r, r, q, d);
  2368    }
  2369    SPEED_ROUTINE_UDIV_QRNND_B;
  2370  }
  2371  
  2372  #if HAVE_NATIVE_mpn_udiv_qrnnd
  2373  double
  2374  speed_mpn_udiv_qrnnd (struct speed_params *s)
  2375  {
          /* Time ten successively dependent mpn_udiv_qrnnd calls per loop
             iteration, with an always-normalized divisor (macro argument 1).
             The return value becomes the next q and &r receives the other
             result (presumably the remainder -- confirm against the
             mpn_udiv_qrnnd declaration).  */
  2376    SPEED_ROUTINE_UDIV_QRNND_A (1);
  2377    {
  2378      q = mpn_udiv_qrnnd (&r, r, q, d);
  2379       q = mpn_udiv_qrnnd (&r, r, q, d);
  2380       q = mpn_udiv_qrnnd (&r, r, q, d);
  2381      q = mpn_udiv_qrnnd (&r, r, q, d);
  2382       q = mpn_udiv_qrnnd (&r, r, q, d);
  2383       q = mpn_udiv_qrnnd (&r, r, q, d);
  2384      q = mpn_udiv_qrnnd (&r, r, q, d);
  2385       q = mpn_udiv_qrnnd (&r, r, q, d);
  2386       q = mpn_udiv_qrnnd (&r, r, q, d);
  2387      q = mpn_udiv_qrnnd (&r, r, q, d);
  2388    }
  2389    SPEED_ROUTINE_UDIV_QRNND_B;
  2390  }
  2391  #endif
  2392  
  2393  #if HAVE_NATIVE_mpn_udiv_qrnnd_r
  2394  double
  2395  speed_mpn_udiv_qrnnd_r (struct speed_params *s)
  2396  {
          /* Time ten successively dependent mpn_udiv_qrnnd_r calls per loop
             iteration, with an always-normalized divisor (macro argument 1).
             Here the result pointer &r is the last argument rather than the
             first; the return value again becomes the next q.  */
  2397    SPEED_ROUTINE_UDIV_QRNND_A (1);
  2398    {
  2399      q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2400       q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2401       q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2402      q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2403       q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2404       q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2405      q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2406       q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2407       q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2408      q = mpn_udiv_qrnnd_r (r, q, d, &r);
  2409    }
  2410    SPEED_ROUTINE_UDIV_QRNND_B;
  2411  }
  2412  #endif
  2413  
  2414  
  2415  double
  2416  speed_invert_limb (struct speed_params *s)
  2417  {
          /* Hand the invert_limb (dinv, d) call to
             SPEED_ROUTINE_INVERT_LIMB_CALL.  That macro is defined outside this
             file section; presumably it declares dinv and d, runs the timing
             loop and returns the time -- confirm in speed.h.  */
  2418    SPEED_ROUTINE_INVERT_LIMB_CALL (invert_limb (dinv, d));
  2419  }
  2420  
  2421  
  2422  /* xp[0] might not be particularly random, but should give an indication how
  2423     "/" runs.  Same for speed_operator_mod below.  */
  2424  double
  2425  speed_operator_div (struct speed_params *s)
  2426  {
          /* Time the C "/" operator on mp_limb_t operands, as a chain of ten
             dependent xor-then-divide steps per loop iteration.  Returns the
             value from speed_endtime.  */
  2427    double     t;
  2428    unsigned   i;
  2429    mp_limb_t  x, q, d;
  2430  
          /* ten divisions per loop iteration; presumably the caller divides
             the returned time by this -- confirm in speed.h */
  2431    s->time_divisor = 10;
  2432  
  2433    /* divisor from "r" parameter, or a default */
  2434    d = s->r;
  2435    if (d == 0)
  2436      d = mp_bases[10].big_base;
  2437  
  2438    x = s->xp[0];
  2439    q = 0;
  2440  
  2441    speed_starttime ();
  2442    i = s->reps;
  2443    do
  2444      {
              /* Each step xors x into the previous quotient before dividing,
                 so every division depends on the one before it.  */
  2445        q ^= x; q /= d;
  2446         q ^= x; q /= d;
  2447         q ^= x; q /= d;
  2448        q ^= x; q /= d;
  2449         q ^= x; q /= d;
  2450         q ^= x; q /= d;
  2451        q ^= x; q /= d;
  2452         q ^= x; q /= d;
  2453         q ^= x; q /= d;
  2454        q ^= x; q /= d;
  2455      }
  2456    while (--i != 0);
  2457    t = speed_endtime ();
  2458  
  2459    /* stop the compiler optimizing away the whole calculation! */
  2460    noop_1 (q);
  2461  
  2462    return t;
  2463  }
  2464  
  2465  double
  2466  speed_operator_mod (struct speed_params *s)
  2467  {
          /* Time the C "%" operator on mp_limb_t operands, as a chain of ten
             dependent xor-then-reduce steps per loop iteration.  Returns the
             value from speed_endtime.  */
  2468    double     t;
  2469    unsigned   i;
  2470    mp_limb_t  x, r, d;
  2471  
          /* ten reductions per loop iteration; presumably the caller divides
             the returned time by this -- confirm in speed.h */
  2472    s->time_divisor = 10;
  2473  
  2474    /* divisor from "r" parameter, or a default */
  2475    d = s->r;
  2476    if (d == 0)
  2477      d = mp_bases[10].big_base;
  2478  
  2479    x = s->xp[0];
  2480    r = 0;
  2481  
  2482    speed_starttime ();
  2483    i = s->reps;
  2484    do
  2485      {
              /* Each step xors x into the previous remainder before reducing,
                 so every "%" depends on the one before it.  */
  2486        r ^= x; r %= d;
  2487         r ^= x; r %= d;
  2488         r ^= x; r %= d;
  2489        r ^= x; r %= d;
  2490         r ^= x; r %= d;
  2491         r ^= x; r %= d;
  2492        r ^= x; r %= d;
  2493         r ^= x; r %= d;
  2494         r ^= x; r %= d;
  2495        r ^= x; r %= d;
  2496      }
  2497    while (--i != 0);
  2498    t = speed_endtime ();
  2499  
  2500    /* stop the compiler optimizing away the whole calculation! */
  2501    noop_1 (r);
  2502  
  2503    return t;
  2504  }
  2505  
  2506  
  2507  /* r==0 measures on data with the values uniformly distributed.  This will
  2508     be typical for count_trailing_zeros in a GCD etc.
  2509  
  2510     r==1 measures on data with the resultant count uniformly distributed
  2511     between 0 and GMP_LIMB_BITS-1.  This is probably sensible for
  2512     count_leading_zeros on the high limbs of divisors.  */
  2513  
  2514  int
  2515  speed_routine_count_zeros_setup (struct speed_params *s,
  2516  				 mp_ptr xp, int leading, int zero)
  2517  {
  2518    int        i, c;
  2519    mp_limb_t  n;
  2520  
  2521    if (s->r == 0)
  2522      {
  2523        /* Make uniformly distributed data.  If zero isn't allowed then change
  2524  	 it to 1 for leading, or 0x800..00 for trailing.  */
  2525        MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);
  2526        if (! zero)
  2527  	for (i = 0; i < SPEED_BLOCK_SIZE; i++)
  2528  	  if (xp[i] == 0)
  2529  	    xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
  2530      }
  2531    else if (s->r == 1)
  2532      {
  2533        /* Make counts uniformly distributed.  A randomly chosen bit is set, and
  2534  	 for leading the rest above it are cleared, or for trailing then the
  2535  	 rest below.  */
  2536        for (i = 0; i < SPEED_BLOCK_SIZE; i++)
  2537  	{
  2538  	  mp_limb_t  set = CNST_LIMB(1) << (s->yp_block[i] % GMP_LIMB_BITS);
  2539  	  mp_limb_t  keep_below = set-1;
  2540  	  mp_limb_t  keep_above = MP_LIMB_T_MAX ^ keep_below;
  2541  	  mp_limb_t  keep = (leading ? keep_below : keep_above);
  2542  	  xp[i] = (s->xp_block[i] & keep) | set;
  2543  	}
  2544      }
  2545    else
  2546      {
  2547        return 0;
  2548      }
  2549  
  2550    /* Account for the effect of n^=c. */
  2551    c = 0;
  2552    for (i = 0; i < SPEED_BLOCK_SIZE; i++)
  2553      {
  2554        n = xp[i];
  2555        xp[i] ^= c;
  2556  
  2557        if (leading)
  2558  	count_leading_zeros (c, n);
  2559        else
  2560  	count_trailing_zeros (c, n);
  2561      }
  2562  
  2563    return 1;
  2564  }
  2565  
  2566  double
  2567  speed_count_leading_zeros (struct speed_params *s)
  2568  {
  2569  #ifdef COUNT_LEADING_ZEROS_0
  2570  #define COUNT_LEADING_ZEROS_0_ALLOWED   1
  2571  #else
  2572  #define COUNT_LEADING_ZEROS_0_ALLOWED   0
  2573  #endif
  2574  
  2575    SPEED_ROUTINE_COUNT_ZEROS_A (1, COUNT_LEADING_ZEROS_0_ALLOWED);
  2576    count_leading_zeros (c, n);
  2577    SPEED_ROUTINE_COUNT_ZEROS_B ();
  2578  }
  2579  double
  2580  speed_count_trailing_zeros (struct speed_params *s)
  2581  {
          /* Time count_trailing_zeros (c, n) between the
             SPEED_ROUTINE_COUNT_ZEROS_A and _B macros, which are defined
             outside this file section.  The arguments (0, 0) are presumably
             "not leading" and "zero operand not allowed" as in
             speed_routine_count_zeros_setup -- confirm in speed.h.  */
  2582    SPEED_ROUTINE_COUNT_ZEROS_A (0, 0);
  2583    count_trailing_zeros (c, n);
  2584    SPEED_ROUTINE_COUNT_ZEROS_B ();
  2585  }
  2586  
  2587  
  2588  double
  2589  speed_mpn_get_str (struct speed_params *s)
  2590  {
          /* Pass mpn_get_str to SPEED_ROUTINE_MPN_GET_STR, a macro defined
             outside this file section; presumably it sets up the operands,
             runs the timing loop and returns the time -- confirm in speed.h.  */
  2591    SPEED_ROUTINE_MPN_GET_STR (mpn_get_str);
  2592  }
  2593  
  2594  double
  2595  speed_mpn_set_str (struct speed_params *s)
  2596  {
          /* Pass the mpn_set_str call to SPEED_ROUTINE_MPN_SET_STR_CALL, a macro
             defined outside this file section; wp, xp and base are presumably
             declared by that macro -- confirm in speed.h.  */
  2597    SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_set_str (wp, xp, s->size, base));
  2598  }
  2599  double
  2600  speed_mpn_bc_set_str (struct speed_params *s)
  2601  {
          /* Same measurement as speed_mpn_set_str but calling mpn_bc_set_str;
             wp, xp and base are presumably declared by
             SPEED_ROUTINE_MPN_SET_STR_CALL -- confirm in speed.h.  */
  2602    SPEED_ROUTINE_MPN_SET_STR_CALL (mpn_bc_set_str (wp, xp, s->size, base));
  2603  }
  2604  
  2605  double
  2606  speed_MPN_ZERO (struct speed_params *s)
  2607  {
          /* Pass the MPN_ZERO (wp, s->size) call to SPEED_ROUTINE_MPN_ZERO_CALL,
             a macro defined outside this file section; wp is presumably
             declared by that macro -- confirm in speed.h.  */
  2608    SPEED_ROUTINE_MPN_ZERO_CALL (MPN_ZERO (wp, s->size));
  2609  }
  2610  
  2611  
  2612  int
  2613  speed_randinit (struct speed_params *s, gmp_randstate_ptr rstate)
  2614  {
  2615    if (s->r == 0)
  2616      gmp_randinit_default (rstate);
  2617    else if (s->r == 1)
  2618      gmp_randinit_mt (rstate);
  2619    else
  2620      {
  2621        return gmp_randinit_lc_2exp_size (rstate, s->r);
  2622      }
  2623    return 1;
  2624  }
  2625  
  2626  double
  2627  speed_gmp_randseed (struct speed_params *s)
  2628  {
  2629    gmp_randstate_t  rstate;
  2630    unsigned  i;
  2631    double    t;
  2632    mpz_t     x;
  2633  
  2634    SPEED_RESTRICT_COND (s->size >= 1);
  2635    SPEED_RESTRICT_COND (speed_randinit (s, rstate));
  2636  
  2637    /* s->size bits of seed */
  2638    mpz_init_set_n (x, s->xp, s->size);
  2639    mpz_fdiv_r_2exp (x, x, (unsigned long) s->size);
  2640  
  2641    /* cache priming */
  2642    gmp_randseed (rstate, x);
  2643  
  2644    speed_starttime ();
  2645    i = s->reps;
  2646    do
  2647      gmp_randseed (rstate, x);
  2648    while (--i != 0);
  2649    t = speed_endtime ();
  2650  
  2651    gmp_randclear (rstate);
  2652    mpz_clear (x);
  2653    return t;
  2654  }
  2655  
  2656  double
  2657  speed_gmp_randseed_ui (struct speed_params *s)
  2658  {
  2659    gmp_randstate_t  rstate;
  2660    unsigned  i, j;
  2661    double    t;
  2662  
  2663    SPEED_RESTRICT_COND (speed_randinit (s, rstate));
  2664  
  2665    /* cache priming */
  2666    gmp_randseed_ui (rstate, 123L);
  2667  
  2668    speed_starttime ();
  2669    i = s->reps;
  2670    j = 0;
  2671    do
  2672      {
  2673        gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]);
  2674        j++;
  2675        if (j >= SPEED_BLOCK_SIZE)
  2676  	j = 0;
  2677      }
  2678    while (--i != 0);
  2679    t = speed_endtime ();
  2680  
  2681    gmp_randclear (rstate);
  2682    return t;
  2683  }
  2684  
  2685  double
  2686  speed_mpz_urandomb (struct speed_params *s)
  2687  {
  2688    gmp_randstate_t  rstate;
  2689    mpz_t     z;
  2690    unsigned  i;
  2691    double    t;
  2692  
  2693    SPEED_RESTRICT_COND (s->size >= 0);
  2694    SPEED_RESTRICT_COND (speed_randinit (s, rstate));
  2695  
  2696    mpz_init (z);
  2697  
  2698    /* cache priming */
  2699    mpz_urandomb (z, rstate, (unsigned long) s->size);
  2700    mpz_urandomb (z, rstate, (unsigned long) s->size);
  2701  
  2702    speed_starttime ();
  2703    i = s->reps;
  2704    do
  2705      mpz_urandomb (z, rstate, (unsigned long) s->size);
  2706    while (--i != 0);
  2707    t = speed_endtime ();
  2708  
  2709    mpz_clear (z);
  2710    gmp_randclear (rstate);
  2711    return t;
  2712  }