github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/speed.c (about)

     1  /* Speed measuring program.
     2  
     3  Copyright 1999-2003, 2005, 2006, 2008-2015 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  /* Usage message is in the code below, run with no arguments to print it.
    32     See README for interesting applications.
    33  
    34     To add a new routine foo(), create a speed_foo() function in the style of
    35     the existing ones and add an entry in the routine[] array.  Put FLAG_R if
    36     speed_foo() wants an "r" parameter.
    37  
    38     The routines don't have help messages or descriptions, but most have
    39     suggestive names.  See the source code for full details.
    40  
    41  */
    42  
    43  #include "config.h"
    44  
    45  #include <limits.h>
    46  #include <stdio.h>
    47  #include <stdlib.h>
    48  #include <string.h>
    49  
    50  #if HAVE_UNISTD_H
    51  #include <unistd.h>  /* for getpid, R_OK */
    52  #endif
    53  
    54  #if TIME_WITH_SYS_TIME
    55  # include <sys/time.h>  /* for struct timeval */
    56  # include <time.h>
    57  #else
    58  # if HAVE_SYS_TIME_H
    59  #  include <sys/time.h>
    60  # else
    61  #  include <time.h>
    62  # endif
    63  #endif
    64  
    65  #if HAVE_SYS_RESOURCE_H
    66  #include <sys/resource.h>  /* for getrusage() */
    67  #endif
    68  
    69  
    70  #include "gmp.h"
    71  #include "gmp-impl.h"
    72  #include "longlong.h"  /* for the benefit of speed-many.c */
    73  #include "tests.h"
    74  #include "speed.h"
    75  
    76  
    77  #if !HAVE_DECL_OPTARG
    78  extern char *optarg;
    79  extern int optind, opterr;
    80  #endif
    81  
    82  #if !HAVE_STRTOUL
    83  #define strtoul(p,e,b)  (unsigned long) strtol(p,e,b)
    84  #endif
    85  
    86  #ifdef SPEED_EXTRA_PROTOS
    87  SPEED_EXTRA_PROTOS
    88  #endif
    89  #ifdef SPEED_EXTRA_PROTOS2
    90  SPEED_EXTRA_PROTOS2
    91  #endif
    92  
    93  
    94  #if GMP_LIMB_BITS == 32
    95  #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
    96  #endif
    97  #if GMP_LIMB_BITS == 64
    98  #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
    99  #endif
   100  
   101  
/* Values for option_cmp, selecting what run_one() reports in each column:
     CMP_ABSOLUTE    the routine's own time, with no adjustment
     CMP_RATIO       columns after the first are divided by the first
                     column's time
     CMP_DIFFERENCE  columns after the first have the first column's time
                     subtracted
     CMP_DIFFPREV    each column has its own time from the previous call
                     to run_one() subtracted  */
#define CMP_ABSOLUTE     1
#define CMP_RATIO        2
#define CMP_DIFFERENCE   3
#define CMP_DIFFPREV     4
int  option_cmp = CMP_ABSOLUTE;

/* Values for option_unit.  run_one() leaves times untouched for
   UNIT_SECONDS, divides by speed_cycletime for UNIT_CYCLES, and divides by
   speed_cycletime times SIZE_TO_DIVISOR(size) for UNIT_CYCLESPERLIMB.  */
#define UNIT_SECONDS        1
#define UNIT_CYCLES         2
#define UNIT_CYCLESPERLIMB  3
int  option_unit = UNIT_SECONDS;

/* Values for option_data, the operand pattern written by data_fill():
     DATA_RANDOM   mpn_random
     DATA_RANDOM2  mpn_random2
     DATA_ZEROS    all limbs zero
     DATA_AAS      every limb GMP_NUMB_0xAA
     DATA_FFS      every limb GMP_NUMB_MAX
     DATA_2FD      every limb GMP_NUMB_MAX, then 2 subtracted from the
                   lowest limb  */
#define DATA_RANDOM   1
#define DATA_RANDOM2  2
#define DATA_ZEROS    3
#define DATA_AAS      4
#define DATA_FFS      5
#define DATA_2FD      6
int  option_data = DATA_RANDOM;

/* option_square is read by SIZE_TO_DIVISOR: 1 selects n*n, 2 selects
   n*(n+1)/2, any other value selects n.  */
int        option_square = 0;
double     option_factor = 0.0;
mp_size_t  option_step = 1;
/* Nonzero makes run_one() print plain "%.9e" columns rather than the
   free-form table.  */
int        option_gnuplot = 0;
char      *option_gnuplot_basename;
/* NOTE(review): presumably the list of size ranges to measure, with
   size_num entries in use out of size_allocnum allocated -- confirm
   against the option parsing code.  */
struct size_array_t {
  mp_size_t start, end;
} *size_array = NULL;
mp_size_t  size_num = 0;
mp_size_t  size_allocnum = 0;
int        option_resource_usage = 0;
long       option_seed = 123456789;

/* Shared measurement parameters; run_one() and run_all() store the operand
   buffers they allocate in here.  */
struct speed_params  sp;

#define COLUMN_WIDTH  13  /* for the free-form output */

/* Bits for routine_t.flag.  */
#define FLAG_R            (1<<0)  /* require ".r" */
#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
#define FLAG_RSIZE        (1<<2)
#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */
   142  
/* Table of measurable routines.  Each entry pairs a name string with the
   speed_function_t that run_one() hands to speed_measure(), plus a bitwise
   OR of FLAG_* values.  Entries written without a third initializer get a
   flag of zero.  Entries inside #if blocks exist only when the
   corresponding configuration macro is nonzero.  SPEED_EXTRA_ROUTINES and
   SPEED_EXTRA_ROUTINES2, when defined, are expanded at the end of the
   initializer list.  */
const struct routine_t {
  /* constants */
  const char        *name;   /* routine name string */
  speed_function_t  fun;     /* function passed to speed_measure() */
  int               flag;    /* FLAG_* bits, or 0 */
} routine[] = {

  { "noop",              speed_noop                 },
  { "noop_wxs",          speed_noop_wxs             },
  { "noop_wxys",         speed_noop_wxys            },

  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
  { "mpn_add_1",         speed_mpn_add_1,     FLAG_R },
  { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R },
  { "mpn_sub_1",         speed_mpn_sub_1,     FLAG_R },
  { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R },

  { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
  { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
  { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
  { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
  { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
  { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },

#if HAVE_NATIVE_mpn_add_n_sub_n
  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
#endif

  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
#if HAVE_NATIVE_mpn_addmul_2
  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
#endif
  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
#if HAVE_NATIVE_mpn_mul_2
  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_5
  { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_6
  { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
#endif

  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
#if HAVE_NATIVE_mpn_divrem_1c
  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
#endif
  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R },
#if HAVE_NATIVE_mpn_mod_1c
  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R },
#endif
  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },

  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
  { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
  { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },

  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },

  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },

  { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R  },
  { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R  },
  { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R  },
  { "mpn_div_qr_1",      speed_mpn_div_qr_1,      FLAG_R },

  { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
  { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },

  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },

  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_modexact_1_odd
  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
#endif
  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },

#if GMP_NUMB_BITS % 4 == 0
  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
#endif

  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },

  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
  { "mpn_com",           speed_mpn_com              },
  { "mpn_neg",           speed_mpn_neg              },

  { "mpn_popcount",      speed_mpn_popcount         },
  { "mpn_hamdist",       speed_mpn_hamdist          },

  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },

  { "mpn_hgcd",          speed_mpn_hgcd             },
  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
  { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },

  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },

  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },

  { "mpn_gcd",           speed_mpn_gcd                    },

  { "mpn_gcdext",            speed_mpn_gcdext            },
  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
#if 0
  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
#endif
  { "mpz_jacobi",        speed_mpz_jacobi           },
  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
  { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },

  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
#if HAVE_NATIVE_mpn_sqr_diagonal
  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
#endif
#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
  { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
#endif

  { "mpn_mul_n",         speed_mpn_mul_n            },
  { "mpn_sqr",           speed_mpn_sqr              },

  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
#endif
  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },

  { "mpn_sqrlo",          speed_mpn_sqrlo           },
  { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase  },
  { "mpn_mullo_n",        speed_mpn_mullo_n         },
  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },

  { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
  { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
  { "mpn_mulmid_n",         speed_mpn_mulmid_n },
  { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },

  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },

  { "mpn_invert",              speed_mpn_invert              },
  { "mpn_invertappr",          speed_mpn_invertappr          },
  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
  { "mpn_binvert",             speed_mpn_binvert             },
  { "mpn_sec_invert",          speed_mpn_sec_invert          },

  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},

  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },

  { "mpn_broot",               speed_mpn_broot,    FLAG_R },
  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
  { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },

  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },

  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },
  { "mpn_sqrt",          speed_mpn_sqrt             },
  { "mpn_root",          speed_mpn_root, FLAG_R     },

  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },

  { "mpz_add",           speed_mpz_add              },
  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
  { "mpz_2fac_ui",       speed_mpz_2fac_ui,  FLAG_NODATA   },
  { "mpz_powm",          speed_mpz_powm             },
  { "mpz_powm_mod",      speed_mpz_powm_mod         },
  { "mpz_powm_redc",     speed_mpz_powm_redc        },
  { "mpz_powm_sec",      speed_mpz_powm_sec        },
  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },

  { "mpz_mod",           speed_mpz_mod              },
  { "mpn_redc_1",        speed_mpn_redc_1           },
  { "mpn_redc_2",        speed_mpn_redc_2           },
  { "mpn_redc_n",        speed_mpn_redc_n           },

  { "MPN_COPY",          speed_MPN_COPY             },
  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
  { "memcpy",            speed_memcpy               },
#if HAVE_NATIVE_mpn_copyi
  { "mpn_copyi",         speed_mpn_copyi            },
#endif
#if HAVE_NATIVE_mpn_copyd
  { "mpn_copyd",         speed_mpn_copyd            },
#endif
  { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_addlsh1_n == 1
  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n == 1
  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
  { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n_ip2
  { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n_ip1
  { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n == 1
  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n == 1
  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n == 1
  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip1
  { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n_ip2
  { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
  { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n == 1
  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip1
  { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_addlsh_n_ip2
  { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
#endif
#if HAVE_NATIVE_mpn_sublsh_n_ip1
  { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
#endif

  { "mpn_cnd_add_n",     speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
  { "mpn_cnd_sub_n",     speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },

  { "MPN_ZERO",          speed_MPN_ZERO             },

  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },

  { "malloc_free",                  speed_malloc_free                  },
  { "malloc_realloc_free",          speed_malloc_realloc_free          },
  { "gmp_allocate_free",            speed_gmp_allocate_free            },
  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
  { "mpz_init_clear",               speed_mpz_init_clear               },
  { "mpq_init_clear",               speed_mpq_init_clear               },
  { "mpf_init_clear",               speed_mpf_init_clear               },
  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },

  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_umul_ppmm
  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
#endif

  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },

  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
#endif
  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },

  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },

  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },

#ifdef SPEED_EXTRA_ROUTINES
  SPEED_EXTRA_ROUTINES
#endif
#ifdef SPEED_EXTRA_ROUTINES2
  SPEED_EXTRA_ROUTINES2
#endif
};
   549  
   550  
/* One routine selected for measurement, together with the results from the
   most recent run_one() call.  */
struct choice_t {
  const struct routine_t  *p;         /* entry in routine[]; p->fun is measured */
  mp_limb_t               r;          /* copied to s->r before each measurement */
  double                  scale;      /* measured time is multiplied by this */
  double                  time;       /* figure to print, after run_one()'s
                                         unit and comparison adjustments */
  int                     no_time;    /* nonzero if there is nothing to print
                                         ("n/a", or 0.0 in gnuplot output) */
  double                  prev_time;  /* scaled time from the previous
                                         run_one() call, used by CMP_DIFFPREV;
                                         -1.0 if that call had no time */
  const char              *name;      /* NOTE(review): presumably the column
                                         label -- not used by run_one() */
};
struct choice_t  *choice;   /* array of num_choices entries */
int  num_choices = 0;
   562  
   563  
   564  void
   565  data_fill (mp_ptr ptr, mp_size_t size)
   566  {
   567    switch (option_data) {
   568    case DATA_RANDOM:
   569      mpn_random (ptr, size);
   570      break;
   571    case DATA_RANDOM2:
   572      mpn_random2 (ptr, size);
   573      break;
   574    case DATA_ZEROS:
   575      MPN_ZERO (ptr, size);
   576      break;
   577    case DATA_AAS:
   578      MPN_FILL (ptr, size, GMP_NUMB_0xAA);
   579      break;
   580    case DATA_FFS:
   581      MPN_FILL (ptr, size, GMP_NUMB_MAX);
   582      break;
   583    case DATA_2FD:
   584      MPN_FILL (ptr, size, GMP_NUMB_MAX);
   585      ptr[0] -= 2;
   586      break;
   587    default:
   588      abort();
   589      /*NOTREACHED*/
   590    }
   591  }
   592  
   593  /* The code here handling the various combinations of output options isn't
   594     too attractive, but it works and is fairly clean.  */
   595  
   596  #define SIZE_TO_DIVISOR(n)              \
   597    (option_square == 1 ? (n)*(n)         \
   598    : option_square == 2 ? (n)*((n)+1)/2  \
   599    : (n))
   600  
   601  void
   602  run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
   603  {
   604    const char  *first_open_fastest, *first_open_notfastest, *first_close;
   605    int         i, fastest, want_data;
   606    double      fastest_time;
   607    TMP_DECL;
   608  
   609    TMP_MARK;
   610  
   611    /* allocate data, unless all routines are NODATA */
   612    want_data = 0;
   613    for (i = 0; i < num_choices; i++)
   614      want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
   615  
   616    if (want_data)
   617      {
   618        SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
   619        SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
   620  
   621        data_fill (s->xp, s->size);
   622        data_fill (s->yp, s->size);
   623      }
   624    else
   625      {
   626        sp.xp = NULL;
   627        sp.yp = NULL;
   628      }
   629  
   630    if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
   631      {
   632        first_open_fastest = "(#";
   633        first_open_notfastest = " (";
   634        first_close = ")";
   635      }
   636    else
   637      {
   638        first_open_fastest = "#";
   639        first_open_notfastest = " ";
   640        first_close = "";
   641      }
   642  
   643    fastest = -1;
   644    fastest_time = -1.0;
   645    for (i = 0; i < num_choices; i++)
   646      {
   647        s->r = choice[i].r;
   648        choice[i].time = speed_measure (choice[i].p->fun, s);
   649        choice[i].no_time = (choice[i].time == -1.0);
   650        if (! choice[i].no_time)
   651          choice[i].time *= choice[i].scale;
   652  
   653        /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
   654           is before any differences.  */
   655        {
   656          double     t;
   657          t = choice[i].time;
   658          if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
   659            {
   660              if (choice[i].prev_time == -1.0)
   661                choice[i].no_time = 1;
   662              else
   663                choice[i].time = choice[i].time - choice[i].prev_time;
   664            }
   665          choice[i].prev_time = t;
   666        }
   667  
   668        if (choice[i].no_time)
   669          continue;
   670  
   671        /* Look for the fastest after CMP_DIFFPREV has been applied, but
   672           before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
   673           if there's more than one routine.  */
   674        if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
   675          {
   676            fastest = i;
   677            fastest_time = choice[i].time;
   678          }
   679  
   680        if (option_cmp == CMP_DIFFPREV)
   681          {
   682            /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
   683            if (option_unit == UNIT_CYCLES)
   684              choice[i].time /= speed_cycletime;
   685            else if (option_unit == UNIT_CYCLESPERLIMB)
   686              {
   687                if (prev_size == -1)
   688                  choice[i].time /= speed_cycletime;
   689                else
   690                  choice[i].time /=  (speed_cycletime
   691                                      * (SIZE_TO_DIVISOR(s->size)
   692                                         - SIZE_TO_DIVISOR(prev_size)));
   693              }
   694          }
   695        else
   696          {
   697            if (option_unit == UNIT_CYCLES)
   698              choice[i].time /= speed_cycletime;
   699            else if (option_unit == UNIT_CYCLESPERLIMB)
   700              choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
   701  
   702            if (option_cmp == CMP_RATIO && i > 0)
   703              {
   704                /* A ratio isn't affected by the units chosen. */
   705                if (choice[0].no_time || choice[0].time == 0.0)
   706                  choice[i].no_time = 1;
   707                else
   708                  choice[i].time /= choice[0].time;
   709              }
   710            else if (option_cmp == CMP_DIFFERENCE && i > 0)
   711              {
   712                if (choice[0].no_time)
   713                  {
   714                    choice[i].no_time = 1;
   715                    continue;
   716                  }
   717                choice[i].time -= choice[0].time;
   718              }
   719          }
   720      }
   721  
   722    if (option_gnuplot)
   723      {
   724        /* In CMP_DIFFPREV, don't print anything for the first size, start
   725           with the second where an actual difference is available.
   726  
   727           In CMP_RATIO, print the first column as 1.0.
   728  
   729           The 9 decimals printed is much more than the expected precision of
   730           the measurements actually. */
   731  
   732        if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
   733          {
   734            fprintf (fp, "%-6ld ", s->size);
   735            for (i = 0; i < num_choices; i++)
   736              fprintf (fp, "  %.9e",
   737                       choice[i].no_time ? 0.0
   738                       : (option_cmp == CMP_RATIO && i == 0) ? 1.0
   739                       : choice[i].time);
   740            fprintf (fp, "\n");
   741          }
   742      }
   743    else
   744      {
   745        fprintf (fp, "%-6ld ", s->size);
   746        for (i = 0; i < num_choices; i++)
   747          {
   748            char  buf[128];
   749            int   decimals;
   750  
   751            if (choice[i].no_time)
   752              {
   753                fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
   754              }
   755            else
   756              {if (option_unit == UNIT_CYCLESPERLIMB
   757                   || (option_cmp == CMP_RATIO && i > 0))
   758                  decimals = 4;
   759                else if (option_unit == UNIT_CYCLES)
   760                  decimals = 2;
   761                else
   762                  decimals = 9;
   763  
   764                sprintf (buf, "%s%.*f%s",
   765                         i == fastest ? first_open_fastest : first_open_notfastest,
   766                         decimals, choice[i].time, first_close);
   767                fprintf (fp, " %*s", COLUMN_WIDTH, buf);
   768              }
   769          }
   770        fprintf (fp, "\n");
   771      }
   772  
   773    TMP_FREE;
   774  }
   775  
   776  void
   777  run_all (FILE *fp)
   778  {
   779    mp_size_t  prev_size;
   780    int        i;
   781    TMP_DECL;
   782  
   783    TMP_MARK;
   784    SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
   785    SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
   786  
   787    data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
   788    data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
   789  
   790    for (i = 0; i < size_num; i++)
   791      {
   792        sp.size = size_array[i].start;
   793        prev_size = -1;
   794        for (;;)
   795          {
   796            mp_size_t  step;
   797  
   798            if (option_data == DATA_2FD && sp.size >= 2)
   799              sp.xp[sp.size-1] = 2;
   800  
   801            run_one (fp, &sp, prev_size);
   802            prev_size = sp.size;
   803  
   804            if (option_data == DATA_2FD && sp.size >= 2)
   805              sp.xp[sp.size-1] = MP_LIMB_T_MAX;
   806  
   807            if (option_factor != 0.0)
   808              {
   809                step = (mp_size_t) (sp.size * option_factor - sp.size);
   810                if (step < 1)
   811                  step = 1;
   812              }
   813            else
   814              step = 1;
   815            if (step < option_step)
   816              step = option_step;
   817  
   818            sp.size += step;
   819            if (sp.size > size_array[i].end)
   820              break;
   821          }
   822      }
   823  
   824    TMP_FREE;
   825  }
   826  
   827  
   828  FILE *
   829  fopen_for_write (const char *filename)
   830  {
   831    FILE  *fp;
   832    if ((fp = fopen (filename, "w")) == NULL)
   833      {
   834        fprintf (stderr, "Cannot create %s\n", filename);
   835        exit(1);
   836      }
   837    return fp;
   838  }
   839  
   840  void
   841  fclose_written (FILE *fp, const char *filename)
   842  {
   843    int  err;
   844  
   845    err = ferror (fp);
   846    err |= fclose (fp);
   847  
   848    if (err)
   849      {
   850        fprintf (stderr, "Error writing %s\n", filename);
   851        exit(1);
   852      }
   853  }
   854  
   855  
   856  void
   857  run_gnuplot (int argc, char *argv[])
   858  {
   859    char  *plot_filename;
   860    char  *data_filename;
   861    FILE  *fp;
   862    int   i;
   863  
   864    plot_filename = (char *) (*__gmp_allocate_func)
   865      (strlen (option_gnuplot_basename) + 20);
   866    data_filename = (char *) (*__gmp_allocate_func)
   867      (strlen (option_gnuplot_basename) + 20);
   868  
   869    sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
   870    sprintf (data_filename, "%s.data",    option_gnuplot_basename);
   871  
   872    fp = fopen_for_write (plot_filename);
   873  
   874    fprintf (fp, "# Generated with:\n");
   875    fprintf (fp, "#");
   876    for (i = 0; i < argc; i++)
   877      fprintf (fp, " %s", argv[i]);
   878    fprintf (fp, "\n");
   879    fprintf (fp, "\n");
   880  
   881    fprintf (fp, "reset\n");
   882  
   883    /* Putting the key at the top left is usually good, and you can change it
   884       interactively if it's not. */
   885    fprintf (fp, "set key left\n");
   886  
   887    /* designed to make it possible to see crossovers easily */
   888    fprintf (fp, "set style data lines\n");
   889  
   890    fprintf (fp, "plot ");
   891    for (i = 0; i < num_choices; i++)
   892      {
   893        fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
   894        fprintf (fp, " title \"%s\"", choice[i].name);
   895  
   896        if (i != num_choices-1)
   897          fprintf (fp, ", \\");
   898        fprintf (fp, "\n");
   899      }
   900  
   901    fprintf (fp, "load \"-\"\n");
   902    fclose_written (fp, plot_filename);
   903  
   904    fp = fopen_for_write (data_filename);
   905  
   906    /* Unbuffered so you can see where the program was up to if it crashes or
   907       you kill it. */
   908    setbuf (fp, NULL);
   909  
   910    run_all (fp);
   911    fclose_written (fp, data_filename);
   912  }
   913  
   914  
/* Return a limb with n many one bits (starting from the least significant).

   n == GMP_LIMB_BITS gives MP_LIMB_T_MAX and n == 0 gives zero, both without
   going through the shift expression (presumably so a shift by the full limb
   width is never evaluated).  The macro argument is expanded up to three
   times, so n should be free of side effects.  Nothing here checks that n
   lies in 0..GMP_LIMB_BITS; callers are expected to do that.  */

#define LIMB_ONES(n) \
  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX      \
    : (n) == 0 ? CNST_LIMB(0)                   \
    : (CNST_LIMB(1) << (n)) - 1)
   921  
   922  mp_limb_t
   923  r_string (const char *s)
   924  {
   925    const char  *s_orig = s;
   926    long        n;
   927  
   928    if (strcmp (s, "aas") == 0)
   929      return GMP_NUMB_0xAA;
   930  
   931    {
   932      mpz_t      z;
   933      mp_limb_t  l;
   934      int        set, siz;
   935  
   936      mpz_init (z);
   937      set = mpz_set_str (z, s, 0);
   938      siz = SIZ(z);
   939      l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
   940      mpz_clear (z);
   941      if (set == 0)
   942        {
   943          if (siz > 1 || siz < -1)
   944            printf ("Warning, r parameter %s truncated to %d bits\n",
   945                    s_orig, GMP_LIMB_BITS);
   946          return l;
   947        }
   948    }
   949  
   950    if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
   951      n = strtoul (s+2, (char **) &s, 16);
   952    else
   953      n = strtol (s, (char **) &s, 10);
   954  
   955    if (strcmp (s, "bits") == 0)
   956      {
   957        mp_limb_t  l;
   958        if (n > GMP_LIMB_BITS)
   959          {
   960            fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
   961                     n, GMP_LIMB_BITS);
   962            exit (1);
   963          }
   964        mpn_random (&l, 1);
   965        return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
   966      }
   967    else  if (strcmp (s, "ones") == 0)
   968      {
   969        if (n > GMP_LIMB_BITS)
   970          {
   971            fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
   972                     n, GMP_LIMB_BITS);
   973            exit (1);
   974          }
   975        return LIMB_ONES (n);
   976      }
   977    else if (*s != '\0')
   978      {
   979        fprintf (stderr, "invalid r parameter: %s\n", s_orig);
   980        exit (1);
   981      }
   982  
   983    return n;
   984  }
   985  
   986  
   987  void
   988  routine_find (struct choice_t *c, const char *s_orig)
   989  {
   990    const char  *s;
   991    int     i;
   992    size_t  nlen;
   993  
   994    c->name = s_orig;
   995    s = strchr (s_orig, '*');
   996    if (s != NULL)
   997      {
   998        c->scale = atof(s_orig);
   999        s++;
  1000      }
  1001    else
  1002      {
  1003        c->scale = 1.0;
  1004        s = s_orig;
  1005      }
  1006  
  1007    for (i = 0; i < numberof (routine); i++)
  1008      {
  1009        nlen = strlen (routine[i].name);
  1010        if (memcmp (s, routine[i].name, nlen) != 0)
  1011          continue;
  1012  
  1013        if (s[nlen] == '.')
  1014          {
  1015            /* match, with a .r parameter */
  1016  
  1017            if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
  1018              {
  1019                fprintf (stderr,
  1020                         "Choice %s bad: doesn't take a \".<r>\" parameter\n",
  1021                         s_orig);
  1022                exit (1);
  1023              }
  1024  
  1025            c->p = &routine[i];
  1026            c->r = r_string (s + nlen + 1);
  1027            return;
  1028          }
  1029  
  1030        if (s[nlen] == '\0')
  1031          {
  1032            /* match, with no parameter */
  1033  
  1034            if (routine[i].flag & FLAG_R)
  1035              {
  1036                fprintf (stderr,
  1037                         "Choice %s bad: needs a \".<r>\" parameter\n",
  1038                         s_orig);
  1039                exit (1);
  1040              }
  1041  
  1042            c->p = &routine[i];
  1043            c->r = 0;
  1044            return;
  1045          }
  1046      }
  1047  
  1048    fprintf (stderr, "Choice %s unrecognised\n", s_orig);
  1049    exit (1);
  1050  }
  1051  
  1052  
  1053  void
  1054  usage (void)
  1055  {
  1056    int  i;
  1057  
  1058    speed_time_init ();
  1059  
  1060    printf ("Usage: speed [-options] -s size <routine>...\n");
  1061    printf ("Measure the speed of some routines.\n");
  1062    printf ("Times are in seconds, accuracy is shown.\n");
  1063    printf ("\n");
  1064    printf ("   -p num     set precision as number of time units each routine must run\n");
  1065    printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
  1066    printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
  1067    printf ("   -t step    step through sizes by given amount\n");
  1068    printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
  1069    printf ("   -r         show times as ratios of the first routine\n");
  1070    printf ("   -d         show times as difference from the first routine\n");
  1071    printf ("   -D         show times as difference from previous size shown\n");
  1072    printf ("   -c         show times in CPU cycles\n");
  1073    printf ("   -C         show times in cycles per limb\n");
  1074    printf ("   -u         print resource usage (memory) at end\n");
  1075    printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
  1076    printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
  1077    printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
  1078    printf ("   -o addrs   print addresses of data blocks\n");
  1079    printf ("\n");
  1080    printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
  1081    printf ("is greater.\n");
  1082    printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
  1083    printf ("size and the previous size.\n");
  1084    printf ("\n");
  1085    printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
  1086    printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
  1087    printf ("a log/log plot).\n");
  1088    printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
  1089    printf ("when viewing more than one routine, it means same axis scales for all data).\n");
  1090    printf ("\n");
  1091    printf ("The available routines are as follows.\n");
  1092    printf ("\n");
  1093  
  1094    for (i = 0; i < numberof (routine); i++)
  1095      {
  1096        if (routine[i].flag & FLAG_R)
  1097          printf ("\t%s.r\n", routine[i].name);
  1098        else if (routine[i].flag & FLAG_R_OPTIONAL)
  1099          printf ("\t%s (optional .r)\n", routine[i].name);
  1100        else
  1101          printf ("\t%s\n", routine[i].name);
  1102      }
  1103    printf ("\n");
  1104    printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
  1105    printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
  1106    printf ("\n");
  1107    printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
  1108    printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
  1109    printf ("\n");
  1110    printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
  1111    printf ("The fastest routine at each size is marked with a # (free form output only).\n");
  1112    printf ("\n");
  1113    printf ("%s", speed_time_string);
  1114    printf ("\n");
  1115    printf ("Gnuplot home page http://www.gnuplot.info/\n");
  1116    printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
  1117  }
  1118  
  1119  void
  1120  check_align_option (const char *name, mp_size_t align)
  1121  {
  1122    if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
  1123      {
  1124        fprintf (stderr, "Alignment request out of range: %s %ld\n",
  1125                 name, (long) align);
  1126        fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
  1127                 SPEED_TMP_ALLOC_ADJUST_MASK);
  1128        exit (1);
  1129      }
  1130  }
  1131  
/* Program entry point.

   Parses the options with getopt, then treats every remaining argument as a
   routine choice (see routine_find).  With no routine given the usage text
   is printed and the exit status is 1; at least one -s size is required
   too.  The measurements go either to the -P plot files (run_gnuplot) or as
   a table on stdout (run_all), optionally followed by resource usage
   information for -u.  Returns 0 on success; every error path calls
   exit (1).

   NOTE(review): the getopt string also accepts -E, -F, -R and -z, which the
   usage() text does not list.  */
int
main (int argc, char *argv[])
{
  int  i;
  int  opt;

  /* Unbuffered so output goes straight out when directed to a pipe or file
     and isn't lost on killing the program half way.  */
  setbuf (stdout, NULL);

  for (;;)
    {
      opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
      if (opt == EOF)
        break;

      switch (opt) {
      case 'a':
        /* Kind of data to fill the operands with. */
        if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
        else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
        else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
        else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
        else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
        else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
        else
          {
            fprintf (stderr, "unrecognised data option: %s\n", optarg);
            exit (1);
          }
        break;
      case 'C':
        /* -C and -c are mutually exclusive; the shared error message lives
           at the bad_unit label inside the -c case below. */
        if (option_unit  != UNIT_SECONDS) goto bad_unit;
        option_unit = UNIT_CYCLESPERLIMB;
        break;
      case 'c':
        if (option_unit != UNIT_SECONDS)
          {
          bad_unit:
            fprintf (stderr, "cannot use more than one of -c, -C\n");
            exit (1);
          }
        option_unit = UNIT_CYCLES;
        break;
      case 'D':
        /* -D, -d and -r are mutually exclusive; the shared error message
           lives at the bad_cmp label inside the -d case below. */
        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
        option_cmp = CMP_DIFFPREV;
        break;
      case 'd':
        if (option_cmp != CMP_ABSOLUTE)
          {
          bad_cmp:
            fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
            exit (1);
          }
        option_cmp = CMP_DIFFERENCE;
        break;
      case 'E':
        option_square = 1;
        break;
      case 'F':
        option_square = 2;
        break;
      case 'f':
        /* atof gives 0.0 for unparsable text, which the range check below
           rejects as well. */
        option_factor = atof (optarg);
        if (option_factor <= 1.0)
          {
            fprintf (stderr, "-f factor must be > 1.0\n");
            exit (1);
          }
        break;
      case 'o':
        speed_option_set (optarg);
        break;
      case 'P':
        option_gnuplot = 1;
        option_gnuplot_basename = optarg;
        break;
      case 'p':
        speed_precision = atoi (optarg);
        break;
      case 'R':
        /* Seed the random state from the clock instead of the default. */
        option_seed = time (NULL);
        break;
      case 'r':
        if (option_cmp != CMP_ABSOLUTE)
          goto bad_cmp;
        option_cmp = CMP_RATIO;
        break;
      case 's':
        {
          /* A comma separated list of "size" or "start-end" items, appended
             to size_array[].  strtok writes NULs into optarg as it goes. */
          char  *s;
          for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
            {
              /* Grow the array ten entries at a time. */
              if (size_num == size_allocnum)
                {
                  size_array = (struct size_array_t *)
                    __gmp_allocate_or_reallocate
                    (size_array,
                     size_allocnum * sizeof(size_array[0]),
                     (size_allocnum+10) * sizeof(size_array[0]));
                  size_allocnum += 10;
                }
              /* Try "start-end" first, and fall back to a single size used
                 as both start and end. */
              if (sscanf (s, "%ld-%ld",
                          &size_array[size_num].start,
                          &size_array[size_num].end) != 2)
                {
                  size_array[size_num].start = size_array[size_num].end
                    = atol (s);
                }

              if (size_array[size_num].start < 0
                  || size_array[size_num].end < 0
                  || size_array[size_num].start > size_array[size_num].end)
                {
                  fprintf (stderr, "invalid size parameter: %s\n", s);
                  exit (1);
                }

              size_num++;
            }
        }
        break;
      case 't':
        option_step = atol (optarg);
        if (option_step < 1)
          {
            fprintf (stderr, "-t step must be >= 1\n");
            exit (1);
          }
        break;
      case 'u':
        option_resource_usage = 1;
        break;
      case 'z':
        sp.cache = 1;
        break;
      case 'x':
        sp.align_xp = atol (optarg);
        check_align_option ("-x", sp.align_xp);
        break;
      case 'y':
        sp.align_yp = atol (optarg);
        check_align_option ("-y", sp.align_yp);
        break;
      case 'w':
        sp.align_wp = atol (optarg);
        check_align_option ("-w", sp.align_wp);
        break;
      case 'W':
        sp.align_wp2 = atol (optarg);
        check_align_option ("-W", sp.align_wp2);
        break;
      case '?':
        /* Unknown option or missing argument, as reported by getopt. */
        exit(1);
      }
    }

  /* No routine names left on the command line. */
  if (optind >= argc)
    {
      usage ();
      exit (1);
    }

  if (size_num == 0)
    {
      fprintf (stderr, "-s <size> must be specified\n");
      exit (1);
    }

  gmp_randinit_default (__gmp_rands);
  __gmp_rands_initialized = 1;
  gmp_randseed_ui (__gmp_rands, option_seed);

  /* One choice[] entry per remaining argument; routine_find() exits on a
     name it cannot resolve. */
  choice = (struct choice_t *) (*__gmp_allocate_func)
    ((argc - optind) * sizeof(choice[0]));
  for ( ; optind < argc; optind++)
    {
      struct choice_t  c;
      routine_find (&c, argv[optind]);
      choice[num_choices] = c;
      num_choices++;
    }

  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
      num_choices < 2)
    {
      fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
    }

  speed_time_init ();
  if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
    speed_cycletime_need_cycles ();
  else
    speed_cycletime_need_seconds ();

  if (option_gnuplot)
    {
      run_gnuplot (argc, argv);
    }
  else
    {
      /* Free form output: a line describing the measuring setup, a row of
         column headings, then the table itself. */
      if (option_unit == UNIT_SECONDS)
        printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
      else
        printf ("overhead %.2f cycles",
                speed_measure (speed_noop, NULL) / speed_cycletime);
      printf (", precision %d units of %.2e secs",
              speed_precision, speed_unittime);

      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
        printf (", CPU freq unknown\n");
      else
        printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);

      printf ("       ");
      for (i = 0; i < num_choices; i++)
        printf (" %*s", COLUMN_WIDTH, choice[i].name);
      printf ("\n");

      run_all (stdout);
    }

  if (option_resource_usage)
    {
#if HAVE_GETRUSAGE
      {
        /* This doesn't give data sizes on linux 2.0.x, only utime. */
        /* NOTE(review): the value printed as "maxresident" is ru_ixrss;
           getrusage documents ru_maxrss as the maximum resident set size.
           Confirm which field was intended. */
        struct rusage  r;
        if (getrusage (RUSAGE_SELF, &r) != 0)
          perror ("getrusage");
        else
          printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
                  r.ru_utime.tv_sec, r.ru_utime.tv_usec,
                  r.ru_idrss, r.ru_isrss, r.ru_ixrss);
      }
#else
      printf ("getrusage() not available\n");
#endif

      /* Linux kernel. */
      {
        /* If /proc/<pid>/status is readable, show it by running cat.  buf
           is used first for the path and then for the command. */
        char  buf[128];
        sprintf (buf, "/proc/%d/status", getpid());
        if (access (buf, R_OK) == 0)
          {
            sprintf (buf, "cat /proc/%d/status", getpid());
            system (buf);
          }

      }
    }

  return 0;
}