github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/speed.c (about) 1 /* Speed measuring program. 2 3 Copyright 1999-2003, 2005, 2006, 2008-2015 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20 or both in parallel, as here. 21 22 The GNU MP Library is distributed in the hope that it will be useful, but 23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 for more details. 26 27 You should have received copies of the GNU General Public License and the 28 GNU Lesser General Public License along with the GNU MP Library. If not, 29 see https://www.gnu.org/licenses/. */ 30 31 /* Usage message is in the code below, run with no arguments to print it. 32 See README for interesting applications. 33 34 To add a new routine foo(), create a speed_foo() function in the style of 35 the existing ones and add an entry in the routine[] array. Put FLAG_R if 36 speed_foo() wants an "r" parameter. 37 38 The routines don't have help messages or descriptions, but most have 39 suggestive names. See the source code for full details. 40 41 */ 42 43 #include "config.h" 44 45 #include <limits.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 50 #if HAVE_UNISTD_H 51 #include <unistd.h> /* for getpid, R_OK */ 52 #endif 53 54 #if TIME_WITH_SYS_TIME 55 # include <sys/time.h> /* for struct timeval */ 56 # include <time.h> 57 #else 58 # if HAVE_SYS_TIME_H 59 # include <sys/time.h> 60 # else 61 # include <time.h> 62 # endif 63 #endif 64 65 #if HAVE_SYS_RESOURCE_H 66 #include <sys/resource.h> /* for getrusage() */ 67 #endif 68 69 70 #include "gmp.h" 71 #include "gmp-impl.h" 72 #include "longlong.h" /* for the benefit of speed-many.c */ 73 #include "tests.h" 74 #include "speed.h" 75 76 77 #if !HAVE_DECL_OPTARG 78 extern char *optarg; 79 extern int optind, opterr; 80 #endif 81 82 #if !HAVE_STRTOUL 83 #define strtoul(p,e,b) (unsigned long) strtol(p,e,b) 84 #endif 85 86 #ifdef SPEED_EXTRA_PROTOS 87 SPEED_EXTRA_PROTOS 88 #endif 89 #ifdef SPEED_EXTRA_PROTOS2 90 SPEED_EXTRA_PROTOS2 91 #endif 92 93 94 #if GMP_LIMB_BITS == 32 95 #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) 96 #endif 97 #if GMP_LIMB_BITS == 64 98 #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) 99 #endif 100 101 102 #define CMP_ABSOLUTE 1 103 #define CMP_RATIO 2 104 #define CMP_DIFFERENCE 3 105 #define CMP_DIFFPREV 4 106 int option_cmp = CMP_ABSOLUTE; 107 108 #define UNIT_SECONDS 1 109 #define UNIT_CYCLES 2 110 #define UNIT_CYCLESPERLIMB 3 111 int option_unit = UNIT_SECONDS; 112 113 #define DATA_RANDOM 1 114 #define DATA_RANDOM2 2 115 #define DATA_ZEROS 3 116 #define DATA_AAS 4 117 #define DATA_FFS 5 118 #define DATA_2FD 6 119 int option_data = DATA_RANDOM; 120 121 int option_square = 0; 122 double option_factor = 0.0; 123 mp_size_t option_step = 1; 124 int option_gnuplot = 0; 125 char *option_gnuplot_basename; 126 struct size_array_t { 127 mp_size_t start, end; 128 } *size_array = NULL; 129 mp_size_t size_num = 0; 130 mp_size_t size_allocnum = 0; 131 int option_resource_usage = 0; 132 long option_seed = 123456789; 133 134 struct speed_params sp; 135 136 #define COLUMN_WIDTH 13 /* for the free-form output */ 137 138 #define FLAG_R (1<<0) /* require ".r" */ 139 #define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ 140 #define FLAG_RSIZE (1<<2) 141 #define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ 142 143 const struct routine_t { 144 /* constants */ 145 const char *name; 146 speed_function_t fun; 147 int flag; 148 } routine[] = { 149 150 { "noop", speed_noop }, 151 { "noop_wxs", speed_noop_wxs }, 152 { "noop_wxys", speed_noop_wxys }, 153 154 { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, 155 { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, 156 { "mpn_add_1", speed_mpn_add_1, FLAG_R }, 157 { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R }, 158 { "mpn_sub_1", speed_mpn_sub_1, FLAG_R }, 159 { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R }, 160 161 { "mpn_add_err1_n", speed_mpn_add_err1_n }, 162 { "mpn_add_err2_n", speed_mpn_add_err2_n }, 163 { "mpn_add_err3_n", speed_mpn_add_err3_n }, 164 { "mpn_sub_err1_n", speed_mpn_sub_err1_n }, 165 { "mpn_sub_err2_n", speed_mpn_sub_err2_n }, 166 { "mpn_sub_err3_n", speed_mpn_sub_err3_n }, 167 168 #if HAVE_NATIVE_mpn_add_n_sub_n 169 { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, 170 #endif 171 172 { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, 173 { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, 174 #if HAVE_NATIVE_mpn_addmul_2 175 { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL }, 176 #endif 177 #if HAVE_NATIVE_mpn_addmul_3 178 { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL }, 179 #endif 180 #if HAVE_NATIVE_mpn_addmul_4 181 { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL }, 182 #endif 183 #if HAVE_NATIVE_mpn_addmul_5 184 { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL }, 185 #endif 186 #if HAVE_NATIVE_mpn_addmul_6 187 { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL }, 188 #endif 189 #if HAVE_NATIVE_mpn_addmul_7 190 { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL }, 191 #endif 192 #if HAVE_NATIVE_mpn_addmul_8 193 { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL }, 194 #endif 195 { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, 196 { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, 197 #if HAVE_NATIVE_mpn_mul_2 198 { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL }, 199 #endif 200 #if HAVE_NATIVE_mpn_mul_3 201 { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL }, 202 #endif 203 #if HAVE_NATIVE_mpn_mul_4 204 { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, 205 #endif 206 #if HAVE_NATIVE_mpn_mul_5 207 { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL }, 208 #endif 209 #if HAVE_NATIVE_mpn_mul_6 210 { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL }, 211 #endif 212 213 { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, 214 { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, 215 #if HAVE_NATIVE_mpn_divrem_1c 216 { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, 217 { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, 218 #endif 219 { "mpn_mod_1", speed_mpn_mod_1, FLAG_R }, 220 #if HAVE_NATIVE_mpn_mod_1c 221 { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R }, 222 #endif 223 { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, 224 { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, 225 { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, 226 227 { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R }, 228 { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R }, 229 { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R }, 230 { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R }, 231 { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R }, 232 { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R }, 233 234 { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, 235 { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, 236 { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, 237 { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, 238 { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, 239 { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, 240 241 { "mpn_divrem_2", speed_mpn_divrem_2, }, 242 { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, 243 { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, 244 245 { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R }, 246 { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R }, 247 { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R }, 248 { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R }, 249 250 { "mpn_div_qr_2n", speed_mpn_div_qr_2n, }, 251 { "mpn_div_qr_2u", speed_mpn_div_qr_2u, }, 252 253 { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, 254 { "mpn_divexact_by3", speed_mpn_divexact_by3 }, 255 256 { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R }, 257 { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, 258 { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, 259 260 #if HAVE_NATIVE_mpn_modexact_1_odd 261 { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, 262 #endif 263 { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, 264 265 #if GMP_NUMB_BITS % 4 == 0 266 { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, 267 #endif 268 269 { "mpn_lshift", speed_mpn_lshift, FLAG_R }, 270 { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, 271 { "mpn_rshift", speed_mpn_rshift, FLAG_R }, 272 273 { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, 274 { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, 275 { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, 276 { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, 277 { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, 278 { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, 279 { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, 280 { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, 281 { "mpn_com", speed_mpn_com }, 282 { "mpn_neg", speed_mpn_neg }, 283 284 { "mpn_popcount", speed_mpn_popcount }, 285 { "mpn_hamdist", speed_mpn_hamdist }, 286 287 { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, 288 289 { "mpn_hgcd", speed_mpn_hgcd }, 290 { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, 291 { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, 292 { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer }, 293 294 { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce }, 295 { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 }, 296 { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 }, 297 298 { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, 299 { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, 300 301 { "mpn_gcd", speed_mpn_gcd }, 302 303 { "mpn_gcdext", speed_mpn_gcdext }, 304 { "mpn_gcdext_single", speed_mpn_gcdext_single }, 305 { "mpn_gcdext_double", speed_mpn_gcdext_double }, 306 { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, 307 { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, 308 #if 0 309 { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer }, 310 #endif 311 { "mpz_jacobi", speed_mpz_jacobi }, 312 { "mpn_jacobi_base", speed_mpn_jacobi_base }, 313 { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, 314 { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, 315 { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, 316 { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, 317 318 { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, 319 { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, 320 { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, 321 #if HAVE_NATIVE_mpn_sqr_diagonal 322 { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, 323 #endif 324 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1 325 { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 }, 326 #endif 327 328 { "mpn_mul_n", speed_mpn_mul_n }, 329 { "mpn_sqr", speed_mpn_sqr }, 330 331 { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, 332 { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, 333 { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, 334 { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, 335 { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, 336 { "mpn_toom22_mul", speed_mpn_toom22_mul }, 337 { "mpn_toom33_mul", speed_mpn_toom33_mul }, 338 { "mpn_toom44_mul", speed_mpn_toom44_mul }, 339 { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, 340 { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, 341 { "mpn_toom32_mul", speed_mpn_toom32_mul }, 342 { "mpn_toom42_mul", speed_mpn_toom42_mul }, 343 { "mpn_toom43_mul", speed_mpn_toom43_mul }, 344 { "mpn_toom63_mul", speed_mpn_toom63_mul }, 345 { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, 346 { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, 347 #if WANT_OLD_FFT_FULL 348 { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, 349 { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, 350 #endif 351 { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, 352 { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, 353 354 { "mpn_sqrlo", speed_mpn_sqrlo }, 355 { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase }, 356 { "mpn_mullo_n", speed_mpn_mullo_n }, 357 { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, 358 359 { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL }, 360 { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid }, 361 { "mpn_mulmid_n", speed_mpn_mulmid_n }, 362 { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL }, 363 364 { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, 365 { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, 366 { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, 367 { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, 368 369 { "mpn_invert", speed_mpn_invert }, 370 { "mpn_invertappr", speed_mpn_invertappr }, 371 { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, 372 { "mpn_binvert", speed_mpn_binvert }, 373 { "mpn_sec_invert", speed_mpn_sec_invert }, 374 375 { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, 376 { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, 377 { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, 378 { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, 379 { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, 380 { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, 381 382 { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, 383 { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, 384 { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, 385 { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, 386 387 { "mpn_broot", speed_mpn_broot, FLAG_R }, 388 { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R }, 389 { "mpn_brootinv", speed_mpn_brootinv, FLAG_R }, 390 391 { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, 392 { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, 393 { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, 394 395 { "mpn_sqrtrem", speed_mpn_sqrtrem }, 396 { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, 397 { "mpn_sqrt", speed_mpn_sqrt }, 398 { "mpn_root", speed_mpn_root, FLAG_R }, 399 400 { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, 401 { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, 402 { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, 403 { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, 404 { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, 405 406 { "mpz_add", speed_mpz_add }, 407 { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 408 { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL }, 409 { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, 410 { "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA }, 411 { "mpz_powm", speed_mpz_powm }, 412 { "mpz_powm_mod", speed_mpz_powm_mod }, 413 { "mpz_powm_redc", speed_mpz_powm_redc }, 414 { "mpz_powm_sec", speed_mpz_powm_sec }, 415 { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, 416 417 { "mpz_mod", speed_mpz_mod }, 418 { "mpn_redc_1", speed_mpn_redc_1 }, 419 { "mpn_redc_2", speed_mpn_redc_2 }, 420 { "mpn_redc_n", speed_mpn_redc_n }, 421 422 { "MPN_COPY", speed_MPN_COPY }, 423 { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, 424 { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, 425 { "memcpy", speed_memcpy }, 426 #if HAVE_NATIVE_mpn_copyi 427 { "mpn_copyi", speed_mpn_copyi }, 428 #endif 429 #if HAVE_NATIVE_mpn_copyd 430 { "mpn_copyd", speed_mpn_copyd }, 431 #endif 432 { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL }, 433 #if HAVE_NATIVE_mpn_addlsh1_n == 1 434 { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL }, 435 #endif 436 #if HAVE_NATIVE_mpn_sublsh1_n == 1 437 { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL }, 438 #endif 439 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 440 { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 }, 441 #endif 442 #if HAVE_NATIVE_mpn_addlsh1_n_ip2 443 { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 }, 444 #endif 445 #if HAVE_NATIVE_mpn_sublsh1_n_ip1 446 { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 }, 447 #endif 448 #if HAVE_NATIVE_mpn_rsblsh1_n == 1 449 { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL }, 450 #endif 451 #if HAVE_NATIVE_mpn_addlsh2_n == 1 452 { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL }, 453 #endif 454 #if HAVE_NATIVE_mpn_sublsh2_n == 1 455 { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL }, 456 #endif 457 #if HAVE_NATIVE_mpn_addlsh2_n_ip1 458 { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 }, 459 #endif 460 #if HAVE_NATIVE_mpn_addlsh2_n_ip2 461 { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 }, 462 #endif 463 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 464 { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 }, 465 #endif 466 #if HAVE_NATIVE_mpn_rsblsh2_n == 1 467 { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL }, 468 #endif 469 #if HAVE_NATIVE_mpn_addlsh_n 470 { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL }, 471 #endif 472 #if HAVE_NATIVE_mpn_sublsh_n 473 { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL }, 474 #endif 475 #if HAVE_NATIVE_mpn_addlsh_n_ip1 476 { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 }, 477 #endif 478 #if HAVE_NATIVE_mpn_addlsh_n_ip2 479 { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 }, 480 #endif 481 #if HAVE_NATIVE_mpn_sublsh_n_ip1 482 { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 }, 483 #endif 484 #if HAVE_NATIVE_mpn_rsblsh_n 485 { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL }, 486 #endif 487 #if HAVE_NATIVE_mpn_rsh1add_n 488 { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL }, 489 #endif 490 #if HAVE_NATIVE_mpn_rsh1sub_n 491 { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL }, 492 #endif 493 494 { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL }, 495 { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL }, 496 497 { "MPN_ZERO", speed_MPN_ZERO }, 498 499 { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, 500 { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA }, 501 { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA }, 502 { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA }, 503 { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA }, 504 505 { "malloc_free", speed_malloc_free }, 506 { "malloc_realloc_free", speed_malloc_realloc_free }, 507 { "gmp_allocate_free", speed_gmp_allocate_free }, 508 { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, 509 { "mpz_init_clear", speed_mpz_init_clear }, 510 { "mpq_init_clear", speed_mpq_init_clear }, 511 { "mpf_init_clear", speed_mpf_init_clear }, 512 { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, 513 514 { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, 515 #if HAVE_NATIVE_mpn_umul_ppmm 516 { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, 517 #endif 518 #if HAVE_NATIVE_mpn_umul_ppmm_r 519 { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL }, 520 #endif 521 522 { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 523 { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 524 525 { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, 526 { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, 527 #if HAVE_NATIVE_mpn_udiv_qrnnd 528 { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, 529 #endif 530 #if HAVE_NATIVE_mpn_udiv_qrnnd_r 531 { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL }, 532 #endif 533 { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, 534 535 { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, 536 { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, 537 538 { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL }, 539 { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA }, 540 { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA }, 541 542 #ifdef SPEED_EXTRA_ROUTINES 543 SPEED_EXTRA_ROUTINES 544 #endif 545 #ifdef SPEED_EXTRA_ROUTINES2 546 SPEED_EXTRA_ROUTINES2 547 #endif 548 }; 549 550 551 struct choice_t { 552 const struct routine_t *p; 553 mp_limb_t r; 554 double scale; 555 double time; 556 int no_time; 557 double prev_time; 558 const char *name; 559 }; 560 struct choice_t *choice; 561 int num_choices = 0; 562 563 564 void 565 data_fill (mp_ptr ptr, mp_size_t size) 566 { 567 switch (option_data) { 568 case DATA_RANDOM: 569 mpn_random (ptr, size); 570 break; 571 case DATA_RANDOM2: 572 mpn_random2 (ptr, size); 573 break; 574 case DATA_ZEROS: 575 MPN_ZERO (ptr, size); 576 break; 577 case DATA_AAS: 578 MPN_FILL (ptr, size, GMP_NUMB_0xAA); 579 break; 580 case DATA_FFS: 581 MPN_FILL (ptr, size, GMP_NUMB_MAX); 582 break; 583 case DATA_2FD: 584 MPN_FILL (ptr, size, GMP_NUMB_MAX); 585 ptr[0] -= 2; 586 break; 587 default: 588 abort(); 589 /*NOTREACHED*/ 590 } 591 } 592 593 /* The code here handling the various combinations of output options isn't 594 too attractive, but it works and is fairly clean. */ 595 596 #define SIZE_TO_DIVISOR(n) \ 597 (option_square == 1 ? (n)*(n) \ 598 : option_square == 2 ? (n)*((n)+1)/2 \ 599 : (n)) 600 601 void 602 run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) 603 { 604 const char *first_open_fastest, *first_open_notfastest, *first_close; 605 int i, fastest, want_data; 606 double fastest_time; 607 TMP_DECL; 608 609 TMP_MARK; 610 611 /* allocate data, unless all routines are NODATA */ 612 want_data = 0; 613 for (i = 0; i < num_choices; i++) 614 want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); 615 616 if (want_data) 617 { 618 SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); 619 SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); 620 621 data_fill (s->xp, s->size); 622 data_fill (s->yp, s->size); 623 } 624 else 625 { 626 sp.xp = NULL; 627 sp.yp = NULL; 628 } 629 630 if (prev_size == -1 && option_cmp == CMP_DIFFPREV) 631 { 632 first_open_fastest = "(#"; 633 first_open_notfastest = " ("; 634 first_close = ")"; 635 } 636 else 637 { 638 first_open_fastest = "#"; 639 first_open_notfastest = " "; 640 first_close = ""; 641 } 642 643 fastest = -1; 644 fastest_time = -1.0; 645 for (i = 0; i < num_choices; i++) 646 { 647 s->r = choice[i].r; 648 choice[i].time = speed_measure (choice[i].p->fun, s); 649 choice[i].no_time = (choice[i].time == -1.0); 650 if (! choice[i].no_time) 651 choice[i].time *= choice[i].scale; 652 653 /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time 654 is before any differences. */ 655 { 656 double t; 657 t = choice[i].time; 658 if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) 659 { 660 if (choice[i].prev_time == -1.0) 661 choice[i].no_time = 1; 662 else 663 choice[i].time = choice[i].time - choice[i].prev_time; 664 } 665 choice[i].prev_time = t; 666 } 667 668 if (choice[i].no_time) 669 continue; 670 671 /* Look for the fastest after CMP_DIFFPREV has been applied, but 672 before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown 673 if there's more than one routine. */ 674 if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) 675 { 676 fastest = i; 677 fastest_time = choice[i].time; 678 } 679 680 if (option_cmp == CMP_DIFFPREV) 681 { 682 /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ 683 if (option_unit == UNIT_CYCLES) 684 choice[i].time /= speed_cycletime; 685 else if (option_unit == UNIT_CYCLESPERLIMB) 686 { 687 if (prev_size == -1) 688 choice[i].time /= speed_cycletime; 689 else 690 choice[i].time /= (speed_cycletime 691 * (SIZE_TO_DIVISOR(s->size) 692 - SIZE_TO_DIVISOR(prev_size))); 693 } 694 } 695 else 696 { 697 if (option_unit == UNIT_CYCLES) 698 choice[i].time /= speed_cycletime; 699 else if (option_unit == UNIT_CYCLESPERLIMB) 700 choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); 701 702 if (option_cmp == CMP_RATIO && i > 0) 703 { 704 /* A ratio isn't affected by the units chosen. */ 705 if (choice[0].no_time || choice[0].time == 0.0) 706 choice[i].no_time = 1; 707 else 708 choice[i].time /= choice[0].time; 709 } 710 else if (option_cmp == CMP_DIFFERENCE && i > 0) 711 { 712 if (choice[0].no_time) 713 { 714 choice[i].no_time = 1; 715 continue; 716 } 717 choice[i].time -= choice[0].time; 718 } 719 } 720 } 721 722 if (option_gnuplot) 723 { 724 /* In CMP_DIFFPREV, don't print anything for the first size, start 725 with the second where an actual difference is available. 726 727 In CMP_RATIO, print the first column as 1.0. 728 729 The 9 decimals printed is much more than the expected precision of 730 the measurements actually. */ 731 732 if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) 733 { 734 fprintf (fp, "%-6ld ", s->size); 735 for (i = 0; i < num_choices; i++) 736 fprintf (fp, " %.9e", 737 choice[i].no_time ? 0.0 738 : (option_cmp == CMP_RATIO && i == 0) ? 1.0 739 : choice[i].time); 740 fprintf (fp, "\n"); 741 } 742 } 743 else 744 { 745 fprintf (fp, "%-6ld ", s->size); 746 for (i = 0; i < num_choices; i++) 747 { 748 char buf[128]; 749 int decimals; 750 751 if (choice[i].no_time) 752 { 753 fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); 754 } 755 else 756 {if (option_unit == UNIT_CYCLESPERLIMB 757 || (option_cmp == CMP_RATIO && i > 0)) 758 decimals = 4; 759 else if (option_unit == UNIT_CYCLES) 760 decimals = 2; 761 else 762 decimals = 9; 763 764 sprintf (buf, "%s%.*f%s", 765 i == fastest ? first_open_fastest : first_open_notfastest, 766 decimals, choice[i].time, first_close); 767 fprintf (fp, " %*s", COLUMN_WIDTH, buf); 768 } 769 } 770 fprintf (fp, "\n"); 771 } 772 773 TMP_FREE; 774 } 775 776 void 777 run_all (FILE *fp) 778 { 779 mp_size_t prev_size; 780 int i; 781 TMP_DECL; 782 783 TMP_MARK; 784 SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); 785 SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); 786 787 data_fill (sp.xp_block, SPEED_BLOCK_SIZE); 788 data_fill (sp.yp_block, SPEED_BLOCK_SIZE); 789 790 for (i = 0; i < size_num; i++) 791 { 792 sp.size = size_array[i].start; 793 prev_size = -1; 794 for (;;) 795 { 796 mp_size_t step; 797 798 if (option_data == DATA_2FD && sp.size >= 2) 799 sp.xp[sp.size-1] = 2; 800 801 run_one (fp, &sp, prev_size); 802 prev_size = sp.size; 803 804 if (option_data == DATA_2FD && sp.size >= 2) 805 sp.xp[sp.size-1] = MP_LIMB_T_MAX; 806 807 if (option_factor != 0.0) 808 { 809 step = (mp_size_t) (sp.size * option_factor - sp.size); 810 if (step < 1) 811 step = 1; 812 } 813 else 814 step = 1; 815 if (step < option_step) 816 step = option_step; 817 818 sp.size += step; 819 if (sp.size > size_array[i].end) 820 break; 821 } 822 } 823 824 TMP_FREE; 825 } 826 827 828 FILE * 829 fopen_for_write (const char *filename) 830 { 831 FILE *fp; 832 if ((fp = fopen (filename, "w")) == NULL) 833 { 834 fprintf (stderr, "Cannot create %s\n", filename); 835 exit(1); 836 } 837 return fp; 838 } 839 840 void 841 fclose_written (FILE *fp, const char *filename) 842 { 843 int err; 844 845 err = ferror (fp); 846 err |= fclose (fp); 847 848 if (err) 849 { 850 fprintf (stderr, "Error writing %s\n", filename); 851 exit(1); 852 } 853 } 854 855 856 void 857 run_gnuplot (int argc, char *argv[]) 858 { 859 char *plot_filename; 860 char *data_filename; 861 FILE *fp; 862 int i; 863 864 plot_filename = (char *) (*__gmp_allocate_func) 865 (strlen (option_gnuplot_basename) + 20); 866 data_filename = (char *) (*__gmp_allocate_func) 867 (strlen (option_gnuplot_basename) + 20); 868 869 sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); 870 sprintf (data_filename, "%s.data", option_gnuplot_basename); 871 872 fp = fopen_for_write (plot_filename); 873 874 fprintf (fp, "# Generated with:\n"); 875 fprintf (fp, "#"); 876 for (i = 0; i < argc; i++) 877 fprintf (fp, " %s", argv[i]); 878 fprintf (fp, "\n"); 879 fprintf (fp, "\n"); 880 881 fprintf (fp, "reset\n"); 882 883 /* Putting the key at the top left is usually good, and you can change it 884 interactively if it's not. */ 885 fprintf (fp, "set key left\n"); 886 887 /* designed to make it possible to see crossovers easily */ 888 fprintf (fp, "set style data lines\n"); 889 890 fprintf (fp, "plot "); 891 for (i = 0; i < num_choices; i++) 892 { 893 fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); 894 fprintf (fp, " title \"%s\"", choice[i].name); 895 896 if (i != num_choices-1) 897 fprintf (fp, ", \\"); 898 fprintf (fp, "\n"); 899 } 900 901 fprintf (fp, "load \"-\"\n"); 902 fclose_written (fp, plot_filename); 903 904 fp = fopen_for_write (data_filename); 905 906 /* Unbuffered so you can see where the program was up to if it crashes or 907 you kill it. */ 908 setbuf (fp, NULL); 909 910 run_all (fp); 911 fclose_written (fp, data_filename); 912 } 913 914 915 /* Return a limb with n many one bits (starting from the least significant) */ 916 917 #define LIMB_ONES(n) \ 918 ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ 919 : (n) == 0 ? CNST_LIMB(0) \ 920 : (CNST_LIMB(1) << (n)) - 1) 921 922 mp_limb_t 923 r_string (const char *s) 924 { 925 const char *s_orig = s; 926 long n; 927 928 if (strcmp (s, "aas") == 0) 929 return GMP_NUMB_0xAA; 930 931 { 932 mpz_t z; 933 mp_limb_t l; 934 int set, siz; 935 936 mpz_init (z); 937 set = mpz_set_str (z, s, 0); 938 siz = SIZ(z); 939 l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); 940 mpz_clear (z); 941 if (set == 0) 942 { 943 if (siz > 1 || siz < -1) 944 printf ("Warning, r parameter %s truncated to %d bits\n", 945 s_orig, GMP_LIMB_BITS); 946 return l; 947 } 948 } 949 950 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) 951 n = strtoul (s+2, (char **) &s, 16); 952 else 953 n = strtol (s, (char **) &s, 10); 954 955 if (strcmp (s, "bits") == 0) 956 { 957 mp_limb_t l; 958 if (n > GMP_LIMB_BITS) 959 { 960 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 961 n, GMP_LIMB_BITS); 962 exit (1); 963 } 964 mpn_random (&l, 1); 965 return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); 966 } 967 else if (strcmp (s, "ones") == 0) 968 { 969 if (n > GMP_LIMB_BITS) 970 { 971 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 972 n, GMP_LIMB_BITS); 973 exit (1); 974 } 975 return LIMB_ONES (n); 976 } 977 else if (*s != '\0') 978 { 979 fprintf (stderr, "invalid r parameter: %s\n", s_orig); 980 exit (1); 981 } 982 983 return n; 984 } 985 986 987 void 988 routine_find (struct choice_t *c, const char *s_orig) 989 { 990 const char *s; 991 int i; 992 size_t nlen; 993 994 c->name = s_orig; 995 s = strchr (s_orig, '*'); 996 if (s != NULL) 997 { 998 c->scale = atof(s_orig); 999 s++; 1000 } 1001 else 1002 { 1003 c->scale = 1.0; 1004 s = s_orig; 1005 } 1006 1007 for (i = 0; i < numberof (routine); i++) 1008 { 1009 nlen = strlen (routine[i].name); 1010 if (memcmp (s, routine[i].name, nlen) != 0) 1011 continue; 1012 1013 if (s[nlen] == '.') 1014 { 1015 /* match, with a .r parameter */ 1016 1017 if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) 1018 { 1019 fprintf (stderr, 1020 "Choice %s bad: doesn't take a \".<r>\" parameter\n", 1021 s_orig); 1022 exit (1); 1023 } 1024 1025 c->p = &routine[i]; 1026 c->r = r_string (s + nlen + 1); 1027 return; 1028 } 1029 1030 if (s[nlen] == '\0') 1031 { 1032 /* match, with no parameter */ 1033 1034 if (routine[i].flag & FLAG_R) 1035 { 1036 fprintf (stderr, 1037 "Choice %s bad: needs a \".<r>\" parameter\n", 1038 s_orig); 1039 exit (1); 1040 } 1041 1042 c->p = &routine[i]; 1043 c->r = 0; 1044 return; 1045 } 1046 } 1047 1048 fprintf (stderr, "Choice %s unrecognised\n", s_orig); 1049 exit (1); 1050 } 1051 1052 1053 void 1054 usage (void) 1055 { 1056 int i; 1057 1058 speed_time_init (); 1059 1060 printf ("Usage: speed [-options] -s size <routine>...\n"); 1061 printf ("Measure the speed of some routines.\n"); 1062 printf ("Times are in seconds, accuracy is shown.\n"); 1063 printf ("\n"); 1064 printf (" -p num set precision as number of time units each routine must run\n"); 1065 printf (" -s size[-end][,size[-end]]... sizes to measure\n"); 1066 printf (" single sizes or ranges, sep with comma or use multiple -s\n"); 1067 printf (" -t step step through sizes by given amount\n"); 1068 printf (" -f factor step through sizes by given factor (eg. 1.05)\n"); 1069 printf (" -r show times as ratios of the first routine\n"); 1070 printf (" -d show times as difference from the first routine\n"); 1071 printf (" -D show times as difference from previous size shown\n"); 1072 printf (" -c show times in CPU cycles\n"); 1073 printf (" -C show times in cycles per limb\n"); 1074 printf (" -u print resource usage (memory) at end\n"); 1075 printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); 1076 printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); 1077 printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); 1078 printf (" -o addrs print addresses of data blocks\n"); 1079 printf ("\n"); 1080 printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); 1081 printf ("is greater.\n"); 1082 printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); 1083 printf ("size and the previous size.\n"); 1084 printf ("\n"); 1085 printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); 1086 printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); 1087 printf ("a log/log plot).\n"); 1088 printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); 1089 printf ("when viewing more than one routine, it means same axis scales for all data).\n"); 1090 printf ("\n"); 1091 printf ("The available routines are as follows.\n"); 1092 printf ("\n"); 1093 1094 for (i = 0; i < numberof (routine); i++) 1095 { 1096 if (routine[i].flag & FLAG_R) 1097 printf ("\t%s.r\n", routine[i].name); 1098 else if (routine[i].flag & FLAG_R_OPTIONAL) 1099 printf ("\t%s (optional .r)\n", routine[i].name); 1100 else 1101 printf ("\t%s\n", routine[i].name); 1102 } 1103 printf ("\n"); 1104 printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); 1105 printf ("r should be in decimal, or use 0xN for hexadecimal.\n"); 1106 printf ("\n"); 1107 printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); 1108 printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); 1109 printf ("\n"); 1110 printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); 1111 printf ("The fastest routine at each size is marked with a # (free form output only).\n"); 1112 printf ("\n"); 1113 printf ("%s", speed_time_string); 1114 printf ("\n"); 1115 printf ("Gnuplot home page http://www.gnuplot.info/\n"); 1116 printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); 1117 } 1118 1119 void 1120 check_align_option (const char *name, mp_size_t align) 1121 { 1122 if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) 1123 { 1124 fprintf (stderr, "Alignment request out of range: %s %ld\n", 1125 name, (long) align); 1126 fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", 1127 SPEED_TMP_ALLOC_ADJUST_MASK); 1128 exit (1); 1129 } 1130 } 1131 1132 int 1133 main (int argc, char *argv[]) 1134 { 1135 int i; 1136 int opt; 1137 1138 /* Unbuffered so output goes straight out when directed to a pipe or file 1139 and isn't lost on killing the program half way. */ 1140 setbuf (stdout, NULL); 1141 1142 for (;;) 1143 { 1144 opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); 1145 if (opt == EOF) 1146 break; 1147 1148 switch (opt) { 1149 case 'a': 1150 if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; 1151 else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; 1152 else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 1153 else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; 1154 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 1155 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 1156 else 1157 { 1158 fprintf (stderr, "unrecognised data option: %s\n", optarg); 1159 exit (1); 1160 } 1161 break; 1162 case 'C': 1163 if (option_unit != UNIT_SECONDS) goto bad_unit; 1164 option_unit = UNIT_CYCLESPERLIMB; 1165 break; 1166 case 'c': 1167 if (option_unit != UNIT_SECONDS) 1168 { 1169 bad_unit: 1170 fprintf (stderr, "cannot use more than one of -c, -C\n"); 1171 exit (1); 1172 } 1173 option_unit = UNIT_CYCLES; 1174 break; 1175 case 'D': 1176 if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; 1177 option_cmp = CMP_DIFFPREV; 1178 break; 1179 case 'd': 1180 if (option_cmp != CMP_ABSOLUTE) 1181 { 1182 bad_cmp: 1183 fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); 1184 exit (1); 1185 } 1186 option_cmp = CMP_DIFFERENCE; 1187 break; 1188 case 'E': 1189 option_square = 1; 1190 break; 1191 case 'F': 1192 option_square = 2; 1193 break; 1194 case 'f': 1195 option_factor = atof (optarg); 1196 if (option_factor <= 1.0) 1197 { 1198 fprintf (stderr, "-f factor must be > 1.0\n"); 1199 exit (1); 1200 } 1201 break; 1202 case 'o': 1203 speed_option_set (optarg); 1204 break; 1205 case 'P': 1206 option_gnuplot = 1; 1207 option_gnuplot_basename = optarg; 1208 break; 1209 case 'p': 1210 speed_precision = atoi (optarg); 1211 break; 1212 case 'R': 1213 option_seed = time (NULL); 1214 break; 1215 case 'r': 1216 if (option_cmp != CMP_ABSOLUTE) 1217 goto bad_cmp; 1218 option_cmp = CMP_RATIO; 1219 break; 1220 case 's': 1221 { 1222 char *s; 1223 for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) 1224 { 1225 if (size_num == size_allocnum) 1226 { 1227 size_array = (struct size_array_t *) 1228 __gmp_allocate_or_reallocate 1229 (size_array, 1230 size_allocnum * sizeof(size_array[0]), 1231 (size_allocnum+10) * sizeof(size_array[0])); 1232 size_allocnum += 10; 1233 } 1234 if (sscanf (s, "%ld-%ld", 1235 &size_array[size_num].start, 1236 &size_array[size_num].end) != 2) 1237 { 1238 size_array[size_num].start = size_array[size_num].end 1239 = atol (s); 1240 } 1241 1242 if (size_array[size_num].start < 0 1243 || size_array[size_num].end < 0 1244 || size_array[size_num].start > size_array[size_num].end) 1245 { 1246 fprintf (stderr, "invalid size parameter: %s\n", s); 1247 exit (1); 1248 } 1249 1250 size_num++; 1251 } 1252 } 1253 break; 1254 case 't': 1255 option_step = atol (optarg); 1256 if (option_step < 1) 1257 { 1258 fprintf (stderr, "-t step must be >= 1\n"); 1259 exit (1); 1260 } 1261 break; 1262 case 'u': 1263 option_resource_usage = 1; 1264 break; 1265 case 'z': 1266 sp.cache = 1; 1267 break; 1268 case 'x': 1269 sp.align_xp = atol (optarg); 1270 check_align_option ("-x", sp.align_xp); 1271 break; 1272 case 'y': 1273 sp.align_yp = atol (optarg); 1274 check_align_option ("-y", sp.align_yp); 1275 break; 1276 case 'w': 1277 sp.align_wp = atol (optarg); 1278 check_align_option ("-w", sp.align_wp); 1279 break; 1280 case 'W': 1281 sp.align_wp2 = atol (optarg); 1282 check_align_option ("-W", sp.align_wp2); 1283 break; 1284 case '?': 1285 exit(1); 1286 } 1287 } 1288 1289 if (optind >= argc) 1290 { 1291 usage (); 1292 exit (1); 1293 } 1294 1295 if (size_num == 0) 1296 { 1297 fprintf (stderr, "-s <size> must be specified\n"); 1298 exit (1); 1299 } 1300 1301 gmp_randinit_default (__gmp_rands); 1302 __gmp_rands_initialized = 1; 1303 gmp_randseed_ui (__gmp_rands, option_seed); 1304 1305 choice = (struct choice_t *) (*__gmp_allocate_func) 1306 ((argc - optind) * sizeof(choice[0])); 1307 for ( ; optind < argc; optind++) 1308 { 1309 struct choice_t c; 1310 routine_find (&c, argv[optind]); 1311 choice[num_choices] = c; 1312 num_choices++; 1313 } 1314 1315 if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && 1316 num_choices < 2) 1317 { 1318 fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); 1319 } 1320 1321 speed_time_init (); 1322 if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) 1323 speed_cycletime_need_cycles (); 1324 else 1325 speed_cycletime_need_seconds (); 1326 1327 if (option_gnuplot) 1328 { 1329 run_gnuplot (argc, argv); 1330 } 1331 else 1332 { 1333 if (option_unit == UNIT_SECONDS) 1334 printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); 1335 else 1336 printf ("overhead %.2f cycles", 1337 speed_measure (speed_noop, NULL) / speed_cycletime); 1338 printf (", precision %d units of %.2e secs", 1339 speed_precision, speed_unittime); 1340 1341 if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1342 printf (", CPU freq unknown\n"); 1343 else 1344 printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); 1345 1346 printf (" "); 1347 for (i = 0; i < num_choices; i++) 1348 printf (" %*s", COLUMN_WIDTH, choice[i].name); 1349 printf ("\n"); 1350 1351 run_all (stdout); 1352 } 1353 1354 if (option_resource_usage) 1355 { 1356 #if HAVE_GETRUSAGE 1357 { 1358 /* This doesn't give data sizes on linux 2.0.x, only utime. */ 1359 struct rusage r; 1360 if (getrusage (RUSAGE_SELF, &r) != 0) 1361 perror ("getrusage"); 1362 else 1363 printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", 1364 r.ru_utime.tv_sec, r.ru_utime.tv_usec, 1365 r.ru_idrss, r.ru_isrss, r.ru_ixrss); 1366 } 1367 #else 1368 printf ("getrusage() not available\n"); 1369 #endif 1370 1371 /* Linux kernel. */ 1372 { 1373 char buf[128]; 1374 sprintf (buf, "/proc/%d/status", getpid()); 1375 if (access (buf, R_OK) == 0) 1376 { 1377 sprintf (buf, "cat /proc/%d/status", getpid()); 1378 system (buf); 1379 } 1380 1381 } 1382 } 1383 1384 return 0; 1385 }