github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/time.c (about) 1 /* Time routines for speed measurements. 2 3 Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20 or both in parallel, as here. 21 22 The GNU MP Library is distributed in the hope that it will be useful, but 23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 for more details. 26 27 You should have received copies of the GNU General Public License and the 28 GNU Lesser General Public License along with the GNU MP Library. If not, 29 see https://www.gnu.org/licenses/. */ 30 31 32 /* Usage: 33 34 The code in this file implements the lowest level of time measuring, 35 simple one-time measuring of time between two points. 36 37 void speed_starttime (void) 38 double speed_endtime (void) 39 Call speed_starttime to start measuring, and then call speed_endtime 40 when done. 41 42 speed_endtime returns the time taken, in seconds. Or if the timebase 43 is in CPU cycles and the CPU frequency is unknown then speed_endtime 44 returns cycles. Applications can identify the cycles return by 45 checking for speed_cycletime (described below) equal to 1.0. 46 47 If some sort of temporary glitch occurs then speed_endtime returns 48 0.0. Currently this is for various cases where a negative time has 49 occurred. 
This unfortunately occurs with getrusage on some systems, 50 and with the hppa cycle counter on hpux. 51 52 double speed_cycletime 53 The time in seconds for each CPU cycle. For example on a 100 MHz CPU 54 this would be 1.0e-8. 55 56 If the CPU frequency is unknown, then speed_cycletime is either 0.0 57 or 1.0. It's 0.0 when speed_endtime is returning seconds, or it's 58 1.0 when speed_endtime is returning cycles. 59 60 It may be noted that "speed_endtime() / speed_cycletime" gives a 61 measured time in cycles, irrespective of whether speed_endtime is 62 returning cycles or seconds. (Assuming cycles can be had, ie. it's 63 either cycles already or the cpu frequency is known. See also 64 speed_cycletime_need_cycles below.) 65 66 double speed_unittime 67 The unit of time measurement accuracy for the timing method in use. 68 This is in seconds or cycles, as per speed_endtime. 69 70 char speed_time_string[] 71 A null-terminated string describing the time method in use. 72 73 void speed_time_init (void) 74 Initialize time measuring. speed_starttime() does this 75 automatically, so it's only needed if an application wants to inspect 76 the above global variables before making a measurement. 77 78 int speed_precision 79 The intended accuracy of time measurements. speed_measure() in 80 common.c for instance runs target routines with enough repetitions so 81 it takes at least "speed_unittime * speed_precision" (this expression 82 works for both cycles or seconds from speed_endtime). 83 84 A program can provide an option so that the user can set speed_precision. 85 If speed_precision is zero when speed_time_init or speed_starttime 86 first run then it gets a default based on the measuring method 87 chosen. (More precision for higher accuracy methods.) 88 89 void speed_cycletime_need_seconds (void) 90 Call this to demand that speed_endtime will return seconds, and not 91 cycles. If only cycles are available then an error is printed and 92 the program exits. 
93 94 void speed_cycletime_need_cycles (void) 95 Call this to demand that speed_cycletime is non-zero, so that 96 "speed_endtime() / speed_cycletime" will give times in cycles. 97 98 99 100 Notes: 101 102 Various combinations of cycle counter, read_real_time(), getrusage(), 103 gettimeofday() and times() can arise, according to which are available 104 and their precision. 105 106 107 Allowing speed_endtime() to return either seconds or cycles is only a 108 slight complication and makes it possible for the speed program to do 109 some sensible things without demanding the CPU frequency. If seconds are 110 being measured then it can always print seconds, and if cycles are being 111 measured then it can always print them without needing to know how long 112 they are. Also the tune program doesn't care at all what the units are. 113 114 GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c 115 fail. This will be needed if times in seconds are wanted but a cycle 116 counter is being used, or if times in cycles are wanted but getrusage or 117 another seconds based timer is in use. 118 119 If the measuring method uses a cycle counter but supplements it with 120 getrusage or the like, then knowing the CPU frequency is mandatory since 121 the code compares values from the two. 122 123 124 Not done: 125 126 Solaris gethrtime() seems no more than a slow way to access the Sparc V9 127 cycle counter. gethrvtime() seems to be relevant only to light weight 128 processes, it doesn't for instance give nanosecond virtual time. So 129 neither of these are used. 130 131 132 Bugs: 133 134 getrusage_microseconds_p is fundamentally flawed, getrusage and 135 gettimeofday can have resolutions other than clock ticks or microseconds, 136 for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms. 137 138 139 Enhancements: 140 141 The SGI hardware counter has 64 bits on some machines, which could be 142 used when available. 
But perhaps 32 bits is enough range, and then rely 143 on the getrusage supplement. 144 145 Maybe getrusage (or times) should be used as a supplement for any 146 wall-clock measuring method. Currently a wall clock with a good range 147 (eg. a 64-bit cycle counter) is used without a supplement. 148 149 On PowerPC the timebase registers could be used, but would have to do 150 something to find out the speed. On 6xx chips it's normally 1/4 bus 151 speed, on 4xx chips it's either that or an external clock. Measuring 152 against gettimeofday might be ok. */ 153 154 155 #include "config.h" 156 157 #include <errno.h> 158 #include <setjmp.h> 159 #include <signal.h> 160 #include <stddef.h> 161 #include <stdio.h> 162 #include <string.h> 163 #include <stdlib.h> /* for getenv() */ 164 165 #if HAVE_FCNTL_H 166 #include <fcntl.h> /* for open() */ 167 #endif 168 169 #if HAVE_STDINT_H 170 #include <stdint.h> /* for uint64_t */ 171 #endif 172 173 #if HAVE_UNISTD_H 174 #include <unistd.h> /* for sysconf() */ 175 #endif 176 177 #include <sys/types.h> 178 179 #if TIME_WITH_SYS_TIME 180 # include <sys/time.h> /* for struct timeval */ 181 # include <time.h> 182 #else 183 # if HAVE_SYS_TIME_H 184 # include <sys/time.h> 185 # else 186 # include <time.h> 187 # endif 188 #endif 189 190 #if HAVE_SYS_MMAN_H 191 #include <sys/mman.h> /* for mmap() */ 192 #endif 193 194 #if HAVE_SYS_RESOURCE_H 195 #include <sys/resource.h> /* for struct rusage */ 196 #endif 197 198 #if HAVE_SYS_SYSSGI_H 199 #include <sys/syssgi.h> /* for syssgi() */ 200 #endif 201 202 #if HAVE_SYS_SYSTEMCFG_H 203 #include <sys/systemcfg.h> /* for RTC_POWER on AIX */ 204 #endif 205 206 #if HAVE_SYS_TIMES_H 207 #include <sys/times.h> /* for times() and struct tms */ 208 #endif 209 210 #include "gmp.h" 211 #include "gmp-impl.h" 212 213 #include "speed.h" 214 215 216 /* strerror is only used for some stuff on newish systems, no need to have a 217 proper replacement */ 218 #if ! 
HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif


/* Public state; each of these is described in the usage notes at the top
   of this file.  */
char    speed_time_string[256];
int     speed_precision = 0;
double  speed_unittime;
double  speed_cycletime = 0.0;


/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)


/* Conditionals for the time functions available are done with normal C
   code, which is a lot easier than wildly nested preprocessor directives.

   The choice of what to use is partly made at run-time, according to
   whether the cycle counter works and the measured accuracy of getrusage
   and gettimeofday.

   A routine that's not available won't be getting called, but is an abort()
   to be sure it isn't called mistakenly.

   It can be assumed that if a function exists then its data type will, but
   if the function doesn't then the data type might or might not exist, so
   the type can't be used unconditionally.  The "struct_rusage" etc macros
   provide dummies when the respective function doesn't exist.  */


/* have_cycles takes the value of HAVE_SPEED_CYCLECOUNTER when that is set,
   otherwise 0 with speed_cyclecounter() replaced by an assertion failure.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif

/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */

/* mftb
   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).

   The inline asm form reads the upper word, the lower word, then the upper
   word again, and repeats until both upper reads agree.  The lower word is
   stored in a[0] and the upper word in a[1].  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
                    "mftb %1\n"         \
                    "mftbu %2"          \
                    : "=r" (__h1),      \
                      "=r" (__l),       \
                      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif

/* Unicos 10.X has syssgi(), but not mmap().
*/
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif

/* When read_real_time() is missing, the calls become assertion failures and
   a stand-in timebasestruct_t supplies the fields this file touches.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif

/* The clock_gettime/clock_getres stand-ins are comma expressions yielding
   -1, so they can still be used where a return value is tested.  */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval       struct timeval_dummy
#endif

#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif

/* Dummy types behind the struct_xxx macros above.  They are declared
   unconditionally and carry only the members named here.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};

/* Run-time selection flags, one per measuring method.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* Starting readings, one slot per measuring method.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)


/* Return a string representing a time in seconds, nicely formatted.
   Eg. "10.25ms".
*/ 421 char * 422 unittime_string (double t) 423 { 424 static char buf[128]; 425 426 const char *unit; 427 int prec; 428 429 /* choose units and scale */ 430 if (t < 1e-6) 431 t *= 1e9, unit = "ns"; 432 else if (t < 1e-3) 433 t *= 1e6, unit = "us"; 434 else if (t < 1.0) 435 t *= 1e3, unit = "ms"; 436 else 437 unit = "s"; 438 439 /* want 4 significant figures */ 440 if (t < 1.0) 441 prec = 4; 442 else if (t < 10.0) 443 prec = 3; 444 else if (t < 100.0) 445 prec = 2; 446 else 447 prec = 1; 448 449 sprintf (buf, "%.*f%s", prec, t, unit); 450 return buf; 451 } 452 453 454 static jmp_buf cycles_works_buf; 455 456 static RETSIGTYPE 457 cycles_works_handler (int sig) 458 { 459 longjmp (cycles_works_buf, 1); 460 } 461 462 int 463 cycles_works_p (void) 464 { 465 static int result = -1; 466 467 if (result != -1) 468 goto done; 469 470 /* FIXME: On linux, the cycle counter is not saved and restored over 471 * context switches, making it almost useless for precise cputime 472 * measurements. When available, it's better to use clock_gettime, 473 * which seems to have reasonable accuracy (tested on x86_32, 474 * linux-2.6.26, glibc-2.7). However, there are also some linux 475 * systems where clock_gettime is broken in one way or the other, 476 * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or 477 * kind-of implemented but broken (needs code to detect that), and 478 * on those systems a wall-clock cycle counter is the least bad 479 * fallback. 480 * 481 * So we need some code to disable the cycle counter on some but not 482 * all linux systems. 
*/ 483 #ifdef SIGILL 484 { 485 RETSIGTYPE (*old_handler) (int); 486 unsigned cycles[2]; 487 488 old_handler = signal (SIGILL, cycles_works_handler); 489 if (old_handler == SIG_ERR) 490 { 491 if (speed_option_verbose) 492 printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n"); 493 goto yes; 494 } 495 if (setjmp (cycles_works_buf)) 496 { 497 if (speed_option_verbose) 498 printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n"); 499 result = 0; 500 goto done; 501 } 502 speed_cyclecounter (cycles); 503 signal (SIGILL, old_handler); 504 if (speed_option_verbose) 505 printf ("cycles_works_p(): speed_cyclecounter() works\n"); 506 } 507 #else 508 509 if (speed_option_verbose) 510 printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n"); 511 goto yes; 512 #endif 513 514 yes: 515 result = 1; 516 517 done: 518 return result; 519 } 520 521 522 /* The number of clock ticks per second, but looking at sysconf rather than 523 just CLK_TCK, where possible. */ 524 long 525 clk_tck (void) 526 { 527 static long result = -1L; 528 if (result != -1L) 529 return result; 530 531 #if HAVE_SYSCONF 532 result = sysconf (_SC_CLK_TCK); 533 if (result != -1L) 534 { 535 if (speed_option_verbose) 536 printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", result); 537 return result; 538 } 539 540 fprintf (stderr, 541 "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n"); 542 #endif 543 544 #ifdef CLK_TCK 545 result = CLK_TCK; 546 if (speed_option_verbose) 547 printf ("CLK_TCK is %ld per second\n", result); 548 return result; 549 #else 550 fprintf (stderr, "CLK_TCK not defined, cannot continue\n"); 551 abort (); 552 #endif 553 } 554 555 556 /* If two times can be observed less than half a clock tick apart, then 557 assume "get" is microsecond accurate. 

   Two times only 1 microsecond apart are not believed, since some kernels
   take it upon themselves to ensure gettimeofday doesn't return the same
   value twice, for the benefit of applications using it for a timestamp.
   This is obviously very stupid given the speed of CPUs these days.

   Making "reps" many calls to noop_1() is designed to waste some CPU, with
   a view to getting measurements 2 microseconds (or more) apart.  "reps" is
   increased progressively until such a period is seen.

   The outer loop "attempts" are just to allow for any random nonsense or
   system load upsetting the measurements (ie. making two successive calls
   to "get" come out as a longer interval than normal).

   Bugs:

   The assumption that any interval less than a half tick implies
   microsecond resolution is obviously fairly rash, the true resolution
   could be anything between a microsecond and that half tick.  Perhaps
   something special would have to be done on a system where this is the
   case, since there's no obvious reliable way to detect it
   automatically.  */

/* MICROSECONDS_P expands to a complete function body, including the
   "return", so it must be the last thing in the function using it.

     name   string used in verbose messages
     type   type of the time value filled in by "get"
     get    macro taking a variable of "type" and storing the time in it
     sec    macro extracting the seconds field from such a variable
     usec   macro extracting the microseconds field

   The result is cached in a static, so the measuring is done only on the
   first call of each function built from this macro.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
                                                                        \
    if (result != -1)                                                   \
      return result;                                                    \
                                                                        \
    result = 0;                                                         \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
                                                                        \
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
        reps = 0;                                                       \
        for (;;)                                                        \
          {                                                             \
            get (st);                                                   \
            for (i = 0; i < reps; i++)                                  \
              for (j = 0; j < 100; j++)                                 \
                noop_1 (CNST_LIMB(0));                                  \
            get (et);                                                   \
                                                                        \
            dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
                                                                        \
            if (speed_option_verbose >= 2)                              \
              printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
                      name, attempt, reps, dt);                         \
                                                                        \
            if (dt >= 2)                                                \
              break;                                                    \
                                                                        \
            reps = (reps == 0 ? 1 : 2*reps);                            \
            if (reps == 0)                                              \
              break;  /* uint overflow, not normal */                   \
          }                                                             \
                                                                        \
        if (dt < half_tick)                                             \
          {                                                             \
            result = 1;                                                 \
            break;                                                      \
          }                                                             \
      }                                                                 \
                                                                        \
    if (speed_option_verbose)                                           \
      {                                                                 \
        if (result)                                                     \
          printf ("%s is microsecond accurate\n", name);                \
        else                                                            \
          printf ("%s is only %s clock tick accurate\n",                \
                  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }


/* Return 1 if gettimeofday was seen to advance by less than half a clock
   tick, 0 otherwise.  The body comes entirely from MICROSECONDS_P.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
                  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}

/* As gettimeofday_microseconds_p, but for the ru_utime field filled in by
   getrusage.  */
int
getrusage_microseconds_p (void)
{
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
                  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}

/* Test whether getrusage goes backwards, return non-zero if it does
   (suggesting it's flawed).

   On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
   microsecond accurate, but has been seen remaining unchanged after many
   microseconds have elapsed.  It also regularly goes backwards by 1000 to
   5000 usecs, this has been seen after between 500 and 4000 attempts taking
   perhaps 0.03 seconds.  We consider this too broken for good measuring.
   We used to have configure pretend getrusage didn't exist on this system,
   but a runtime test should be more reliable, since we imagine the problem
   is not confined to just this exact system tuple.
*/ 669 670 int 671 getrusage_backwards_p (void) 672 { 673 static int result = -1; 674 struct rusage start, prev, next; 675 long d; 676 int i; 677 678 if (result != -1) 679 return result; 680 681 getrusage (0, &start); 682 memcpy (&next, &start, sizeof (next)); 683 684 result = 0; 685 i = 0; 686 for (;;) 687 { 688 memcpy (&prev, &next, sizeof (prev)); 689 getrusage (0, &next); 690 691 if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec 692 || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec 693 && next.ru_utime.tv_usec < prev.ru_utime.tv_usec)) 694 { 695 if (speed_option_verbose) 696 printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n", 697 i, 698 (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec, 699 (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec); 700 result = 1; 701 break; 702 } 703 704 /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000 705 attempts, whichever comes first */ 706 d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec) 707 + (next.ru_utime.tv_usec - start.ru_utime.tv_usec); 708 i++; 709 if (i > 50000 || (i > 1000 && d > 100000)) 710 break; 711 } 712 713 return result; 714 } 715 716 /* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version 717 of glibc (some time post 2.2). 718 719 CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes 720 defined, but returning -1 for an error). */ 721 722 #ifdef CLOCK_PROCESS_CPUTIME_ID 723 # define CGT_ID CLOCK_PROCESS_CPUTIME_ID 724 #else 725 # ifdef CLOCK_VIRTUAL 726 # define CGT_ID CLOCK_VIRTUAL 727 # endif 728 #endif 729 #ifdef CGT_ID 730 const int have_cgt_id = 1; 731 #else 732 const int have_cgt_id = 0; 733 # define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1) 734 #endif 735 736 #define CGT_DELAY_COUNT 1000 737 738 int 739 cgt_works_p (void) 740 { 741 static int result = -1; 742 struct_timespec unit; 743 744 if (! have_cgt) 745 return 0; 746 747 if (! 
have_cgt_id) 748 { 749 if (speed_option_verbose) 750 printf ("clock_gettime don't know what ID to use\n"); 751 result = 0; 752 return result; 753 } 754 755 if (result != -1) 756 return result; 757 758 /* trial run to see if it works */ 759 if (clock_gettime (CGT_ID, &unit) != 0) 760 { 761 if (speed_option_verbose) 762 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); 763 result = 0; 764 return result; 765 } 766 767 /* get the resolution */ 768 if (clock_getres (CGT_ID, &unit) != 0) 769 { 770 if (speed_option_verbose) 771 printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno)); 772 result = 0; 773 return result; 774 } 775 776 cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9; 777 if (speed_option_verbose) 778 printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime)); 779 780 if (cgt_unittime < 10e-9) 781 { 782 /* Do we believe this? */ 783 struct timespec start, end; 784 static volatile int counter; 785 double duration; 786 if (clock_gettime (CGT_ID, &start)) 787 { 788 if (speed_option_verbose) 789 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); 790 result = 0; 791 return result; 792 } 793 /* Loop of at least 1000 memory accesses, ought to take at 794 least 100 ns*/ 795 for (counter = 0; counter < CGT_DELAY_COUNT; counter++) 796 ; 797 if (clock_gettime (CGT_ID, &end)) 798 { 799 if (speed_option_verbose) 800 printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); 801 result = 0; 802 return result; 803 } 804 duration = (end.tv_sec + end.tv_nsec * 1e-9 805 - start.tv_sec - start.tv_nsec * 1e-9); 806 if (speed_option_verbose) 807 printf ("delay loop of %d rounds took %s (according to clock_gettime)\n", 808 CGT_DELAY_COUNT, unittime_string (duration)); 809 if (duration < 100e-9) 810 { 811 if (speed_option_verbose) 812 printf ("clock_gettime id=%d not believable\n", CGT_ID); 813 result = 0; 814 return result; 815 } 816 } 817 result = 1; 818 return result; 819 } 820 821 822 static double 823 
freq_measure_mftb_one (void) 824 { 825 #define call_gettimeofday(t) gettimeofday (&(t), NULL) 826 #define timeval_tv_sec(t) ((t).tv_sec) 827 #define timeval_tv_usec(t) ((t).tv_usec) 828 FREQ_MEASURE_ONE ("mftb", struct_timeval, 829 call_gettimeofday, MFTB, 830 timeval_tv_sec, timeval_tv_usec); 831 } 832 833 834 static jmp_buf mftb_works_buf; 835 836 static RETSIGTYPE 837 mftb_works_handler (int sig) 838 { 839 longjmp (mftb_works_buf, 1); 840 } 841 842 int 843 mftb_works_p (void) 844 { 845 unsigned a[2]; 846 RETSIGTYPE (*old_handler) (int); 847 double cycletime; 848 849 /* suppress a warning about a[] unused */ 850 a[0] = 0; 851 852 if (! have_mftb) 853 return 0; 854 855 #ifdef SIGILL 856 old_handler = signal (SIGILL, mftb_works_handler); 857 if (old_handler == SIG_ERR) 858 { 859 if (speed_option_verbose) 860 printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n"); 861 return 1; 862 } 863 if (setjmp (mftb_works_buf)) 864 { 865 if (speed_option_verbose) 866 printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n"); 867 return 0; 868 } 869 MFTB (a); 870 signal (SIGILL, old_handler); 871 if (speed_option_verbose) 872 printf ("mftb_works_p(): mftb works\n"); 873 #else 874 875 if (speed_option_verbose) 876 printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n"); 877 #endif 878 879 #if ! HAVE_GETTIMEOFDAY 880 if (speed_option_verbose) 881 printf ("mftb_works_p(): no gettimeofday available to measure mftb\n"); 882 return 0; 883 #endif 884 885 /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on 886 other chips it can be driven from an external clock. 
*/ 887 cycletime = freq_measure ("mftb", freq_measure_mftb_one); 888 if (cycletime == -1.0) 889 { 890 if (speed_option_verbose) 891 printf ("mftb_works_p(): cannot measure mftb period\n"); 892 return 0; 893 } 894 895 mftb_unittime = cycletime; 896 return 1; 897 } 898 899 900 volatile unsigned *sgi_addr; 901 902 int 903 sgi_works_p (void) 904 { 905 #if HAVE_SYSSGI && HAVE_MMAP 906 static int result = -1; 907 908 size_t pagesize, offset; 909 __psunsigned_t phys, physpage; 910 void *virtpage; 911 unsigned period_picoseconds; 912 int size, fd; 913 914 if (result != -1) 915 return result; 916 917 phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds); 918 if (phys == (__psunsigned_t) -1) 919 { 920 /* ENODEV is the error when a counter is not available */ 921 if (speed_option_verbose) 922 printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno)); 923 result = 0; 924 return result; 925 } 926 sgi_unittime = period_picoseconds * 1e-12; 927 928 /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case. 929 Challenge/ONYX hardware has a 64 bit byte counter, but there seems no 930 obvious way to identify that without SGI_CYCLECNTR_SIZE. 
*/ 931 #ifdef SGI_CYCLECNTR_SIZE 932 size = syssgi (SGI_CYCLECNTR_SIZE); 933 if (size == -1) 934 { 935 if (speed_option_verbose) 936 { 937 printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno)); 938 printf (" will assume size==4\n"); 939 } 940 size = 32; 941 } 942 #else 943 size = 32; 944 #endif 945 946 if (size < 32) 947 { 948 printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size); 949 result = 0; 950 return result; 951 } 952 953 pagesize = getpagesize(); 954 offset = (size_t) phys & (pagesize-1); 955 physpage = phys - offset; 956 957 /* shouldn't cross over a page boundary */ 958 ASSERT_ALWAYS (offset + size/8 <= pagesize); 959 960 fd = open("/dev/mmem", O_RDONLY); 961 if (fd == -1) 962 { 963 if (speed_option_verbose) 964 printf ("open /dev/mmem: %s\n", strerror (errno)); 965 result = 0; 966 return result; 967 } 968 969 virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage); 970 if (virtpage == (void *) -1) 971 { 972 if (speed_option_verbose) 973 printf ("mmap /dev/mmem: %s\n", strerror (errno)); 974 result = 0; 975 return result; 976 } 977 978 /* address of least significant 4 bytes, knowing mips is big endian */ 979 sgi_addr = (unsigned *) ((char *) virtpage + offset 980 + size/8 - sizeof(unsigned)); 981 result = 1; 982 return result; 983 984 #else /* ! (HAVE_SYSSGI && HAVE_MMAP) */ 985 return 0; 986 #endif 987 } 988 989 990 #define DEFAULT(var,n) \ 991 do { \ 992 if (! 
(var)) \ 993 (var) = (n); \ 994 } while (0) 995 996 void 997 speed_time_init (void) 998 { 999 double supplement_unittime = 0.0; 1000 1001 static int speed_time_initialized = 0; 1002 if (speed_time_initialized) 1003 return; 1004 speed_time_initialized = 1; 1005 1006 speed_cycletime_init (); 1007 1008 if (!speed_option_cycles_broken && have_cycles && cycles_works_p ()) 1009 { 1010 use_cycles = 1; 1011 DEFAULT (speed_cycletime, 1.0); 1012 speed_unittime = speed_cycletime; 1013 DEFAULT (speed_precision, 10000); 1014 strcpy (speed_time_string, "CPU cycle counter"); 1015 1016 /* only used if a supplementary method is chosen below */ 1017 cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0 1018 * speed_cycletime; 1019 1020 if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p()) 1021 { 1022 /* this is a good combination */ 1023 use_grus = 1; 1024 supplement_unittime = grus_unittime = 1.0e-6; 1025 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()"); 1026 } 1027 else if (have_cycles == 1) 1028 { 1029 /* When speed_cyclecounter has a limited range, look for something 1030 to supplement it. 
*/ 1031 if (have_gtod && gettimeofday_microseconds_p()) 1032 { 1033 use_gtod = 1; 1034 supplement_unittime = gtod_unittime = 1.0e-6; 1035 strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()"); 1036 } 1037 else if (have_grus) 1038 { 1039 use_grus = 1; 1040 supplement_unittime = grus_unittime = 1.0 / (double) clk_tck (); 1041 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime)); 1042 } 1043 else if (have_times) 1044 { 1045 use_times = 1; 1046 supplement_unittime = times_unittime = 1.0 / (double) clk_tck (); 1047 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime)); 1048 } 1049 else if (have_gtod) 1050 { 1051 use_gtod = 1; 1052 supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck (); 1053 sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime)); 1054 } 1055 else 1056 { 1057 fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n"); 1058 fprintf (stderr, " Wraparounds may produce bad results on long measurements.\n"); 1059 } 1060 } 1061 1062 if (use_grus || use_times || use_gtod) 1063 { 1064 /* must know cycle period to compare cycles to other measuring 1065 (via cycles_limit) */ 1066 speed_cycletime_need_seconds (); 1067 1068 if (speed_precision * supplement_unittime > cycles_limit) 1069 { 1070 fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n"); 1071 fprintf (stderr, " cycle counter and limited precision supplemental method\n"); 1072 fprintf (stderr, " (%s)\n", speed_time_string); 1073 } 1074 } 1075 } 1076 else if (have_stck) 1077 { 1078 strcpy (speed_time_string, "STCK timestamp"); 1079 /* stck is in units of 2^-12 microseconds, which is very likely higher 1080 resolution than a cpu cycle */ 1081 if (speed_cycletime == 
0.0) 1082 speed_cycletime_fail 1083 ("Need to know CPU frequency for effective stck unit"); 1084 speed_unittime = MAX (speed_cycletime, STCK_PERIOD); 1085 DEFAULT (speed_precision, 10000); 1086 } 1087 else if (have_mftb && mftb_works_p ()) 1088 { 1089 use_mftb = 1; 1090 DEFAULT (speed_precision, 10000); 1091 speed_unittime = mftb_unittime; 1092 sprintf (speed_time_string, "mftb counter (%s)", 1093 unittime_string (speed_unittime)); 1094 } 1095 else if (have_sgi && sgi_works_p ()) 1096 { 1097 use_sgi = 1; 1098 DEFAULT (speed_precision, 10000); 1099 speed_unittime = sgi_unittime; 1100 sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()", 1101 unittime_string (speed_unittime)); 1102 /* supplemented with getrusage, which we assume to have 1ms resolution */ 1103 use_grus = 1; 1104 supplement_unittime = 1e-3; 1105 } 1106 else if (have_rrt) 1107 { 1108 timebasestruct_t t; 1109 use_rrt = 1; 1110 DEFAULT (speed_precision, 10000); 1111 read_real_time (&t, sizeof(t)); 1112 switch (t.flag) { 1113 case RTC_POWER: 1114 /* FIXME: What's the actual RTC resolution? */ 1115 speed_unittime = 1e-7; 1116 strcpy (speed_time_string, "read_real_time() power nanoseconds"); 1117 break; 1118 case RTC_POWER_PC: 1119 t.tb_high = 1; 1120 t.tb_low = 0; 1121 time_base_to_time (&t, sizeof(t)); 1122 speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32; 1123 sprintf (speed_time_string, "%s read_real_time() powerpc ticks", 1124 unittime_string (speed_unittime)); 1125 break; 1126 default: 1127 fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n", 1128 t.flag); 1129 abort (); 1130 } 1131 } 1132 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6) 1133 { 1134 /* use clock_gettime if microsecond or better resolution */ 1135 choose_cgt: 1136 use_cgt = 1; 1137 speed_unittime = cgt_unittime; 1138 DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 
10000 : 1000)); 1139 strcpy (speed_time_string, "microsecond accurate clock_gettime()"); 1140 } 1141 else if (have_times && clk_tck() > 1000000) 1142 { 1143 /* Cray vector systems have times() which is clock cycle resolution 1144 (eg. 450 MHz). */ 1145 DEFAULT (speed_precision, 10000); 1146 goto choose_times; 1147 } 1148 else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p()) 1149 { 1150 use_grus = 1; 1151 speed_unittime = grus_unittime = 1.0e-6; 1152 DEFAULT (speed_precision, 1000); 1153 strcpy (speed_time_string, "microsecond accurate getrusage()"); 1154 } 1155 else if (have_gtod && gettimeofday_microseconds_p()) 1156 { 1157 use_gtod = 1; 1158 speed_unittime = gtod_unittime = 1.0e-6; 1159 DEFAULT (speed_precision, 1000); 1160 strcpy (speed_time_string, "microsecond accurate gettimeofday()"); 1161 } 1162 else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck()) 1163 { 1164 /* use clock_gettime if 1 tick or better resolution */ 1165 goto choose_cgt; 1166 } 1167 else if (have_times) 1168 { 1169 use_tick_boundary = 1; 1170 DEFAULT (speed_precision, 200); 1171 choose_times: 1172 use_times = 1; 1173 speed_unittime = times_unittime = 1.0 / (double) clk_tck (); 1174 sprintf (speed_time_string, "%s clock tick times()", 1175 unittime_string (speed_unittime)); 1176 } 1177 else if (have_grus) 1178 { 1179 use_grus = 1; 1180 use_tick_boundary = 1; 1181 speed_unittime = grus_unittime = 1.0 / (double) clk_tck (); 1182 DEFAULT (speed_precision, 200); 1183 sprintf (speed_time_string, "%s clock tick getrusage()\n", 1184 unittime_string (speed_unittime)); 1185 } 1186 else if (have_gtod) 1187 { 1188 use_gtod = 1; 1189 use_tick_boundary = 1; 1190 speed_unittime = gtod_unittime = 1.0 / (double) clk_tck (); 1191 DEFAULT (speed_precision, 200); 1192 sprintf (speed_time_string, "%s clock tick gettimeofday()", 1193 unittime_string (speed_unittime)); 1194 } 1195 else 1196 { 1197 fprintf (stderr, "No time measuring method available\n"); 1198 fprintf (stderr, 
"None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n"); 1199 abort (); 1200 } 1201 1202 if (speed_option_verbose) 1203 { 1204 printf ("speed_time_init: %s\n", speed_time_string); 1205 printf (" speed_precision %d\n", speed_precision); 1206 printf (" speed_unittime %.2g\n", speed_unittime); 1207 if (supplement_unittime) 1208 printf (" supplement_unittime %.2g\n", supplement_unittime); 1209 printf (" use_tick_boundary %d\n", use_tick_boundary); 1210 if (have_cycles) 1211 printf (" cycles_limit %.2g seconds\n", cycles_limit); 1212 } 1213 } 1214 1215 1216 1217 /* Burn up CPU until a clock tick boundary, for greater accuracy. Set the 1218 corresponding "start_foo" appropriately too. */ 1219 1220 void 1221 grus_tick_boundary (void) 1222 { 1223 struct_rusage prev; 1224 getrusage (0, &prev); 1225 do { 1226 getrusage (0, &start_grus); 1227 } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec); 1228 } 1229 1230 void 1231 gtod_tick_boundary (void) 1232 { 1233 struct_timeval prev; 1234 gettimeofday (&prev, NULL); 1235 do { 1236 gettimeofday (&start_gtod, NULL); 1237 } while (start_gtod.tv_usec == prev.tv_usec); 1238 } 1239 1240 void 1241 times_tick_boundary (void) 1242 { 1243 struct_tms prev; 1244 times (&prev); 1245 do 1246 times (&start_times); 1247 while (start_times.tms_utime == prev.tms_utime); 1248 } 1249 1250 1251 /* "have_" values are tested to let unused code go dead. 
*/ 1252 1253 void 1254 speed_starttime (void) 1255 { 1256 speed_time_init (); 1257 1258 if (have_grus && use_grus) 1259 { 1260 if (use_tick_boundary) 1261 grus_tick_boundary (); 1262 else 1263 getrusage (0, &start_grus); 1264 } 1265 1266 if (have_gtod && use_gtod) 1267 { 1268 if (use_tick_boundary) 1269 gtod_tick_boundary (); 1270 else 1271 gettimeofday (&start_gtod, NULL); 1272 } 1273 1274 if (have_times && use_times) 1275 { 1276 if (use_tick_boundary) 1277 times_tick_boundary (); 1278 else 1279 times (&start_times); 1280 } 1281 1282 if (have_cgt && use_cgt) 1283 clock_gettime (CGT_ID, &start_cgt); 1284 1285 if (have_rrt && use_rrt) 1286 read_real_time (&start_rrt, sizeof(start_rrt)); 1287 1288 if (have_sgi && use_sgi) 1289 start_sgi = *sgi_addr; 1290 1291 if (have_mftb && use_mftb) 1292 MFTB (start_mftb); 1293 1294 if (have_stck && use_stck) 1295 STCK (start_stck); 1296 1297 /* Cycles sampled last for maximum accuracy. */ 1298 if (have_cycles && use_cycles) 1299 speed_cyclecounter (start_cycles); 1300 } 1301 1302 1303 /* Calculate the difference between two cycle counter samples, as a "double" 1304 counter of cycles. 1305 1306 The start and end values are allowed to cancel in integers in case the 1307 counter values are bigger than the 53 bits that normally fit in a double. 1308 1309 This works even if speed_cyclecounter() puts a value bigger than 32-bits 1310 in the low word (the high word always gets a 2**32 multiplier though). */ 1311 1312 double 1313 speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2]) 1314 { 1315 unsigned d; 1316 double t; 1317 1318 if (have_cycles == 1) 1319 { 1320 t = (end[0] - start[0]); 1321 } 1322 else 1323 { 1324 d = end[0] - start[0]; 1325 t = d - (d > end[0] ? 
M_2POWU : 0.0); 1326 t += (end[1] - start[1]) * M_2POW32; 1327 } 1328 return t; 1329 } 1330 1331 1332 double 1333 speed_mftb_diff (const unsigned end[2], const unsigned start[2]) 1334 { 1335 unsigned d; 1336 double t; 1337 1338 d = end[0] - start[0]; 1339 t = (double) d - (d > end[0] ? M_2POW32 : 0.0); 1340 t += (end[1] - start[1]) * M_2POW32; 1341 return t; 1342 } 1343 1344 1345 /* Calculate the difference between "start" and "end" using fields "sec" and 1346 "psec", where each "psec" is a "punit" of a second. 1347 1348 The seconds parts are allowed to cancel before being combined with the 1349 psec parts, in case a simple "sec+psec*punit" exceeds the precision of a 1350 double. 1351 1352 Total time is only calculated in a "double" since an integer count of 1353 psecs might overflow. 2^32 microseconds is only a bit over an hour, or 1354 2^32 nanoseconds only about 4 seconds. 1355 1356 The casts to "long" are for the benefit of timebasestruct_t, where the 1357 fields are only "unsigned int", but we want a signed difference. */ 1358 1359 #define DIFF_SECS_ROUTINE(sec, psec, punit) \ 1360 { \ 1361 long sec_diff, psec_diff; \ 1362 sec_diff = (long) end->sec - (long) start->sec; \ 1363 psec_diff = (long) end->psec - (long) start->psec; \ 1364 return (double) sec_diff + punit * (double) psec_diff; \ 1365 } 1366 1367 double 1368 timeval_diff_secs (const struct_timeval *end, const struct_timeval *start) 1369 { 1370 DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6); 1371 } 1372 1373 double 1374 rusage_diff_secs (const struct_rusage *end, const struct_rusage *start) 1375 { 1376 DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6); 1377 } 1378 1379 double 1380 timespec_diff_secs (const struct_timespec *end, const struct_timespec *start) 1381 { 1382 DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9); 1383 } 1384 1385 /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. 
*/ 1386 double 1387 timebasestruct_diff_secs (const timebasestruct_t *end, 1388 const timebasestruct_t *start) 1389 { 1390 DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9); 1391 } 1392 1393 1394 double 1395 speed_endtime (void) 1396 { 1397 #define END_USE(name,value) \ 1398 do { \ 1399 if (speed_option_verbose >= 3) \ 1400 printf ("speed_endtime(): used %s\n", name); \ 1401 result = value; \ 1402 goto done; \ 1403 } while (0) 1404 1405 #define END_ENOUGH(name,value) \ 1406 do { \ 1407 if (speed_option_verbose >= 3) \ 1408 printf ("speed_endtime(): %s gives enough precision\n", name); \ 1409 result = value; \ 1410 goto done; \ 1411 } while (0) 1412 1413 #define END_EXCEED(name,value) \ 1414 do { \ 1415 if (speed_option_verbose >= 3) \ 1416 printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \ 1417 name); \ 1418 result = value; \ 1419 goto done; \ 1420 } while (0) 1421 1422 unsigned end_cycles[2]; 1423 stck_t end_stck; 1424 unsigned end_mftb[2]; 1425 unsigned end_sgi; 1426 timebasestruct_t end_rrt; 1427 struct_timespec end_cgt; 1428 struct_timeval end_gtod; 1429 struct_rusage end_grus; 1430 struct_tms end_times; 1431 double t_gtod, t_grus, t_times, t_cgt; 1432 double t_rrt, t_sgi, t_mftb, t_stck, t_cycles; 1433 double result; 1434 1435 /* Cycles sampled first for maximum accuracy. 1436 "have_" values tested to let unused code go dead. 
*/ 1437 1438 if (have_cycles && use_cycles) speed_cyclecounter (end_cycles); 1439 if (have_stck && use_stck) STCK (end_stck); 1440 if (have_mftb && use_mftb) MFTB (end_mftb); 1441 if (have_sgi && use_sgi) end_sgi = *sgi_addr; 1442 if (have_rrt && use_rrt) read_real_time (&end_rrt, sizeof(end_rrt)); 1443 if (have_cgt && use_cgt) clock_gettime (CGT_ID, &end_cgt); 1444 if (have_gtod && use_gtod) gettimeofday (&end_gtod, NULL); 1445 if (have_grus && use_grus) getrusage (0, &end_grus); 1446 if (have_times && use_times) times (&end_times); 1447 1448 result = -1.0; 1449 1450 if (speed_option_verbose >= 4) 1451 { 1452 printf ("speed_endtime():\n"); 1453 if (use_cycles) 1454 printf (" cycles 0x%X,0x%X -> 0x%X,0x%X\n", 1455 start_cycles[1], start_cycles[0], 1456 end_cycles[1], end_cycles[0]); 1457 1458 if (use_stck) 1459 printf (" stck 0x%lX -> 0x%lX\n", start_stck, end_stck); 1460 1461 if (use_mftb) 1462 printf (" mftb 0x%X,%08X -> 0x%X,%08X\n", 1463 start_mftb[1], start_mftb[0], 1464 end_mftb[1], end_mftb[0]); 1465 1466 if (use_sgi) 1467 printf (" sgi 0x%X -> 0x%X\n", start_sgi, end_sgi); 1468 1469 if (use_rrt) 1470 printf (" read_real_time (%d)%u,%u -> (%d)%u,%u\n", 1471 start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low, 1472 end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low); 1473 1474 if (use_cgt) 1475 printf (" clock_gettime %ld.%09ld -> %ld.%09ld\n", 1476 start_cgt.tv_sec, start_cgt.tv_nsec, 1477 end_cgt.tv_sec, end_cgt.tv_nsec); 1478 1479 if (use_gtod) 1480 printf (" gettimeofday %ld.%06ld -> %ld.%06ld\n", 1481 start_gtod.tv_sec, start_gtod.tv_usec, 1482 end_gtod.tv_sec, end_gtod.tv_usec); 1483 1484 if (use_grus) 1485 printf (" getrusage %ld.%06ld -> %ld.%06ld\n", 1486 start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec, 1487 end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec); 1488 1489 if (use_times) 1490 printf (" times %ld -> %ld\n", 1491 start_times.tms_utime, end_times.tms_utime); 1492 } 1493 1494 if (use_rrt) 1495 { 1496 time_base_to_time (&start_rrt, 
sizeof(start_rrt)); 1497 time_base_to_time (&end_rrt, sizeof(end_rrt)); 1498 t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt); 1499 END_USE ("read_real_time()", t_rrt); 1500 } 1501 1502 if (use_cgt) 1503 { 1504 t_cgt = timespec_diff_secs (&end_cgt, &start_cgt); 1505 END_USE ("clock_gettime()", t_cgt); 1506 } 1507 1508 if (use_grus) 1509 { 1510 t_grus = rusage_diff_secs (&end_grus, &start_grus); 1511 1512 /* Use getrusage() if the cycle counter limit would be exceeded, or if 1513 it provides enough accuracy already. */ 1514 if (use_cycles) 1515 { 1516 if (t_grus >= speed_precision*grus_unittime) 1517 END_ENOUGH ("getrusage()", t_grus); 1518 if (t_grus >= cycles_limit) 1519 END_EXCEED ("getrusage()", t_grus); 1520 } 1521 } 1522 1523 if (use_times) 1524 { 1525 t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime; 1526 1527 /* Use times() if the cycle counter limit would be exceeded, or if 1528 it provides enough accuracy already. */ 1529 if (use_cycles) 1530 { 1531 if (t_times >= speed_precision*times_unittime) 1532 END_ENOUGH ("times()", t_times); 1533 if (t_times >= cycles_limit) 1534 END_EXCEED ("times()", t_times); 1535 } 1536 } 1537 1538 if (use_gtod) 1539 { 1540 t_gtod = timeval_diff_secs (&end_gtod, &start_gtod); 1541 1542 /* Use gettimeofday() if it measured a value bigger than the cycle 1543 counter can handle. 
*/ 1544 if (use_cycles) 1545 { 1546 if (t_gtod >= cycles_limit) 1547 END_EXCEED ("gettimeofday()", t_gtod); 1548 } 1549 } 1550 1551 if (use_mftb) 1552 { 1553 t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime; 1554 END_USE ("mftb", t_mftb); 1555 } 1556 1557 if (use_stck) 1558 { 1559 t_stck = (end_stck - start_stck) * STCK_PERIOD; 1560 END_USE ("stck", t_stck); 1561 } 1562 1563 if (use_sgi) 1564 { 1565 t_sgi = (end_sgi - start_sgi) * sgi_unittime; 1566 END_USE ("SGI hardware counter", t_sgi); 1567 } 1568 1569 if (use_cycles) 1570 { 1571 t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles) 1572 * speed_cycletime; 1573 END_USE ("cycle counter", t_cycles); 1574 } 1575 1576 if (use_grus && getrusage_microseconds_p()) 1577 END_USE ("getrusage()", t_grus); 1578 1579 if (use_gtod && gettimeofday_microseconds_p()) 1580 END_USE ("gettimeofday()", t_gtod); 1581 1582 if (use_times) END_USE ("times()", t_times); 1583 if (use_grus) END_USE ("getrusage()", t_grus); 1584 if (use_gtod) END_USE ("gettimeofday()", t_gtod); 1585 1586 fprintf (stderr, "speed_endtime(): oops, no time method available\n"); 1587 abort (); 1588 1589 done: 1590 if (result < 0.0) 1591 { 1592 if (speed_option_verbose >= 2) 1593 fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result); 1594 result = 0.0; 1595 } 1596 return result; 1597 }