github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/time.c (about)

     1  /* Time routines for speed measurements.
     2  
     3  Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  
    32  /* Usage:
    33  
    34     The code in this file implements the lowest level of time measuring,
    35     simple one-time measuring of time between two points.
    36  
    37     void speed_starttime (void)
    38     double speed_endtime (void)
    39         Call speed_starttime to start measuring, and then call speed_endtime
    40         when done.
    41  
    42         speed_endtime returns the time taken, in seconds.  Or if the timebase
    43         is in CPU cycles and the CPU frequency is unknown then speed_endtime
    44         returns cycles.  Applications can identify the cycles return by
    45         checking for speed_cycletime (described below) equal to 1.0.
    46  
    47         If some sort of temporary glitch occurs then speed_endtime returns
    48         0.0.  Currently this is for various cases where a negative time has
    49         occurred.  This unfortunately occurs with getrusage on some systems,
    50         and with the hppa cycle counter on hpux.
    51  
    52     double speed_cycletime
    53         The time in seconds for each CPU cycle.  For example on a 100 MHz CPU
    54         this would be 1.0e-8.
    55  
    56         If the CPU frequency is unknown, then speed_cycletime is either 0.0
    57         or 1.0.  It's 0.0 when speed_endtime is returning seconds, or it's
    58         1.0 when speed_endtime is returning cycles.
    59  
    60         It may be noted that "speed_endtime() / speed_cycletime" gives a
    61         measured time in cycles, irrespective of whether speed_endtime is
    62         returning cycles or seconds.  (Assuming cycles can be had, ie. it's
    63         either cycles already or the cpu frequency is known.  See also
    64         speed_cycletime_need_cycles below.)
    65  
    66     double speed_unittime
    67         The unit of time measurement accuracy for the timing method in use.
    68         This is in seconds or cycles, as per speed_endtime.
    69  
    70     char speed_time_string[]
    71         A null-terminated string describing the time method in use.
    72  
    73     void speed_time_init (void)
    74         Initialize time measuring.  speed_starttime() does this
    75         automatically, so it's only needed if an application wants to inspect
    76         the above global variables before making a measurement.
    77  
    78     int speed_precision
    79         The intended accuracy of time measurements.  speed_measure() in
    80         common.c for instance runs target routines with enough repetitions so
    81         it takes at least "speed_unittime * speed_precision" (this expression
    82         works for both cycles or seconds from speed_endtime).
    83  
    84         A program can provide an option so the user can set speed_precision.
    85         If speed_precision is zero when speed_time_init or speed_starttime
    86         first run then it gets a default based on the measuring method
    87         chosen.  (More precision for higher accuracy methods.)
    88  
    89     void speed_cycletime_need_seconds (void)
    90         Call this to demand that speed_endtime will return seconds, and not
    91         cycles.  If only cycles are available then an error is printed and
    92         the program exits.
    93  
    94     void speed_cycletime_need_cycles (void)
    95         Call this to demand that speed_cycletime is non-zero, so that
    96         "speed_endtime() / speed_cycletime" will give times in cycles.
    97  
    98  
    99  
   100     Notes:
   101  
   102     Various combinations of cycle counter, read_real_time(), getrusage(),
   103     gettimeofday() and times() can arise, according to which are available
   104     and their precision.
   105  
   106  
   107     Allowing speed_endtime() to return either seconds or cycles is only a
   108     slight complication and makes it possible for the speed program to do
   109     some sensible things without demanding the CPU frequency.  If seconds are
   110     being measured then it can always print seconds, and if cycles are being
   111     measured then it can always print them without needing to know how long
   112     they are.  Also the tune program doesn't care at all what the units are.
   113  
   114     GMP_CPU_FREQUENCY can always be set when the automated methods in freq.c
   115     fail.  This will be needed if times in seconds are wanted but a cycle
   116     counter is being used, or if times in cycles are wanted but getrusage or
   117     another seconds based timer is in use.
   118  
   119     If the measuring method uses a cycle counter but supplements it with
   120     getrusage or the like, then knowing the CPU frequency is mandatory since
   121     the code compares values from the two.
   122  
   123  
   124     Not done:
   125  
   126     Solaris gethrtime() seems no more than a slow way to access the Sparc V9
   127     cycle counter.  gethrvtime() seems to be relevant only to light weight
   128     processes, it doesn't for instance give nanosecond virtual time.  So
   129     neither of these are used.
   130  
   131  
   132     Bugs:
   133  
   134     getrusage_microseconds_p is fundamentally flawed, getrusage and
   135     gettimeofday can have resolutions other than clock ticks or microseconds,
   136     for instance IRIX 5 has a tick of 10 ms but a getrusage of 1 ms.
   137  
   138  
   139     Enhancements:
   140  
   141     The SGI hardware counter has 64 bits on some machines, which could be
   142     used when available.  But perhaps 32 bits is enough range, and then rely
   143     on the getrusage supplement.
   144  
   145     Maybe getrusage (or times) should be used as a supplement for any
   146     wall-clock measuring method.  Currently a wall clock with a good range
   147     (eg. a 64-bit cycle counter) is used without a supplement.
   148  
   149     On PowerPC the timebase registers could be used, but would have to do
   150     something to find out the speed.  On 6xx chips it's normally 1/4 bus
   151     speed, on 4xx chips it's either that or an external clock.  Measuring
   152     against gettimeofday might be ok.  */
   153  
   154  
   155  #include "config.h"
   156  
   157  #include <errno.h>
   158  #include <setjmp.h>
   159  #include <signal.h>
   160  #include <stddef.h>
   161  #include <stdio.h>
   162  #include <string.h>
   163  #include <stdlib.h> /* for getenv() */
   164  
   165  #if HAVE_FCNTL_H
   166  #include <fcntl.h>  /* for open() */
   167  #endif
   168  
   169  #if HAVE_STDINT_H
   170  #include <stdint.h> /* for uint64_t */
   171  #endif
   172  
   173  #if HAVE_UNISTD_H
   174  #include <unistd.h> /* for sysconf() */
   175  #endif
   176  
   177  #include <sys/types.h>
   178  
   179  #if TIME_WITH_SYS_TIME
   180  # include <sys/time.h>  /* for struct timeval */
   181  # include <time.h>
   182  #else
   183  # if HAVE_SYS_TIME_H
   184  #  include <sys/time.h>
   185  # else
   186  #  include <time.h>
   187  # endif
   188  #endif
   189  
   190  #if HAVE_SYS_MMAN_H
   191  #include <sys/mman.h>      /* for mmap() */
   192  #endif
   193  
   194  #if HAVE_SYS_RESOURCE_H
   195  #include <sys/resource.h>  /* for struct rusage */
   196  #endif
   197  
   198  #if HAVE_SYS_SYSSGI_H
   199  #include <sys/syssgi.h>    /* for syssgi() */
   200  #endif
   201  
   202  #if HAVE_SYS_SYSTEMCFG_H
   203  #include <sys/systemcfg.h> /* for RTC_POWER on AIX */
   204  #endif
   205  
   206  #if HAVE_SYS_TIMES_H
   207  #include <sys/times.h>  /* for times() and struct tms */
   208  #endif
   209  
   210  #include "gmp.h"
   211  #include "gmp-impl.h"
   212  
   213  #include "speed.h"
   214  
   215  
/* strerror is only used for some stuff on newish systems, no need to have a
   proper replacement */
#if ! HAVE_STRERROR
#define strerror(n)  "<strerror not available>"
#endif


/* Public state, as described in the usage notes at the top of this file.  */

/* null-terminated description of the time method in use */
char    speed_time_string[256];
/* intended accuracy of measurements; zero means "pick a default" when time
   measuring is first initialized */
int     speed_precision = 0;
/* unit of measurement accuracy, in seconds or cycles as per speed_endtime */
double  speed_unittime;
/* seconds per CPU cycle; when the CPU frequency is unknown this is 0.0 if
   speed_endtime returns seconds, or 1.0 if it returns cycles */
double  speed_cycletime = 0.0;


/* don't rely on "unsigned" to "double" conversion, it's broken in SunOS 4
   native cc */
/* (INT_MAX + 1) * 2 as a double, built from INT_MAX so that no unsigned
   value ever has to be converted */
#define M_2POWU   (((double) INT_MAX + 1.0) * 2.0)

/* 2^32 and 2^64 as doubles */
#define M_2POW32  4294967296.0
#define M_2POW64  (M_2POW32 * M_2POW32)
   235  
   236  
   237  /* Conditionals for the time functions available are done with normal C
   238     code, which is a lot easier than wildly nested preprocessor directives.
   239  
   240     The choice of what to use is partly made at run-time, according to
   241     whether the cycle counter works and the measured accuracy of getrusage
   242     and gettimeofday.
   243  
   244     A routine that's not available is never called, but is still defined as
   245     an abort() so that a mistaken call gets caught.
   246  
   247     It can be assumed that if a function exists then its data type will, but
   248     if the function doesn't then the data type might or might not exist, so
   249     the type can't be used unconditionally.  The "struct_rusage" etc macros
   250     provide dummies when the respective function doesn't exist. */
   251  
   252  
/* have_cycles takes the configured value of HAVE_SPEED_CYCLECOUNTER, or is
   0 when that is not set.  In the latter case speed_cyclecounter() is
   replaced by an ASSERT_FAIL so that a mistaken call is caught.  */
#if HAVE_SPEED_CYCLECOUNTER
static const int have_cycles = HAVE_SPEED_CYCLECOUNTER;
#else
static const int have_cycles = 0;
#define speed_cyclecounter(p)  ASSERT_FAIL (speed_cyclecounter not available)
#endif

/* "stck" returns ticks since 1 Jan 1900 00:00 GMT, where each tick is 2^-12
   microseconds.  Same #ifdefs here as in longlong.h.

   STCK(timestamp) stores the current value into "timestamp", which should
   be a stck_t.  Where the instruction is not available, stck_t is only a
   placeholder type and STCK is an ASSERT_FAIL.  */
#if defined (__GNUC__) && ! defined (NO_ASM)                            \
  && (defined (__i370__) || defined (__s390__) || defined (__mvs__))
static const int  have_stck = 1;
static const int  use_stck = 1;  /* always use when available */
typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp)                 \
  do {                                  \
    asm ("stck %0" : "=Q" (timestamp)); \
  } while (0)
#else
static const int  have_stck = 0;
static const int  use_stck = 0;
typedef unsigned long  stck_t;   /* dummy */
#define STCK(timestamp)  ASSERT_FAIL (stck instruction not available)
#endif
/* length of one stck tick, in seconds */
#define STCK_PERIOD      (1.0 / 4096e6)   /* 2^-12 microseconds */
   278  
/* mftb
   Enhancement: On 64-bit chips mftb gives a 64-bit value, no need for mftbu
   and a loop (see powerpc64.asm).

   MFTB(a) stores a two-word reading into the array "a": the low word from
   "mftb" in a[0] and the high word from "mftbu" in a[1].

   The GNU C version reads high, low, high and repeats until both high
   reads are equal, presumably so that a carry out of the low word between
   the reads cannot produce an inconsistent pair.  Without GNU C asm the
   work is handed to mftb_function(), which is not defined in this part of
   the file.  On non-powerpc hosts MFTB zeros "a" and is an ASSERT_FAIL.  */
#if HAVE_HOST_CPU_FAMILY_powerpc
static const int  have_mftb = 1;
#if defined (__GNUC__) && ! defined (NO_ASM)
#define MFTB(a)                         \
  do {                                  \
    unsigned  __h1, __l, __h2;          \
    do {                                \
      asm volatile ("mftbu %0\n"        \
		    "mftb  %1\n"        \
		    "mftbu %2"          \
		    : "=r" (__h1),      \
		      "=r" (__l),       \
		      "=r" (__h2));     \
    } while (__h1 != __h2);             \
    a[0] = __l;                         \
    a[1] = __h1;                        \
  } while (0)
#else
#define MFTB(a)   mftb_function (a)
#endif
#else /* ! powerpc */
static const int  have_mftb = 0;
#define MFTB(a)                         \
  do {                                  \
    a[0] = 0;                           \
    a[1] = 0;                           \
    ASSERT_FAIL (mftb not available);   \
  } while (0)
#endif
   311  
/* Unicos 10.X has syssgi(), but not mmap(). */
/* so the SGI counter is only considered available when both exist */
#if HAVE_SYSSGI && HAVE_MMAP
static const int  have_sgi = 1;
#else
static const int  have_sgi = 0;
#endif

/* read_real_time() and time_base_to_time().  When they're not available,
   the functions become ASSERT_FAILs, and the RTC_ constants and
   timebasestruct_t get local stand-ins so that code referring to them
   still compiles.  */
#if HAVE_READ_REAL_TIME
static const int have_rrt = 1;
#else
static const int have_rrt = 0;
#define read_real_time(t,s)     ASSERT_FAIL (read_real_time not available)
#define time_base_to_time(t,s)  ASSERT_FAIL (time_base_to_time not available)
#define RTC_POWER     1
#define RTC_POWER_PC  2
#define timebasestruct_t   struct timebasestruct_dummy
struct timebasestruct_dummy {
  int             flag;
  unsigned int    tb_high;
  unsigned int    tb_low;
};
#endif
   334  
/* For each of clock_gettime, getrusage, gettimeofday and times there is a
   have_xxx constant saying whether the function exists, and a struct_xxx
   macro naming the data type to declare variables with.  When the function
   does not exist it is replaced by an ASSERT_FAIL, and struct_xxx names a
   dummy structure (defined further down) instead of the system type.  */

/* clock_gettime and clock_getres; the stubs evaluate to -1 */
#if HAVE_CLOCK_GETTIME
static const int have_cgt = 1;
#define struct_timespec  struct timespec
#else
static const int have_cgt = 0;
#define struct_timespec       struct timespec_dummy
#define clock_gettime(id,ts)  (ASSERT_FAIL (clock_gettime not available), -1)
#define clock_getres(id,ts)   (ASSERT_FAIL (clock_getres not available), -1)
#endif

/* getrusage */
#if HAVE_GETRUSAGE
static const int have_grus = 1;
#define struct_rusage   struct rusage
#else
static const int have_grus = 0;
#define getrusage(n,ru)  ASSERT_FAIL (getrusage not available)
#define struct_rusage    struct rusage_dummy
#endif

/* gettimeofday */
#if HAVE_GETTIMEOFDAY
static const int have_gtod = 1;
#define struct_timeval   struct timeval
#else
static const int have_gtod = 0;
#define gettimeofday(tv,tz)  ASSERT_FAIL (gettimeofday not available)
#define struct_timeval   struct timeval_dummy
#endif

/* times */
#if HAVE_TIMES
static const int have_times = 1;
#define struct_tms   struct tms
#else
static const int have_times = 0;
#define times(tms)   ASSERT_FAIL (times not available)
#define struct_tms   struct tms_dummy
#endif
   371  
/* Stand-in structures, used through the struct_tms, struct_timeval,
   struct_rusage and struct_timespec macros when the corresponding function
   is not available.  They hold only the fields this file refers to.  These
   are defined unconditionally, under names that don't clash with the
   system types.  */
struct tms_dummy {
  long  tms_utime;
};
struct timeval_dummy {
  long  tv_sec;
  long  tv_usec;
};
struct rusage_dummy {
  struct_timeval ru_utime;   /* real or dummy timeval, as selected above */
};
struct timespec_dummy {
  long  tv_sec;
  long  tv_nsec;
};
   386  
/* Run-time flags, one per measuring method.  These are not assigned in this
   part of the file; presumably the initialization code further down sets
   them according to which methods were selected -- TODO confirm.  */
static int  use_cycles;
static int  use_mftb;
static int  use_sgi;
static int  use_rrt;
static int  use_cgt;
static int  use_gtod;
static int  use_grus;
static int  use_times;
static int  use_tick_boundary;

/* Storage for a starting value from each method, each in that method's own
   data type (real or dummy).  Presumably filled in by speed_starttime --
   TODO confirm, not visible in this part of the file.  */
static unsigned         start_cycles[2];
static stck_t           start_stck;
static unsigned         start_mftb[2];
static unsigned         start_sgi;
static timebasestruct_t start_rrt;
static struct_timespec  start_cgt;
static struct_rusage    start_grus;
static struct_timeval   start_gtod;
static struct_tms       start_times;

/* Per-method limits and resolutions.  cgt_unittime is set by cgt_works_p()
   to the clock_getres() resolution in seconds; the others are set outside
   this part of the file.  */
static double  cycles_limit = 1e100;
static double  mftb_unittime;
static double  sgi_unittime;
static double  cgt_unittime;
static double  grus_unittime;
static double  gtod_unittime;
static double  times_unittime;

/* for RTC_POWER format, ie. seconds and nanoseconds */
/* "t" is a pointer to a timebasestruct_t; the result is a double, with
   tb_high taken as the seconds and tb_low as the nanoseconds */
#define TIMEBASESTRUCT_SECS(t)  ((t)->tb_high + (t)->tb_low * 1e-9)
   417  
   418  
   419  /* Return a string representing a time in seconds, nicely formatted.
   420     Eg. "10.25ms".  */
   421  char *
   422  unittime_string (double t)
   423  {
   424    static char  buf[128];
   425  
   426    const char  *unit;
   427    int         prec;
   428  
   429    /* choose units and scale */
   430    if (t < 1e-6)
   431      t *= 1e9, unit = "ns";
   432    else if (t < 1e-3)
   433      t *= 1e6, unit = "us";
   434    else if (t < 1.0)
   435      t *= 1e3, unit = "ms";
   436    else
   437      unit = "s";
   438  
   439    /* want 4 significant figures */
   440    if (t < 1.0)
   441      prec = 4;
   442    else if (t < 10.0)
   443      prec = 3;
   444    else if (t < 100.0)
   445      prec = 2;
   446    else
   447      prec = 1;
   448  
   449    sprintf (buf, "%.*f%s", prec, t, unit);
   450    return buf;
   451  }
   452  
   453  
/* Jump target for cycles_works_handler, set up with setjmp in
   cycles_works_p.  */
static jmp_buf  cycles_works_buf;

/* SIGILL handler installed by cycles_works_p while it makes its trial call
   to speed_cyclecounter.  Jumps back to the setjmp there, making it return
   1.  "sig" is not used.  */
static RETSIGTYPE
cycles_works_handler (int sig)
{
  longjmp (cycles_works_buf, 1);
}
   461  
   462  int
   463  cycles_works_p (void)
   464  {
   465    static int  result = -1;
   466  
   467    if (result != -1)
   468      goto done;
   469  
   470    /* FIXME: On linux, the cycle counter is not saved and restored over
   471     * context switches, making it almost useless for precise cputime
   472     * measurements. When available, it's better to use clock_gettime,
   473     * which seems to have reasonable accuracy (tested on x86_32,
   474     * linux-2.6.26, glibc-2.7). However, there are also some linux
   475     * systems where clock_gettime is broken in one way or the other,
   476     * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
   477     * kind-of implemented but broken (needs code to detect that), and
   478     * on those systems a wall-clock cycle counter is the least bad
   479     * fallback.
   480     *
   481     * So we need some code to disable the cycle counter on some but not
   482     * all linux systems. */
   483  #ifdef SIGILL
   484    {
   485      RETSIGTYPE (*old_handler) (int);
   486      unsigned  cycles[2];
   487  
   488      old_handler = signal (SIGILL, cycles_works_handler);
   489      if (old_handler == SIG_ERR)
   490        {
   491  	if (speed_option_verbose)
   492  	  printf ("cycles_works_p(): SIGILL not supported, assuming speed_cyclecounter() works\n");
   493  	goto yes;
   494        }
   495      if (setjmp (cycles_works_buf))
   496        {
   497  	if (speed_option_verbose)
   498  	  printf ("cycles_works_p(): SIGILL during speed_cyclecounter(), so doesn't work\n");
   499  	result = 0;
   500  	goto done;
   501        }
   502      speed_cyclecounter (cycles);
   503      signal (SIGILL, old_handler);
   504      if (speed_option_verbose)
   505        printf ("cycles_works_p(): speed_cyclecounter() works\n");
   506    }
   507  #else
   508  
   509    if (speed_option_verbose)
   510      printf ("cycles_works_p(): SIGILL not defined, assuming speed_cyclecounter() works\n");
   511    goto yes;
   512  #endif
   513  
   514   yes:
   515    result = 1;
   516  
   517   done:
   518    return result;
   519  }
   520  
   521  
   522  /* The number of clock ticks per second, but looking at sysconf rather than
   523     just CLK_TCK, where possible.  */
   524  long
   525  clk_tck (void)
   526  {
   527    static long  result = -1L;
   528    if (result != -1L)
   529      return result;
   530  
   531  #if HAVE_SYSCONF
   532    result = sysconf (_SC_CLK_TCK);
   533    if (result != -1L)
   534      {
   535        if (speed_option_verbose)
   536  	printf ("sysconf(_SC_CLK_TCK) is %ld per second\n", result);
   537        return result;
   538      }
   539  
   540    fprintf (stderr,
   541  	   "sysconf(_SC_CLK_TCK) not working, using CLK_TCK instead\n");
   542  #endif
   543  
   544  #ifdef CLK_TCK
   545    result = CLK_TCK;
   546    if (speed_option_verbose)
   547      printf ("CLK_TCK is %ld per second\n", result);
   548    return result;
   549  #else
   550    fprintf (stderr, "CLK_TCK not defined, cannot continue\n");
   551    abort ();
   552  #endif
   553  }
   554  
   555  
   556  /* If two times can be observed less than half a clock tick apart, then
   557     assume "get" is microsecond accurate.
   558  
   559     Two times only 1 microsecond apart are not believed, since some kernels
   560     take it upon themselves to ensure gettimeofday doesn't return the same
   561     value twice, for the benefit of applications using it for a timestamp.
   562     This is obviously very stupid given the speed of CPUs these days.
   563  
   564     Making "reps" many calls to noop_1() is designed to waste some CPU, with
   565     a view to getting measurements 2 microseconds (or more) apart.  "reps" is
   566     increased progressively until such a period is seen.
   567  
   568     The outer loop "attempts" are just to allow for any random nonsense or
   569     system load upsetting the measurements (ie. making two successive calls
   570     to "get" come out as a longer interval than normal).
   571  
   572     Bugs:
   573  
   574     The assumption that any interval less than a half tick implies
   575     microsecond resolution is obviously fairly rash, the true resolution
   576     could be anything between a microsecond and that half tick.  Perhaps
   577     something special would have to be done on a system where this is the
   578     case, since there's no obvious reliable way to detect it
   579     automatically.  */
   580  
/* MICROSECONDS_P expands to the complete brace-enclosed body of an "int"
   function, including its return statements.

     name   string naming the time function, used in verbose messages
     type   data type filled in by "get"
     get    called as get(st) to store the current time in st
     sec    sec(t) gives the seconds field of t
     usec   usec(t) gives the microseconds field of t

   The generated body returns 1 if an interval of at least 2 microseconds
   but less than half a clock tick was seen in one of 5 attempts, or 0
   otherwise.  The answer is kept in a static variable, so the measuring is
   done only on the first call.  */
#define MICROSECONDS_P(name, type, get, sec, usec)                      \
  {                                                                     \
    static int  result = -1;                                            \
    type      st, et;                                                   \
    long      dt, half_tick;                                            \
    unsigned  attempt, reps, i, j;                                      \
									\
    if (result != -1)                                                   \
      return result;                                                    \
									\
    result = 0;                                                         \
    half_tick = (1000000L / clk_tck ()) / 2;                            \
									\
    for (attempt = 0; attempt < 5; attempt++)                           \
      {                                                                 \
	reps = 0;                                                       \
	for (;;)                                                        \
	  {                                                             \
	    get (st);                                                   \
	    for (i = 0; i < reps; i++)                                  \
	      for (j = 0; j < 100; j++)                                 \
		noop_1 (CNST_LIMB(0));                                  \
	    get (et);                                                   \
									\
	    dt = (sec(et)-sec(st))*1000000L + usec(et)-usec(st);        \
									\
	    if (speed_option_verbose >= 2)                              \
	      printf ("%s attempt=%u, reps=%u, dt=%ld\n",               \
		      name, attempt, reps, dt);                         \
									\
	    if (dt >= 2)                                                \
	      break;                                                    \
									\
	    reps = (reps == 0 ? 1 : 2*reps);                            \
	    if (reps == 0)                                              \
	      break;  /* uint overflow, not normal */                   \
	  }                                                             \
									\
	if (dt < half_tick)                                             \
	  {                                                             \
	    result = 1;                                                 \
	    break;                                                      \
	  }                                                             \
      }                                                                 \
									\
    if (speed_option_verbose)                                           \
      {                                                                 \
	if (result)                                                     \
	  printf ("%s is microsecond accurate\n", name);                \
	else                                                            \
	  printf ("%s is only %s clock tick accurate\n",                \
		  name, unittime_string (1.0/clk_tck()));               \
      }                                                                 \
    return result;                                                      \
  }
   636  
   637  
/* Return 1 if gettimeofday looks to be microsecond accurate, or 0 if it's
   only clock tick accurate.  The whole function body, including the return,
   comes from MICROSECONDS_P.  The helper macros defined here stay defined
   for the rest of the file.  */
int
gettimeofday_microseconds_p (void)
{
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  MICROSECONDS_P ("gettimeofday", struct_timeval,
		  call_gettimeofday, timeval_tv_sec, timeval_tv_usec);
}
   647  
/* Return 1 if getrusage looks to be microsecond accurate, or 0 if it's only
   clock tick accurate.  The ru_utime field is what's examined.  The whole
   function body, including the return, comes from MICROSECONDS_P.  */
int
getrusage_microseconds_p (void)
{
  /* "who" is passed as a literal 0, presumably RUSAGE_SELF -- TODO confirm */
#define call_getrusage(t)   getrusage (0, &(t))
#define rusage_tv_sec(t)    ((t).ru_utime.tv_sec)
#define rusage_tv_usec(t)   ((t).ru_utime.tv_usec)
  MICROSECONDS_P ("getrusage", struct_rusage,
		  call_getrusage, rusage_tv_sec, rusage_tv_usec);
}
   657  
   658  /* Test whether getrusage goes backwards, return non-zero if it does
   659     (suggesting it's flawed).
   660  
   661     On a macintosh m68040-unknown-netbsd1.4.1 getrusage looks like it's
   662     microsecond accurate, but has been seen remaining unchanged after many
   663     microseconds have elapsed.  It also regularly goes backwards by 1000 to
   664     5000 usecs, this has been seen after between 500 and 4000 attempts taking
   665     perhaps 0.03 seconds.  We consider this too broken for good measuring.
   666     We used to have configure pretend getrusage didn't exist on this system,
   667     but a runtime test should be more reliable, since we imagine the problem
   668     is not confined to just this exact system tuple.  */
   669  
   670  int
   671  getrusage_backwards_p (void)
   672  {
   673    static int result = -1;
   674    struct rusage  start, prev, next;
   675    long  d;
   676    int   i;
   677  
   678    if (result != -1)
   679      return result;
   680  
   681    getrusage (0, &start);
   682    memcpy (&next, &start, sizeof (next));
   683  
   684    result = 0;
   685    i = 0;
   686    for (;;)
   687      {
   688        memcpy (&prev, &next, sizeof (prev));
   689        getrusage (0, &next);
   690  
   691        if (next.ru_utime.tv_sec < prev.ru_utime.tv_sec
   692  	  || (next.ru_utime.tv_sec == prev.ru_utime.tv_sec
   693  	      && next.ru_utime.tv_usec < prev.ru_utime.tv_usec))
   694  	{
   695  	  if (speed_option_verbose)
   696  	    printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
   697  		    i,
   698  		    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
   699  		    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
   700  	  result = 1;
   701  	  break;
   702  	}
   703  
   704        /* minimum 1000 attempts, then stop after either 0.1 seconds or 50000
   705  	 attempts, whichever comes first */
   706        d = 1000000 * (next.ru_utime.tv_sec - start.ru_utime.tv_sec)
   707  	+ (next.ru_utime.tv_usec - start.ru_utime.tv_usec);
   708        i++;
   709        if (i > 50000 || (i > 1000 && d > 100000))
   710  	break;
   711      }
   712  
   713    return result;
   714  }
   715  
/* CLOCK_PROCESS_CPUTIME_ID looks like it's going to be in a future version
   of glibc (some time post 2.2).

   CLOCK_VIRTUAL is process time, available in BSD systems (though sometimes
   defined, but returning -1 for an error).

   CGT_ID is the clock ID given to clock_gettime and clock_getres, being
   CLOCK_PROCESS_CPUTIME_ID if that's defined, or otherwise CLOCK_VIRTUAL.
   have_cgt_id says whether either was found.  If not, CGT_ID is an
   ASSERT_FAIL evaluating to -1, so that code using it still compiles.  */

#ifdef CLOCK_PROCESS_CPUTIME_ID
# define CGT_ID        CLOCK_PROCESS_CPUTIME_ID
#else
# ifdef CLOCK_VIRTUAL
#  define CGT_ID       CLOCK_VIRTUAL
# endif
#endif
#ifdef CGT_ID
const int  have_cgt_id = 1;
#else
const int  have_cgt_id = 0;
# define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
#endif

/* number of iterations of the delay loop cgt_works_p uses to check whether
   a very fine clock_getres resolution is believable */
#define CGT_DELAY_COUNT 1000
   737  
   738  int
   739  cgt_works_p (void)
   740  {
   741    static int  result = -1;
   742    struct_timespec  unit;
   743  
   744    if (! have_cgt)
   745      return 0;
   746  
   747    if (! have_cgt_id)
   748      {
   749        if (speed_option_verbose)
   750  	printf ("clock_gettime don't know what ID to use\n");
   751        result = 0;
   752        return result;
   753      }
   754  
   755    if (result != -1)
   756      return result;
   757  
   758    /* trial run to see if it works */
   759    if (clock_gettime (CGT_ID, &unit) != 0)
   760      {
   761        if (speed_option_verbose)
   762  	printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
   763        result = 0;
   764        return result;
   765      }
   766  
   767    /* get the resolution */
   768    if (clock_getres (CGT_ID, &unit) != 0)
   769      {
   770        if (speed_option_verbose)
   771  	printf ("clock_getres id=%d error: %s\n", CGT_ID, strerror (errno));
   772        result = 0;
   773        return result;
   774      }
   775  
   776    cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
   777    if (speed_option_verbose)
   778      printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime));
   779  
   780    if (cgt_unittime < 10e-9)
   781      {
   782        /* Do we believe this? */
   783        struct timespec start, end;
   784        static volatile int counter;
   785        double duration;
   786        if (clock_gettime (CGT_ID, &start))
   787  	{
   788  	  if (speed_option_verbose)
   789  	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
   790  	  result = 0;
   791  	  return result;
   792  	}
   793        /* Loop of at least 1000 memory accesses, ought to take at
   794  	 least 100 ns*/
   795        for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
   796  	;
   797        if (clock_gettime (CGT_ID, &end))
   798  	{
   799  	  if (speed_option_verbose)
   800  	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
   801  	  result = 0;
   802  	  return result;
   803  	}
   804        duration = (end.tv_sec + end.tv_nsec * 1e-9
   805  		  - start.tv_sec - start.tv_nsec * 1e-9);
   806        if (speed_option_verbose)
   807  	printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
   808  		CGT_DELAY_COUNT, unittime_string (duration));
   809        if (duration < 100e-9)
   810  	{
   811  	  if (speed_option_verbose)
   812  	    printf ("clock_gettime id=%d not believable\n", CGT_ID);
   813  	  result = 0;
   814  	  return result;
   815  	}
   816      }
   817    result = 1;
   818    return result;
   819  }
   820  
   821  
/* Take one frequency measurement sample for the "mftb" time base.  The
   whole body is an expansion of FREQ_MEASURE_ONE, given gettimeofday() as
   the clock to read and MFTB as the counter to read.  mftb_works_p() hands
   this function to freq_measure().
   NOTE(review): the meaning of the returned double is decided by
   FREQ_MEASURE_ONE, which is defined elsewhere -- confirm there.  */
static double
freq_measure_mftb_one (void)
{
  /* Helper macros passed by name to FREQ_MEASURE_ONE: read the time of day
     into a struct_timeval, and pick out its seconds and microseconds
     fields.  Being macros they remain defined for the rest of the file.  */
#define call_gettimeofday(t)   gettimeofday (&(t), NULL)
#define timeval_tv_sec(t)      ((t).tv_sec)
#define timeval_tv_usec(t)     ((t).tv_usec)
  FREQ_MEASURE_ONE ("mftb", struct_timeval,
		    call_gettimeofday, MFTB,
		    timeval_tv_sec, timeval_tv_usec);
}
   832  
   833  
   834  static jmp_buf  mftb_works_buf;
   835  
   836  static RETSIGTYPE
   837  mftb_works_handler (int sig)
   838  {
   839    longjmp (mftb_works_buf, 1);
   840  }
   841  
   842  int
   843  mftb_works_p (void)
   844  {
   845    unsigned   a[2];
   846    RETSIGTYPE (*old_handler) (int);
   847    double     cycletime;
   848  
   849    /* suppress a warning about a[] unused */
   850    a[0] = 0;
   851  
   852    if (! have_mftb)
   853      return 0;
   854  
   855  #ifdef SIGILL
   856    old_handler = signal (SIGILL, mftb_works_handler);
   857    if (old_handler == SIG_ERR)
   858      {
   859        if (speed_option_verbose)
   860  	printf ("mftb_works_p(): SIGILL not supported, assuming mftb works\n");
   861        return 1;
   862      }
   863    if (setjmp (mftb_works_buf))
   864      {
   865        if (speed_option_verbose)
   866  	printf ("mftb_works_p(): SIGILL during mftb, so doesn't work\n");
   867        return 0;
   868      }
   869    MFTB (a);
   870    signal (SIGILL, old_handler);
   871    if (speed_option_verbose)
   872      printf ("mftb_works_p(): mftb works\n");
   873  #else
   874  
   875    if (speed_option_verbose)
   876      printf ("mftb_works_p(): SIGILL not defined, assuming mftb works\n");
   877  #endif
   878  
   879  #if ! HAVE_GETTIMEOFDAY
   880    if (speed_option_verbose)
   881      printf ("mftb_works_p(): no gettimeofday available to measure mftb\n");
   882    return 0;
   883  #endif
   884  
   885    /* The time base is normally 1/4 of the bus speed on 6xx and 7xx chips, on
   886       other chips it can be driven from an external clock. */
   887    cycletime = freq_measure ("mftb", freq_measure_mftb_one);
   888    if (cycletime == -1.0)
   889      {
   890        if (speed_option_verbose)
   891  	printf ("mftb_works_p(): cannot measure mftb period\n");
   892        return 0;
   893      }
   894  
   895    mftb_unittime = cycletime;
   896    return 1;
   897  }
   898  
   899  
   900  volatile unsigned  *sgi_addr;
   901  
   902  int
   903  sgi_works_p (void)
   904  {
   905  #if HAVE_SYSSGI && HAVE_MMAP
   906    static int  result = -1;
   907  
   908    size_t          pagesize, offset;
   909    __psunsigned_t  phys, physpage;
   910    void            *virtpage;
   911    unsigned        period_picoseconds;
   912    int             size, fd;
   913  
   914    if (result != -1)
   915      return result;
   916  
   917    phys = syssgi (SGI_QUERY_CYCLECNTR, &period_picoseconds);
   918    if (phys == (__psunsigned_t) -1)
   919      {
   920        /* ENODEV is the error when a counter is not available */
   921        if (speed_option_verbose)
   922  	printf ("syssgi SGI_QUERY_CYCLECNTR error: %s\n", strerror (errno));
   923        result = 0;
   924        return result;
   925      }
   926    sgi_unittime = period_picoseconds * 1e-12;
   927  
   928    /* IRIX 5 doesn't have SGI_CYCLECNTR_SIZE, assume 32 bits in that case.
   929       Challenge/ONYX hardware has a 64 bit byte counter, but there seems no
   930       obvious way to identify that without SGI_CYCLECNTR_SIZE.  */
   931  #ifdef SGI_CYCLECNTR_SIZE
   932    size = syssgi (SGI_CYCLECNTR_SIZE);
   933    if (size == -1)
   934      {
   935        if (speed_option_verbose)
   936  	{
   937  	  printf ("syssgi SGI_CYCLECNTR_SIZE error: %s\n", strerror (errno));
   938  	  printf ("    will assume size==4\n");
   939  	}
   940        size = 32;
   941      }
   942  #else
   943    size = 32;
   944  #endif
   945  
   946    if (size < 32)
   947      {
   948        printf ("syssgi SGI_CYCLECNTR_SIZE gives %d, expected 32 or 64\n", size);
   949        result = 0;
   950        return result;
   951      }
   952  
   953    pagesize = getpagesize();
   954    offset = (size_t) phys & (pagesize-1);
   955    physpage = phys - offset;
   956  
   957    /* shouldn't cross over a page boundary */
   958    ASSERT_ALWAYS (offset + size/8 <= pagesize);
   959  
   960    fd = open("/dev/mmem", O_RDONLY);
   961    if (fd == -1)
   962      {
   963        if (speed_option_verbose)
   964  	printf ("open /dev/mmem: %s\n", strerror (errno));
   965        result = 0;
   966        return result;
   967      }
   968  
   969    virtpage = mmap (0, pagesize, PROT_READ, MAP_PRIVATE, fd, (off_t) physpage);
   970    if (virtpage == (void *) -1)
   971      {
   972        if (speed_option_verbose)
   973  	printf ("mmap /dev/mmem: %s\n", strerror (errno));
   974        result = 0;
   975        return result;
   976      }
   977  
   978    /* address of least significant 4 bytes, knowing mips is big endian */
   979    sgi_addr = (unsigned *) ((char *) virtpage + offset
   980  			   + size/8 - sizeof(unsigned));
   981    result = 1;
   982    return result;
   983  
   984  #else /* ! (HAVE_SYSSGI && HAVE_MMAP) */
   985    return 0;
   986  #endif
   987  }
   988  
   989  
   990  #define DEFAULT(var,n)  \
   991    do {                  \
   992      if (! (var))        \
   993        (var) = (n);      \
   994    } while (0)
   995  
   996  void
   997  speed_time_init (void)
   998  {
   999    double supplement_unittime = 0.0;
  1000  
  1001    static int  speed_time_initialized = 0;
  1002    if (speed_time_initialized)
  1003      return;
  1004    speed_time_initialized = 1;
  1005  
  1006    speed_cycletime_init ();
  1007  
  1008    if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
  1009      {
  1010        use_cycles = 1;
  1011        DEFAULT (speed_cycletime, 1.0);
  1012        speed_unittime = speed_cycletime;
  1013        DEFAULT (speed_precision, 10000);
  1014        strcpy (speed_time_string, "CPU cycle counter");
  1015  
  1016        /* only used if a supplementary method is chosen below */
  1017        cycles_limit = (have_cycles == 1 ? M_2POW32 : M_2POW64) / 2.0
  1018  	* speed_cycletime;
  1019  
  1020        if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
  1021  	{
  1022  	  /* this is a good combination */
  1023  	  use_grus = 1;
  1024  	  supplement_unittime = grus_unittime = 1.0e-6;
  1025  	  strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond getrusage()");
  1026  	}
  1027        else if (have_cycles == 1)
  1028  	{
  1029  	  /* When speed_cyclecounter has a limited range, look for something
  1030  	     to supplement it. */
  1031  	  if (have_gtod && gettimeofday_microseconds_p())
  1032  	    {
  1033  	      use_gtod = 1;
  1034  	      supplement_unittime = gtod_unittime = 1.0e-6;
  1035  	      strcpy (speed_time_string, "CPU cycle counter, supplemented by microsecond gettimeofday()");
  1036  	    }
  1037  	  else if (have_grus)
  1038  	    {
  1039  	      use_grus = 1;
  1040  	      supplement_unittime = grus_unittime = 1.0 / (double) clk_tck ();
  1041  	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick getrusage()", unittime_string (supplement_unittime));
  1042  	    }
  1043  	  else if (have_times)
  1044  	    {
  1045  	      use_times = 1;
  1046  	      supplement_unittime = times_unittime = 1.0 / (double) clk_tck ();
  1047  	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick times()", unittime_string (supplement_unittime));
  1048  	    }
  1049  	  else if (have_gtod)
  1050  	    {
  1051  	      use_gtod = 1;
  1052  	      supplement_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
  1053  	      sprintf (speed_time_string, "CPU cycle counter, supplemented by %s clock tick gettimeofday()", unittime_string (supplement_unittime));
  1054  	    }
  1055  	  else
  1056  	    {
  1057  	      fprintf (stderr, "WARNING: cycle counter is 32 bits and there's no other functions.\n");
  1058  	      fprintf (stderr, "    Wraparounds may produce bad results on long measurements.\n");
  1059  	    }
  1060  	}
  1061  
  1062        if (use_grus || use_times || use_gtod)
  1063  	{
  1064  	  /* must know cycle period to compare cycles to other measuring
  1065  	     (via cycles_limit) */
  1066  	  speed_cycletime_need_seconds ();
  1067  
  1068  	  if (speed_precision * supplement_unittime > cycles_limit)
  1069  	    {
  1070  	      fprintf (stderr, "WARNING: requested precision can't always be achieved due to limited range\n");
  1071  	      fprintf (stderr, "    cycle counter and limited precision supplemental method\n");
  1072  	      fprintf (stderr, "    (%s)\n", speed_time_string);
  1073  	    }
  1074  	}
  1075      }
  1076    else if (have_stck)
  1077      {
  1078        strcpy (speed_time_string, "STCK timestamp");
  1079        /* stck is in units of 2^-12 microseconds, which is very likely higher
  1080  	 resolution than a cpu cycle */
  1081        if (speed_cycletime == 0.0)
  1082  	speed_cycletime_fail
  1083  	  ("Need to know CPU frequency for effective stck unit");
  1084        speed_unittime = MAX (speed_cycletime, STCK_PERIOD);
  1085        DEFAULT (speed_precision, 10000);
  1086      }
  1087    else if (have_mftb && mftb_works_p ())
  1088      {
  1089        use_mftb = 1;
  1090        DEFAULT (speed_precision, 10000);
  1091        speed_unittime = mftb_unittime;
  1092        sprintf (speed_time_string, "mftb counter (%s)",
  1093  	       unittime_string (speed_unittime));
  1094      }
  1095    else if (have_sgi && sgi_works_p ())
  1096      {
  1097        use_sgi = 1;
  1098        DEFAULT (speed_precision, 10000);
  1099        speed_unittime = sgi_unittime;
  1100        sprintf (speed_time_string, "syssgi() mmap counter (%s), supplemented by millisecond getrusage()",
  1101  	       unittime_string (speed_unittime));
  1102        /* supplemented with getrusage, which we assume to have 1ms resolution */
  1103        use_grus = 1;
  1104        supplement_unittime = 1e-3;
  1105      }
  1106    else if (have_rrt)
  1107      {
  1108        timebasestruct_t  t;
  1109        use_rrt = 1;
  1110        DEFAULT (speed_precision, 10000);
  1111        read_real_time (&t, sizeof(t));
  1112        switch (t.flag) {
  1113        case RTC_POWER:
  1114  	/* FIXME: What's the actual RTC resolution? */
  1115  	speed_unittime = 1e-7;
  1116  	strcpy (speed_time_string, "read_real_time() power nanoseconds");
  1117  	break;
  1118        case RTC_POWER_PC:
  1119  	t.tb_high = 1;
  1120  	t.tb_low = 0;
  1121  	time_base_to_time (&t, sizeof(t));
  1122  	speed_unittime = TIMEBASESTRUCT_SECS(&t) / M_2POW32;
  1123  	sprintf (speed_time_string, "%s read_real_time() powerpc ticks",
  1124  		 unittime_string (speed_unittime));
  1125  	break;
  1126        default:
  1127  	fprintf (stderr, "ERROR: Unrecognised timebasestruct_t flag=%d\n",
  1128  		 t.flag);
  1129  	abort ();
  1130        }
  1131      }
  1132    else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5e-6)
  1133      {
  1134        /* use clock_gettime if microsecond or better resolution */
  1135      choose_cgt:
  1136        use_cgt = 1;
  1137        speed_unittime = cgt_unittime;
  1138        DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
  1139        strcpy (speed_time_string, "microsecond accurate clock_gettime()");
  1140      }
  1141    else if (have_times && clk_tck() > 1000000)
  1142      {
  1143        /* Cray vector systems have times() which is clock cycle resolution
  1144  	 (eg. 450 MHz).  */
  1145        DEFAULT (speed_precision, 10000);
  1146        goto choose_times;
  1147      }
  1148    else if (have_grus && getrusage_microseconds_p() && ! getrusage_backwards_p())
  1149      {
  1150        use_grus = 1;
  1151        speed_unittime = grus_unittime = 1.0e-6;
  1152        DEFAULT (speed_precision, 1000);
  1153        strcpy (speed_time_string, "microsecond accurate getrusage()");
  1154      }
  1155    else if (have_gtod && gettimeofday_microseconds_p())
  1156      {
  1157        use_gtod = 1;
  1158        speed_unittime = gtod_unittime = 1.0e-6;
  1159        DEFAULT (speed_precision, 1000);
  1160        strcpy (speed_time_string, "microsecond accurate gettimeofday()");
  1161      }
  1162    else if (have_cgt && cgt_works_p() && cgt_unittime < 1.5/clk_tck())
  1163      {
  1164        /* use clock_gettime if 1 tick or better resolution */
  1165        goto choose_cgt;
  1166      }
  1167    else if (have_times)
  1168      {
  1169        use_tick_boundary = 1;
  1170        DEFAULT (speed_precision, 200);
  1171      choose_times:
  1172        use_times = 1;
  1173        speed_unittime = times_unittime = 1.0 / (double) clk_tck ();
  1174        sprintf (speed_time_string, "%s clock tick times()",
  1175  	       unittime_string (speed_unittime));
  1176      }
  1177    else if (have_grus)
  1178      {
  1179        use_grus = 1;
  1180        use_tick_boundary = 1;
  1181        speed_unittime = grus_unittime = 1.0 / (double) clk_tck ();
  1182        DEFAULT (speed_precision, 200);
  1183        sprintf (speed_time_string, "%s clock tick getrusage()\n",
  1184  	       unittime_string (speed_unittime));
  1185      }
  1186    else if (have_gtod)
  1187      {
  1188        use_gtod = 1;
  1189        use_tick_boundary = 1;
  1190        speed_unittime = gtod_unittime = 1.0 / (double) clk_tck ();
  1191        DEFAULT (speed_precision, 200);
  1192        sprintf (speed_time_string, "%s clock tick gettimeofday()",
  1193  	       unittime_string (speed_unittime));
  1194      }
  1195    else
  1196      {
  1197        fprintf (stderr, "No time measuring method available\n");
  1198        fprintf (stderr, "None of: speed_cyclecounter(), STCK(), getrusage(), gettimeofday(), times()\n");
  1199        abort ();
  1200      }
  1201  
  1202    if (speed_option_verbose)
  1203      {
  1204        printf ("speed_time_init: %s\n", speed_time_string);
  1205        printf ("    speed_precision     %d\n", speed_precision);
  1206        printf ("    speed_unittime      %.2g\n", speed_unittime);
  1207        if (supplement_unittime)
  1208  	printf ("    supplement_unittime %.2g\n", supplement_unittime);
  1209        printf ("    use_tick_boundary   %d\n", use_tick_boundary);
  1210        if (have_cycles)
  1211  	printf ("    cycles_limit        %.2g seconds\n", cycles_limit);
  1212      }
  1213  }
  1214  
  1215  
  1216  
  1217  /* Burn up CPU until a clock tick boundary, for greater accuracy.  Set the
  1218     corresponding "start_foo" appropriately too. */
  1219  
  1220  void
  1221  grus_tick_boundary (void)
  1222  {
  1223    struct_rusage  prev;
  1224    getrusage (0, &prev);
  1225    do {
  1226      getrusage (0, &start_grus);
  1227    } while (start_grus.ru_utime.tv_usec == prev.ru_utime.tv_usec);
  1228  }
  1229  
  1230  void
  1231  gtod_tick_boundary (void)
  1232  {
  1233    struct_timeval  prev;
  1234    gettimeofday (&prev, NULL);
  1235    do {
  1236      gettimeofday (&start_gtod, NULL);
  1237    } while (start_gtod.tv_usec == prev.tv_usec);
  1238  }
  1239  
  1240  void
  1241  times_tick_boundary (void)
  1242  {
  1243    struct_tms  prev;
  1244    times (&prev);
  1245    do
  1246      times (&start_times);
  1247    while (start_times.tms_utime == prev.tms_utime);
  1248  }
  1249  
  1250  
  1251  /* "have_" values are tested to let unused code go dead.  */
  1252  
  1253  void
  1254  speed_starttime (void)
  1255  {
  1256    speed_time_init ();
  1257  
  1258    if (have_grus && use_grus)
  1259      {
  1260        if (use_tick_boundary)
  1261  	grus_tick_boundary ();
  1262        else
  1263  	getrusage (0, &start_grus);
  1264      }
  1265  
  1266    if (have_gtod && use_gtod)
  1267      {
  1268        if (use_tick_boundary)
  1269  	gtod_tick_boundary ();
  1270        else
  1271  	gettimeofday (&start_gtod, NULL);
  1272      }
  1273  
  1274    if (have_times && use_times)
  1275      {
  1276        if (use_tick_boundary)
  1277  	times_tick_boundary ();
  1278        else
  1279  	times (&start_times);
  1280      }
  1281  
  1282    if (have_cgt && use_cgt)
  1283      clock_gettime (CGT_ID, &start_cgt);
  1284  
  1285    if (have_rrt && use_rrt)
  1286      read_real_time (&start_rrt, sizeof(start_rrt));
  1287  
  1288    if (have_sgi && use_sgi)
  1289      start_sgi = *sgi_addr;
  1290  
  1291    if (have_mftb && use_mftb)
  1292      MFTB (start_mftb);
  1293  
  1294    if (have_stck && use_stck)
  1295      STCK (start_stck);
  1296  
  1297    /* Cycles sampled last for maximum accuracy. */
  1298    if (have_cycles && use_cycles)
  1299      speed_cyclecounter (start_cycles);
  1300  }
  1301  
  1302  
  1303  /* Calculate the difference between two cycle counter samples, as a "double"
  1304     counter of cycles.
  1305  
  1306     The start and end values are allowed to cancel in integers in case the
  1307     counter values are bigger than the 53 bits that normally fit in a double.
  1308  
  1309     This works even if speed_cyclecounter() puts a value bigger than 32-bits
  1310     in the low word (the high word always gets a 2**32 multiplier though). */
  1311  
  1312  double
  1313  speed_cyclecounter_diff (const unsigned end[2], const unsigned start[2])
  1314  {
  1315    unsigned  d;
  1316    double    t;
  1317  
  1318    if (have_cycles == 1)
  1319      {
  1320        t = (end[0] - start[0]);
  1321      }
  1322    else
  1323      {
  1324        d = end[0] - start[0];
  1325        t = d - (d > end[0] ? M_2POWU : 0.0);
  1326        t += (end[1] - start[1]) * M_2POW32;
  1327      }
  1328    return t;
  1329  }
  1330  
  1331  
  1332  double
  1333  speed_mftb_diff (const unsigned end[2], const unsigned start[2])
  1334  {
  1335    unsigned  d;
  1336    double    t;
  1337  
  1338    d = end[0] - start[0];
  1339    t = (double) d - (d > end[0] ? M_2POW32 : 0.0);
  1340    t += (end[1] - start[1]) * M_2POW32;
  1341    return t;
  1342  }
  1343  
  1344  
  1345  /* Calculate the difference between "start" and "end" using fields "sec" and
  1346     "psec", where each "psec" is a "punit" of a second.
  1347  
  1348     The seconds parts are allowed to cancel before being combined with the
  1349     psec parts, in case a simple "sec+psec*punit" exceeds the precision of a
  1350     double.
  1351  
  1352     Total time is only calculated in a "double" since an integer count of
  1353     psecs might overflow.  2^32 microseconds is only a bit over an hour, or
  1354     2^32 nanoseconds only about 4 seconds.
  1355  
  1356     The casts to "long" are for the benefit of timebasestruct_t, where the
  1357     fields are only "unsigned int", but we want a signed difference.  */
  1358  
  1359  #define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
  1360    {                                                             \
  1361      long  sec_diff, psec_diff;                                  \
  1362      sec_diff = (long) end->sec - (long) start->sec;             \
  1363      psec_diff = (long) end->psec - (long) start->psec;          \
  1364      return (double) sec_diff + punit * (double) psec_diff;      \
  1365    }
  1366  
  1367  double
  1368  timeval_diff_secs (const struct_timeval *end, const struct_timeval *start)
  1369  {
  1370    DIFF_SECS_ROUTINE (tv_sec, tv_usec, 1e-6);
  1371  }
  1372  
  1373  double
  1374  rusage_diff_secs (const struct_rusage *end, const struct_rusage *start)
  1375  {
  1376    DIFF_SECS_ROUTINE (ru_utime.tv_sec, ru_utime.tv_usec, 1e-6);
  1377  }
  1378  
  1379  double
  1380  timespec_diff_secs (const struct_timespec *end, const struct_timespec *start)
  1381  {
  1382    DIFF_SECS_ROUTINE (tv_sec, tv_nsec, 1e-9);
  1383  }
  1384  
  1385  /* This is for use after time_base_to_time, ie. for seconds and nanoseconds. */
  1386  double
  1387  timebasestruct_diff_secs (const timebasestruct_t *end,
  1388  			  const timebasestruct_t *start)
  1389  {
  1390    DIFF_SECS_ROUTINE (tb_high, tb_low, 1e-9);
  1391  }
  1392  
  1393  
/* Finish a measurement begun with speed_starttime() and return the time
   taken, as a double.

   Every method in use is sampled first, the cycle counter before anything
   else.  One of the resulting differences is then selected, the tests
   being made in this order:

     - read_real_time(), then clock_gettime(), whenever in use
     - getrusage(), times() or gettimeofday() when the cycle counter is
       also in use and the elapsed time reaches cycles_limit; getrusage()
       and times() also when they have reached speed_precision units
     - mftb, stck, the SGI counter, the cycle counter
     - microsecond getrusage() or gettimeofday()
     - times(), getrusage(), gettimeofday()

   A negative result is replaced by 0.0.  If no method is in use the
   program is aborted.  */
double
speed_endtime (void)
{
  /* The three macros below all store "value" in result and jump to the
     "done" label; they differ only in the message printed when
     speed_option_verbose is 3 or more.  */

  /* "name" was simply the method to use.  */
#define END_USE(name,value)                             \
  do {                                                  \
    if (speed_option_verbose >= 3)                      \
      printf ("speed_endtime(): used %s\n", name);      \
    result = value;                                     \
    goto done;                                          \
  } while (0)

  /* "name" is a supplementary method which measured enough units.  */
#define END_ENOUGH(name,value)                                          \
  do {                                                                  \
    if (speed_option_verbose >= 3)                                      \
      printf ("speed_endtime(): %s gives enough precision\n", name);    \
    result = value;                                                     \
    goto done;                                                          \
  } while (0)

  /* "name" is a supplementary method used because cycles_limit was
     reached.  */
#define END_EXCEED(name,value)                                            \
  do {                                                                    \
    if (speed_option_verbose >= 3)                                        \
      printf ("speed_endtime(): cycle counter limit exceeded, used %s\n", \
	      name);                                                      \
    result = value;                                                       \
    goto done;                                                            \
  } while (0)

  unsigned          end_cycles[2];
  stck_t            end_stck;
  unsigned          end_mftb[2];
  unsigned          end_sgi;
  timebasestruct_t  end_rrt;
  struct_timespec   end_cgt;
  struct_timeval    end_gtod;
  struct_rusage     end_grus;
  struct_tms        end_times;
  double            t_gtod, t_grus, t_times, t_cgt;
  double            t_rrt, t_sgi, t_mftb, t_stck, t_cycles;
  double            result;

  /* Cycles sampled first for maximum accuracy.
     "have_" values tested to let unused code go dead.  */

  if (have_cycles && use_cycles)  speed_cyclecounter (end_cycles);
  if (have_stck   && use_stck)    STCK (end_stck);
  if (have_mftb   && use_mftb)    MFTB (end_mftb);
  if (have_sgi    && use_sgi)     end_sgi = *sgi_addr;
  if (have_rrt    && use_rrt)     read_real_time (&end_rrt, sizeof(end_rrt));
  if (have_cgt    && use_cgt)     clock_gettime (CGT_ID, &end_cgt);
  if (have_gtod   && use_gtod)    gettimeofday (&end_gtod, NULL);
  if (have_grus   && use_grus)    getrusage (0, &end_grus);
  if (have_times  && use_times)   times (&end_times);

  result = -1.0;

  /* At verbosity 4 and up, show the raw start and end samples of each
     method in use.  */
  if (speed_option_verbose >= 4)
    {
      printf ("speed_endtime():\n");
      if (use_cycles)
	printf ("   cycles  0x%X,0x%X -> 0x%X,0x%X\n",
		start_cycles[1], start_cycles[0],
		end_cycles[1], end_cycles[0]);

      if (use_stck)
	printf ("   stck  0x%lX -> 0x%lX\n", start_stck, end_stck);

      if (use_mftb)
	printf ("   mftb  0x%X,%08X -> 0x%X,%08X\n",
		start_mftb[1], start_mftb[0],
		end_mftb[1], end_mftb[0]);

      if (use_sgi)
	printf ("   sgi  0x%X -> 0x%X\n", start_sgi, end_sgi);

      if (use_rrt)
	printf ("   read_real_time  (%d)%u,%u -> (%d)%u,%u\n",
		start_rrt.flag, start_rrt.tb_high, start_rrt.tb_low,
		end_rrt.flag, end_rrt.tb_high, end_rrt.tb_low);

      if (use_cgt)
	printf ("   clock_gettime  %ld.%09ld -> %ld.%09ld\n",
		start_cgt.tv_sec, start_cgt.tv_nsec,
		end_cgt.tv_sec, end_cgt.tv_nsec);

      if (use_gtod)
	printf ("   gettimeofday  %ld.%06ld -> %ld.%06ld\n",
		start_gtod.tv_sec, start_gtod.tv_usec,
		end_gtod.tv_sec, end_gtod.tv_usec);

      if (use_grus)
	printf ("   getrusage  %ld.%09ld -> %ld.%09ld\n",
		start_grus.ru_utime.tv_sec, start_grus.ru_utime.tv_usec,
		end_grus.ru_utime.tv_sec, end_grus.ru_utime.tv_usec);

      if (use_times)
	printf ("   times  %ld -> %ld\n",
		start_times.tms_utime, end_times.tms_utime);
    }

  if (use_rrt)
    {
      /* both samples are converted in place before being subtracted; see
	 timebasestruct_diff_secs() */
      time_base_to_time (&start_rrt, sizeof(start_rrt));
      time_base_to_time (&end_rrt, sizeof(end_rrt));
      t_rrt = timebasestruct_diff_secs (&end_rrt, &start_rrt);
      END_USE ("read_real_time()", t_rrt);
    }

  if (use_cgt)
    {
      t_cgt = timespec_diff_secs (&end_cgt, &start_cgt);
      END_USE ("clock_gettime()", t_cgt);
    }

  if (use_grus)
    {
      t_grus = rusage_diff_secs (&end_grus, &start_grus);

      /* Use getrusage() if the cycle counter limit would be exceeded, or if
	 it provides enough accuracy already. */
      if (use_cycles)
	{
	  if (t_grus >= speed_precision*grus_unittime)
	    END_ENOUGH ("getrusage()", t_grus);
	  if (t_grus >= cycles_limit)
	    END_EXCEED ("getrusage()", t_grus);
	}
    }

  if (use_times)
    {
      t_times = (end_times.tms_utime - start_times.tms_utime) * times_unittime;

      /* Use times() if the cycle counter limit would be exceeded, or if
	 it provides enough accuracy already. */
      if (use_cycles)
	{
	  if (t_times >= speed_precision*times_unittime)
	    END_ENOUGH ("times()", t_times);
	  if (t_times >= cycles_limit)
	    END_EXCEED ("times()", t_times);
	}
    }

  if (use_gtod)
    {
      t_gtod = timeval_diff_secs (&end_gtod, &start_gtod);

      /* Use gettimeofday() if it measured a value bigger than the cycle
	 counter can handle.  */
      if (use_cycles)
	{
	  if (t_gtod >= cycles_limit)
	    END_EXCEED ("gettimeofday()", t_gtod);
	}
    }

  /* None of the supplementary methods was selected above, so take the
     counter based methods next.  */

  if (use_mftb)
    {
      t_mftb = speed_mftb_diff (end_mftb, start_mftb) * mftb_unittime;
      END_USE ("mftb", t_mftb);
    }

  if (use_stck)
    {
      t_stck = (end_stck - start_stck) * STCK_PERIOD;
      END_USE ("stck", t_stck);
    }

  if (use_sgi)
    {
      t_sgi = (end_sgi - start_sgi) * sgi_unittime;
      END_USE ("SGI hardware counter", t_sgi);
    }

  if (use_cycles)
    {
      t_cycles = speed_cyclecounter_diff (end_cycles, start_cycles)
	* speed_cycletime;
      END_USE ("cycle counter", t_cycles);
    }

  /* No counter in use.  The microsecond methods are tried ahead of the
     rest; t_grus, t_gtod and t_times were calculated above.  */

  if (use_grus && getrusage_microseconds_p())
    END_USE ("getrusage()", t_grus);

  if (use_gtod && gettimeofday_microseconds_p())
    END_USE ("gettimeofday()", t_gtod);

  if (use_times)  END_USE ("times()",        t_times);
  if (use_grus)   END_USE ("getrusage()",    t_grus);
  if (use_gtod)   END_USE ("gettimeofday()", t_gtod);

  fprintf (stderr, "speed_endtime(): oops, no time method available\n");
  abort ();

 done:
  /* all the END_ macros arrive here with result set */
  if (result < 0.0)
    {
      if (speed_option_verbose >= 2)
	fprintf (stderr, "speed_endtime(): warning, treating negative time as zero: %.9f\n", result);
      result = 0.0;
    }
  return result;
}