github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tune/speed-ext.c (about)

     1  /* An example of extending the speed program to measure routines not in GMP.
     2  
     3  Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  
    32  /* The extension here is three versions of an mpn arithmetic mean.  These
    33     aren't meant to be particularly useful, just examples.
    34  
    35     You can run something like the following to compare their speeds.
    36  
    37             ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
    38  
    39     On RISC chips, mean_open() might be fastest if the compiler is doing a
    40     good job.  On the register starved x86s, mean_calls will be fastest.
    41  
    42  
    43     Notes:
    44  
    45     SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
    46     by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
    47     files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
    48  
    49     The advantage of this #include "speed.c" scheme is that there's no
    50     editing of a copy of that file, and new features in new versions of it
    51     will be immediately available.
    52  
    53     In a real program the routines mean_calls() etc would probably be in
    54     separate C or assembler source files, and just the measuring
    55     speed_mean_calls() etc would be here.  Linking against other libraries
    56     for things to measure is perfectly possible too.
    57  
    58     When attempting to compare two versions of the same named routine, say
    59     like the generic and assembler versions of mpn_add_n(), creative use of
    60     cc -D or #define is suggested, so one or both can be renamed and linked
    61     into the same program.  It'll be much easier to compare them side by side
    62     than with separate programs for each.
    63  
    64     common.c has notes on writing speed measuring routines.
    65  
    66     Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
    67     not using libtool) to get common.o and other objects needed by speed.c.  */
    68  
    69  
    70  #define SPEED_EXTRA_PROTOS                                              \
    71    double speed_mean_calls (struct speed_params *s);			\
    72    double speed_mean_open  (struct speed_params *s);			\
    73    double speed_mean_open2 (struct speed_params *s);
    74  
    75  #define SPEED_EXTRA_ROUTINES            \
    76    { "mean_calls",  speed_mean_calls  }, \
    77    { "mean_open",   speed_mean_open   }, \
    78    { "mean_open2",  speed_mean_open2  },
    79  
    80  #include "speed.c"
    81  
    82  
    83  /* A straightforward implementation calling mpn subroutines.
    84  
    85     wp,size is set to (xp,size + yp,size) / 2.  The return value is the
    86     remainder from the division.  The other versions are the same.  */
    87  
    88  mp_limb_t
    89  mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
    90  {
    91    mp_limb_t  c, ret;
    92  
    93    ASSERT (size >= 1);
    94  
    95    c = mpn_add_n (wp, xp, yp, size);
    96    ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
    97    wp[size-1] |= (c << (GMP_LIMB_BITS-1));
    98    return ret;
    99  }
   100  
   101  
   102  /* An open-coded version, making one pass over the data.  The right shift is
   103     done as the added limbs are produced.  The addition code follows
   104     mpn/generic/add_n.c. */
   105  
   106  mp_limb_t
   107  mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   108  {
   109    mp_limb_t  w, wprev, x, y, c, ret;
   110    mp_size_t  i;
   111  
   112    ASSERT (size >= 1);
   113  
   114    x = xp[0];
   115    y = yp[0];
   116  
   117    wprev = x + y;
   118    c = (wprev < x);
   119    ret = (wprev & 1);
   120  
   121  #define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
   122  
   123    for (i = 1; i < size; i++)
   124      {
   125        x = xp[i];
   126        y = yp[i];
   127  
   128        w = x + c;
   129        c = (w < x);
   130        w += y;
   131        c += (w < y);
   132  
   133        wp[i-1] = RSHIFT (w, wprev);
   134        wprev = w;
   135      }
   136  
   137    wp[i-1] = RSHIFT (c, wprev);
   138  
   139    return ret;
   140  }
   141  
   142  
   143  /* Another one-pass version, but right shifting the source limbs rather than
   144     the result limbs.  There's not much chance of this being better than the
   145     above, but it's an alternative at least. */
   146  
   147  mp_limb_t
   148  mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   149  {
   150    mp_limb_t  w, x, y, xnext, ynext, c, ret;
   151    mp_size_t  i;
   152  
   153    ASSERT (size >= 1);
   154  
   155    x = xp[0];
   156    y = yp[0];
   157  
   158    /* ret is the low bit of x+y, c is the carry out of that low bit add */
   159    ret = (x ^ y) & 1;
   160    c   = (x & y) & 1;
   161  
   162    for (i = 0; i < size-1; i++)
   163      {
   164        xnext = xp[i+1];
   165        ynext = yp[i+1];
   166        x = RSHIFT (xnext, x);
   167        y = RSHIFT (ynext, y);
   168  
   169        w = x + c;
   170        c = (w < x);
   171        w += y;
   172        c += (w < y);
   173        wp[i] = w;
   174  
   175        x = xnext;
   176        y = ynext;
   177      }
   178  
   179    wp[i] = (x >> 1) + (y >> 1) + c;
   180  
   181    return ret;
   182  }
   183  
   184  
   185  /* The speed measuring routines are the same apart from which function they
   186     run, so a macro is used.  Actually this macro is the same as
   187     SPEED_ROUTINE_MPN_BINARY_N.  */
   188  
   189  #define SPEED_ROUTINE_MEAN(mean_fun)                    \
   190    {                                                     \
   191      unsigned  i;                                        \
   192      mp_ptr    wp;                                       \
   193      double    t;                                        \
   194      TMP_DECL;                                  \
   195                                                          \
   196      SPEED_RESTRICT_COND (s->size >= 1);                 \
   197                                                          \
   198      TMP_MARK;                                  \
   199      SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \
   200                                                          \
   201      speed_operand_src (s, s->xp, s->size);              \
   202      speed_operand_src (s, s->yp, s->size);              \
   203      speed_operand_dst (s, wp, s->size);                 \
   204      speed_cache_fill (s);                               \
   205                                                          \
   206      speed_starttime ();                                 \
   207      i = s->reps;                                        \
   208      do                                                  \
   209        mean_fun (wp, s->xp, s->yp, s->size);             \
   210      while (--i != 0);                                   \
   211      t = speed_endtime ();                               \
   212                                                          \
   213      TMP_FREE;                                  \
   214      return t;                                           \
   215    }
   216  
   217  double
   218  speed_mean_calls (struct speed_params *s)
   219  {
   220    SPEED_ROUTINE_MEAN (mean_calls);
   221  }
   222  
   223  double
   224  speed_mean_open (struct speed_params *s)
   225  {
   226    SPEED_ROUTINE_MEAN (mean_open);
   227  }
   228  
   229  double
   230  speed_mean_open2 (struct speed_params *s)
   231  {
   232    SPEED_ROUTINE_MEAN (mean_open2);
   233  }