github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/gcd.c (about)

     1  /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
     2  
     3  Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
     4  Foundation, Inc.
     5  
     6  This file is part of the GNU MP Library.
     7  
     8  The GNU MP Library is free software; you can redistribute it and/or modify
     9  it under the terms of either:
    10  
    11    * the GNU Lesser General Public License as published by the Free
    12      Software Foundation; either version 3 of the License, or (at your
    13      option) any later version.
    14  
    15  or
    16  
    17    * the GNU General Public License as published by the Free Software
    18      Foundation; either version 2 of the License, or (at your option) any
    19      later version.
    20  
    21  or both in parallel, as here.
    22  
    23  The GNU MP Library is distributed in the hope that it will be useful, but
    24  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  for more details.
    27  
    28  You should have received copies of the GNU General Public License and the
    29  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  see https://www.gnu.org/licenses/.  */
    31  
    32  #include "gmp.h"
    33  #include "gmp-impl.h"
    34  #include "longlong.h"
    35  
    36  /* Uses the HGCD operation described in
    37  
    38       N. Möller, On Schönhage's algorithm and subquadratic integer gcd
    39       computation, Math. Comp. 77 (2008), 589-607.
    40  
    41    to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
    42    then uses Lehmer's algorithm.
    43  */
    44  
    45  /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
    46   * 2)/3, which gives a balanced multiplication in
    47   * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
    48   * performance. The matrix-vector multiplication is then
    49   * 4:1-unbalanced, with matrix elements of size n/6, and vector
    50   * elements of size p = 2n/3. */
    51  
    52  /* From analysis of the theoretical running time, it appears that when
    53   * multiplication takes time O(n^alpha), p should be chosen so that
    54   * the ratio of the time for the mpn_hgcd call, and the time for the
    55   * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
    56   * 1). */
    57  #ifdef TUNE_GCD_P
    58  #define P_TABLE_SIZE 10000
    59  mp_size_t p_table[P_TABLE_SIZE];
    60  #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
    61  #else
    62  #define CHOOSE_P(n) (2*(n) / 3)
    63  #endif
    64  
    65  struct gcd_ctx
    66  {
    67    mp_ptr gp;
    68    mp_size_t gn;
    69  };
    70  
    71  static void
    72  gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
    73  	  mp_srcptr qp, mp_size_t qn, int d)
    74  {
    75    struct gcd_ctx *ctx = (struct gcd_ctx *) p;
    76    MPN_COPY (ctx->gp, gp, gn);
    77    ctx->gn = gn;
    78  }
    79  
    80  #if GMP_NAIL_BITS > 0
    81  /* Nail supports should be easy, replacing the sub_ddmmss with nails
    82   * logic. */
    83  #error Nails not supported.
    84  #endif
    85  
    86  /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
    87     Both U and V must be odd. */
    88  static inline mp_size_t
    89  gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
    90  {
    91    mp_limb_t u0, u1, v0, v1;
    92    mp_size_t gn;
    93  
    94    u0 = up[0];
    95    u1 = up[1];
    96    v0 = vp[0];
    97    v1 = vp[1];
    98  
    99    ASSERT (u0 & 1);
   100    ASSERT (v0 & 1);
   101  
   102    /* Check for u0 != v0 needed to ensure that argument to
   103     * count_trailing_zeros is non-zero. */
   104    while (u1 != v1 && u0 != v0)
   105      {
   106        unsigned long int r;
   107        if (u1 > v1)
   108  	{
   109  	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
   110  	  count_trailing_zeros (r, u0);
   111  	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
   112  	  u1 >>= r;
   113  	}
   114        else  /* u1 < v1.  */
   115  	{
   116  	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
   117  	  count_trailing_zeros (r, v0);
   118  	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
   119  	  v1 >>= r;
   120  	}
   121      }
   122  
   123    gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
   124  
   125    /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
   126    if (u1 == v1 && u0 == v0)
   127      return gn;
   128  
   129    v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
   130    gp[0] = mpn_gcd_1 (gp, gn, v0);
   131  
   132    return 1;
   133  }
   134  
   135  mp_size_t
   136  mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
   137  {
   138    mp_size_t talloc;
   139    mp_size_t scratch;
   140    mp_size_t matrix_scratch;
   141  
   142    struct gcd_ctx ctx;
   143    mp_ptr tp;
   144    TMP_DECL;
   145  
   146    ASSERT (usize >= n);
   147    ASSERT (n > 0);
   148    ASSERT (vp[n-1] > 0);
   149  
   150    /* FIXME: Check for small sizes first, before setting up temporary
   151       storage etc. */
   152    talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
   153  
   154    /* For initial division */
   155    scratch = usize - n + 1;
   156    if (scratch > talloc)
   157      talloc = scratch;
   158  
   159  #if TUNE_GCD_P
   160    if (CHOOSE_P (n) > 0)
   161  #else
   162    if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
   163  #endif
   164      {
   165        mp_size_t hgcd_scratch;
   166        mp_size_t update_scratch;
   167        mp_size_t p = CHOOSE_P (n);
   168        mp_size_t scratch;
   169  #if TUNE_GCD_P
   170        /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
   171  	 is increasing */
   172        matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
   173        hgcd_scratch = mpn_hgcd_itch (n);
   174        update_scratch = 2*(n - 1);
   175  #else
   176        matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
   177        hgcd_scratch = mpn_hgcd_itch (n - p);
   178        update_scratch = p + n - 1;
   179  #endif
   180        scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
   181        if (scratch > talloc)
   182  	talloc = scratch;
   183      }
   184  
   185    TMP_MARK;
   186    tp = TMP_ALLOC_LIMBS(talloc);
   187  
   188    if (usize > n)
   189      {
   190        mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
   191  
   192        if (mpn_zero_p (up, n))
   193  	{
   194  	  MPN_COPY (gp, vp, n);
   195  	  ctx.gn = n;
   196  	  goto done;
   197  	}
   198      }
   199  
   200    ctx.gp = gp;
   201  
   202  #if TUNE_GCD_P
   203    while (CHOOSE_P (n) > 0)
   204  #else
   205    while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
   206  #endif
   207      {
   208        struct hgcd_matrix M;
   209        mp_size_t p = CHOOSE_P (n);
   210        mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
   211        mp_size_t nn;
   212        mpn_hgcd_matrix_init (&M, n - p, tp);
   213        nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
   214        if (nn > 0)
   215  	{
   216  	  ASSERT (M.n <= (n - p - 1)/2);
   217  	  ASSERT (M.n + p <= (p + n - 1) / 2);
   218  	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
   219  	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
   220  	}
   221        else
   222  	{
   223  	  /* Temporary storage n */
   224  	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
   225  	  if (n == 0)
   226  	    goto done;
   227  	}
   228      }
   229  
   230    while (n > 2)
   231      {
   232        struct hgcd_matrix1 M;
   233        mp_limb_t uh, ul, vh, vl;
   234        mp_limb_t mask;
   235  
   236        mask = up[n-1] | vp[n-1];
   237        ASSERT (mask > 0);
   238  
   239        if (mask & GMP_NUMB_HIGHBIT)
   240  	{
   241  	  uh = up[n-1]; ul = up[n-2];
   242  	  vh = vp[n-1]; vl = vp[n-2];
   243  	}
   244        else
   245  	{
   246  	  int shift;
   247  
   248  	  count_leading_zeros (shift, mask);
   249  	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
   250  	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
   251  	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
   252  	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
   253  	}
   254  
   255        /* Try an mpn_hgcd2 step */
   256        if (mpn_hgcd2 (uh, ul, vh, vl, &M))
   257  	{
   258  	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
   259  	  MP_PTR_SWAP (up, tp);
   260  	}
   261        else
   262  	{
   263  	  /* mpn_hgcd2 has failed. Then either one of a or b is very
   264  	     small, or the difference is very small. Perform one
   265  	     subtraction followed by one division. */
   266  
   267  	  /* Temporary storage n */
   268  	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
   269  	  if (n == 0)
   270  	    goto done;
   271  	}
   272      }
   273  
   274    ASSERT(up[n-1] | vp[n-1]);
   275  
   276    if (n == 1)
   277      {
   278        *gp = mpn_gcd_1(up, 1, vp[0]);
   279        ctx.gn = 1;
   280        goto done;
   281      }
   282  
   283    /* Due to the calling convention for mpn_gcd, at most one can be
   284       even. */
   285  
   286    if (! (up[0] & 1))
   287      MP_PTR_SWAP (up, vp);
   288  
   289    ASSERT (up[0] & 1);
   290  
   291    if (vp[0] == 0)
   292      {
   293        *gp = mpn_gcd_1 (up, 2, vp[1]);
   294        ctx.gn = 1;
   295        goto done;
   296      }
   297    else if (! (vp[0] & 1))
   298      {
   299        int r;
   300        count_trailing_zeros (r, vp[0]);
   301        vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
   302        vp[1] >>= r;
   303      }
   304  
   305    ctx.gn = gcd_2(gp, up, vp);
   306  
   307  done:
   308    TMP_FREE;
   309    return ctx.gn;
   310  }