github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/hgcd_appr.c (about)

     1  /* hgcd_appr.c.
     2  
     3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     6  
     7  Copyright 2011, 2012 Free Software Foundation, Inc.
     8  
     9  This file is part of the GNU MP Library.
    10  
    11  The GNU MP Library is free software; you can redistribute it and/or modify
    12  it under the terms of either:
    13  
    14    * the GNU Lesser General Public License as published by the Free
    15      Software Foundation; either version 3 of the License, or (at your
    16      option) any later version.
    17  
    18  or
    19  
    20    * the GNU General Public License as published by the Free Software
    21      Foundation; either version 2 of the License, or (at your option) any
    22      later version.
    23  
    24  or both in parallel, as here.
    25  
    26  The GNU MP Library is distributed in the hope that it will be useful, but
    27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    29  for more details.
    30  
    31  You should have received copies of the GNU General Public License and the
    32  GNU Lesser General Public License along with the GNU MP Library.  If not,
    33  see https://www.gnu.org/licenses/.  */
    34  
    35  #include "gmp.h"
    36  #include "gmp-impl.h"
    37  #include "longlong.h"
    38  
    39  /* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
    40     HGCD_THRESHOLD at the end? */
    41  mp_size_t
    42  mpn_hgcd_appr_itch (mp_size_t n)
    43  {
    44    if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
    45      return n;
    46    else
    47      {
    48        unsigned k;
    49        int count;
    50        mp_size_t nscaled;
    51  
    52        /* Get the recursion depth. */
    53        nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
    54        count_leading_zeros (count, nscaled);
    55        k = GMP_LIMB_BITS - count;
    56  
    57        return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
    58      }
    59  }
    60  
    61  /* Destroys inputs. */
    62  int
    63  mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
    64  	       struct hgcd_matrix *M, mp_ptr tp)
    65  {
    66    mp_size_t s;
    67    int success = 0;
    68  
    69    ASSERT (n > 0);
    70  
    71    ASSERT ((ap[n-1] | bp[n-1]) != 0);
    72  
    73    if (n <= 2)
    74      /* Implies s = n. A fairly uninteresting case but exercised by the
    75         random inputs of the testsuite. */
    76      return 0;
    77  
    78    ASSERT ((n+1)/2 - 1 < M->alloc);
    79  
    80    /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
    81       we discard some of the least significant limbs, we must keep one
    82       additional bit to account for the truncation error. We maintain
    83       the GMP_NUMB_BITS * s - extra_bits as the current target size. */
    84  
    85    s = n/2 + 1;
    86    if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
    87      {
    88        unsigned extra_bits = 0;
    89  
    90        while (n > 2)
    91  	{
    92  	  mp_size_t nn;
    93  
    94  	  ASSERT (n > s);
    95  	  ASSERT (n <= 2*s);
    96  
    97  	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
    98  	  if (!nn)
    99  	    break;
   100  
   101  	  n = nn;
   102  	  success = 1;
   103  
   104  	  /* We can truncate and discard the lower p bits whenever nbits <=
   105  	     2*sbits - p. To account for the truncation error, we must
   106  	     adjust
   107  
   108  	     sbits <-- sbits + 1 - p,
   109  
   110  	     rather than just sbits <-- sbits - p. This adjustment makes
   111  	     the produced matrix slightly smaller than it could be. */
   112  
   113  	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
   114  	    {
   115  	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
   116  
   117  	      if (extra_bits == 0)
   118  		{
   119  		  /* We cross a limb boundary and bump s. We can't do that
   120  		     if the result is that it makes makes min(U, V)
   121  		     smaller than 2^{GMP_NUMB_BITS} s. */
   122  		  if (s + 1 == n
   123  		      || mpn_zero_p (ap + s + 1, n - s - 1)
   124  		      || mpn_zero_p (bp + s + 1, n - s - 1))
   125  		    continue;
   126  
   127  		  extra_bits = GMP_NUMB_BITS - 1;
   128  		  s++;
   129  		}
   130  	      else
   131  		{
   132  		  extra_bits--;
   133  		}
   134  
   135  	      /* Drop the p least significant limbs */
   136  	      ap += p; bp += p; n -= p; s -= p;
   137  	    }
   138  	}
   139  
   140        ASSERT (s > 0);
   141  
   142        if (extra_bits > 0)
   143  	{
   144  	  /* We can get here only of we have dropped at least one of the least
   145  	     significant bits, so we can decrement ap and bp. We can then shift
   146  	     left extra bits using mpn_rshift. */
   147  	  /* NOTE: In the unlikely case that n is large, it would be preferable
   148  	     to do an initial subdiv step to reduce the size before shifting,
   149  	     but that would mean duplicating mpn_gcd_subdiv_step with a bit
   150  	     count rather than a limb count. */
   151  	  ap--; bp--;
   152  	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
   153  	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
   154  	  n += (ap[n] | bp[n]) > 0;
   155  
   156  	  ASSERT (success);
   157  
   158  	  while (n > 2)
   159  	    {
   160  	      mp_size_t nn;
   161  
   162  	      ASSERT (n > s);
   163  	      ASSERT (n <= 2*s);
   164  
   165  	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
   166  
   167  	      if (!nn)
   168  		return 1;
   169  
   170  	      n = nn;
   171  	    }
   172  	}
   173  
   174        if (n == 2)
   175  	{
   176  	  struct hgcd_matrix1 M1;
   177  	  ASSERT (s == 1);
   178  
   179  	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
   180  	    {
   181  	      /* Multiply M <- M * M1 */
   182  	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
   183  	      success = 1;
   184  	    }
   185  	}
   186        return success;
   187      }
   188    else
   189      {
   190        mp_size_t n2 = (3*n)/4 + 1;
   191        mp_size_t p = n/2;
   192        mp_size_t nn;
   193  
   194        nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
   195        if (nn)
   196  	{
   197  	  n = nn;
   198  	  /* FIXME: Discard some of the low limbs immediately? */
   199  	  success = 1;
   200  	}
   201  
   202        while (n > n2)
   203  	{
   204  	  mp_size_t nn;
   205  
   206  	  /* Needs n + 1 storage */
   207  	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
   208  	  if (!nn)
   209  	    return success;
   210  
   211  	  n = nn;
   212  	  success = 1;
   213  	}
   214        if (n > s + 2)
   215  	{
   216  	  struct hgcd_matrix M1;
   217  	  mp_size_t scratch;
   218  
   219  	  p = 2*s - n + 1;
   220  	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
   221  
   222  	  mpn_hgcd_matrix_init(&M1, n - p, tp);
   223  	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
   224  	    {
   225  	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
   226  	      ASSERT (M->n + 2 >= M1.n);
   227  
   228  	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
   229  		 then either q or q + 1 is a correct quotient, and M1 will
   230  		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
   231  		 rules out the case that the size of M * M1 is much
   232  		 smaller than the expected M->n + M1->n. */
   233  
   234  	      ASSERT (M->n + M1.n < M->alloc);
   235  
   236  	      /* We need a bound for of M->n + M1.n. Let n be the original
   237  		 input size. Then
   238  
   239  		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
   240  
   241  		 and it follows that
   242  
   243  		 M.n + M1.n <= ceil(n/2) + 1
   244  
   245  		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
   246  		 amount of needed scratch space. */
   247  	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
   248  	      return 1;
   249  	    }
   250  	}
   251  
   252        for(;;)
   253  	{
   254  	  mp_size_t nn;
   255  
   256  	  ASSERT (n > s);
   257  	  ASSERT (n <= 2*s);
   258  
   259  	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
   260  
   261  	  if (!nn)
   262  	    return success;
   263  
   264  	  n = nn;
   265  	  success = 1;
   266  	}
   267      }
   268  }