github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpz/oddfac_1.c (about)

     1  /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
     2  
     3  Contributed to the GNU project by Marco Bodrato.
     4  
     5  THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
     6  IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
     7  IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
     8  DISAPPEAR IN A FUTURE GNU MP RELEASE.
     9  
    10  Copyright 2010-2012 Free Software Foundation, Inc.
    11  
    12  This file is part of the GNU MP Library.
    13  
    14  The GNU MP Library is free software; you can redistribute it and/or modify
    15  it under the terms of either:
    16  
    17    * the GNU Lesser General Public License as published by the Free
    18      Software Foundation; either version 3 of the License, or (at your
    19      option) any later version.
    20  
    21  or
    22  
    23    * the GNU General Public License as published by the Free Software
    24      Foundation; either version 2 of the License, or (at your option) any
    25      later version.
    26  
    27  or both in parallel, as here.
    28  
    29  The GNU MP Library is distributed in the hope that it will be useful, but
    30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    32  for more details.
    33  
    34  You should have received copies of the GNU General Public License and the
    35  GNU Lesser General Public License along with the GNU MP Library.  If not,
    36  see https://www.gnu.org/licenses/.  */
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  #include "longlong.h"
    41  
    42  /* TODO:
    43     - split this file in smaller parts with functions that can be recycled for different computations.
    44   */
    45  
    46  /**************************************************************/
    47  /* Section macros: common macros, for mswing/fac/bin (&sieve) */
    48  /**************************************************************/
    49  
    50  #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I)			\
    51    if ((PR) > (MAX_PR)) {					\
    52      (VEC)[(I)++] = (PR);					\
    53      (PR) = 1;							\
    54    }
    55  
    56  #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I)		\
    57    do {								\
    58      if ((PR) > (MAX_PR)) {					\
    59        (VEC)[(I)++] = (PR);					\
    60        (PR) = (P);						\
    61      } else							\
    62        (PR) *= (P);						\
    63    } while (0)
    64  
    65  #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)			\
    66      __max_i = (end);						\
    67  								\
    68      do {							\
    69        ++__i;							\
    70        if (((sieve)[__index] & __mask) == 0)			\
    71  	{							\
    72  	  (prime) = id_to_n(__i)
    73  
    74  #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve)		\
    75    do {								\
    76      mp_limb_t __mask, __index, __max_i, __i;			\
    77  								\
    78      __i = (start)-(off);					\
    79      __index = __i / GMP_LIMB_BITS;				\
    80      __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS);		\
    81      __i += (off);						\
    82  								\
    83      LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
    84  
    85  #define LOOP_ON_SIEVE_STOP					\
    86  	}							\
    87        __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1);	\
    88        __index += __mask & 1;					\
    89      }  while (__i <= __max_i)					\
    90  
    91  #define LOOP_ON_SIEVE_END					\
    92      LOOP_ON_SIEVE_STOP;						\
    93    } while (0)
    94  
    95  /*********************************************************/
    96  /* Section sieve: sieving functions and tools for primes */
    97  /*********************************************************/
    98  
    99  #if WANT_ASSERT
   100  static mp_limb_t
   101  bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
   102  #endif
   103  
   104  /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
   105  static mp_limb_t
   106  id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
   107  
   108  /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
   109  static mp_limb_t
   110  n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
   111  
   112  #if WANT_ASSERT
   113  static mp_size_t
   114  primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
   115  #endif
   116  
   117  /*********************************************************/
   118  /* Section mswing: 2-multiswing factorial                 */
   119  /*********************************************************/
   120  
   121  /* Returns an approximation of the sqare root of x.  *
   122   * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4    */
   123  static mp_limb_t
   124  limb_apprsqrt (mp_limb_t x)
   125  {
   126    int s;
   127  
   128    ASSERT (x > 2);
   129    count_leading_zeros (s, x - 1);
   130    s = GMP_LIMB_BITS - 1 - s;
   131    return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
   132  }
   133  
   134  #if 0
   135  /* A count-then-exponentiate variant for SWING_A_PRIME */
   136  #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)		\
   137    do {							\
   138      mp_limb_t __q, __prime;				\
   139      int __exp;						\
   140      __prime = (P);					\
   141      __exp = 0;						\
   142      __q = (N);						\
   143      do {						\
   144        __q /= __prime;					\
   145        __exp += __q & 1;					\
   146      } while (__q >= __prime);				\
   147      if (__exp) { /* Store $prime^{exp}$ */		\
   148        for (__q = __prime; --__exp; __q *= __prime);	\
   149        FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I);	\
   150      };							\
   151    } while (0)
   152  #else
   153  #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
   154    do {						\
   155      mp_limb_t __q, __prime;			\
   156      __prime = (P);				\
   157      FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I);	\
   158      __q = (N);					\
   159      do {					\
   160        __q /= __prime;				\
   161        if ((__q & 1) != 0) (PR) *= __prime;	\
   162      } while (__q >= __prime);			\
   163    } while (0)
   164  #endif
   165  
   166  #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I)	\
   167    do {							\
   168      mp_limb_t __prime;					\
   169      __prime = (P);					\
   170      if ((((N) / __prime) & 1) != 0)			\
   171        FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I);	\
   172    } while (0)
   173  
   174  /* mpz_2multiswing_1 computes the odd part of the 2-multiswing
   175     factorial of the parameter n.  The result x is an odd positive
   176     integer so that multiswing(n,2) = x 2^a.
   177  
   178     Uses the algorithm described by Peter Luschny in "Divide, Swing and
   179     Conquer the Factorial!".
   180  
   181     The pointer sieve points to primesieve_size(n) limbs containing a
   182     bit-array where primes are marked as 0.
   183     Enough (FIXME: explain :-) limbs must be pointed by factors.
   184   */
   185  
   186  static void
   187  mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
   188  {
   189    mp_limb_t prod, max_prod;
   190    mp_size_t j;
   191  
   192    ASSERT (n >= 26);
   193  
   194    j = 0;
   195    prod  = -(n & 1);
   196    n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
   197  
   198    prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
   199    max_prod = GMP_NUMB_MAX / (n-1);
   200  
   201    /* Handle prime = 3 separately. */
   202    SWING_A_PRIME (3, n, prod, max_prod, factors, j);
   203  
   204    /* Swing primes from 5 to n/3 */
   205    {
   206      mp_limb_t s;
   207  
   208      {
   209        mp_limb_t prime;
   210  
   211        s = limb_apprsqrt(n);
   212        ASSERT (s >= 5);
   213        s = n_to_bit (s);
   214        LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
   215        SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
   216        LOOP_ON_SIEVE_END;
   217        s++;
   218      }
   219  
   220      ASSERT (max_prod <= GMP_NUMB_MAX / 3);
   221      ASSERT (bit_to_n (s) * bit_to_n (s) > n);
   222      ASSERT (s <= n_to_bit (n / 3));
   223      {
   224        mp_limb_t prime;
   225        mp_limb_t l_max_prod = max_prod * 3;
   226  
   227        LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
   228        SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
   229        LOOP_ON_SIEVE_END;
   230      }
   231    }
   232  
   233    /* Store primes from (n+1)/2 to n */
   234    {
   235      mp_limb_t prime;
   236      LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
   237      FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
   238      LOOP_ON_SIEVE_END;
   239    }
   240  
   241    if (LIKELY (j != 0))
   242      {
   243        factors[j++] = prod;
   244        mpz_prodlimbs (x, factors, j);
   245      }
   246    else
   247      {
   248        PTR (x)[0] = prod;
   249        SIZ (x) = 1;
   250      }
   251  }
   252  
   253  #undef SWING_A_PRIME
   254  #undef SH_SWING_A_PRIME
   255  #undef LOOP_ON_SIEVE_END
   256  #undef LOOP_ON_SIEVE_STOP
   257  #undef LOOP_ON_SIEVE_BEGIN
   258  #undef LOOP_ON_SIEVE_CONTINUE
   259  #undef FACTOR_LIST_APPEND
   260  
   261  /*********************************************************/
   262  /* Section oddfac: odd factorial, needed also by binomial*/
   263  /*********************************************************/
   264  
   265  #if TUNE_PROGRAM_BUILD
   266  #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
   267  #else
   268  #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
   269  #endif
   270  
   271  /* mpz_oddfac_1 computes the odd part of the factorial of the
   272     parameter n.  I.e. n! = x 2^a, where x is the returned value: an
   273     odd positive integer.
   274  
   275     If flag != 0 a square is skipped in the DSC part, e.g.
   276     if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
   277  
   278     If n is too small, flag is ignored, and an ASSERT can be triggered.
   279  
   280     TODO: FAC_DSC_THRESHOLD is used here with two different roles:
   281      - to decide when prime factorisation is needed,
   282      - to stop the recursion, once sieving is done.
   283     Maybe two thresholds can do a better job.
   284   */
   285  void
   286  mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
   287  {
   288    ASSERT (n <= GMP_NUMB_MAX);
   289    ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
   290  
   291    if (n <= ODD_FACTORIAL_TABLE_LIMIT)
   292      {
   293        PTR (x)[0] = __gmp_oddfac_table[n];
   294        SIZ (x) = 1;
   295      }
   296    else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
   297      {
   298        mp_ptr   px;
   299  
   300        px = MPZ_NEWALLOC (x, 2);
   301        umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
   302        SIZ (x) = 2;
   303      }
   304    else
   305      {
   306        unsigned s;
   307        mp_ptr   factors;
   308  
   309        s = 0;
   310        {
   311  	mp_limb_t tn;
   312  	mp_limb_t prod, max_prod, i;
   313  	mp_size_t j;
   314  	TMP_SDECL;
   315  
   316  #if TUNE_PROGRAM_BUILD
   317  	ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
   318  	ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
   319  #endif
   320  
   321  	/* Compute the number of recursive steps for the DSC algorithm. */
   322  	for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
   323  	  tn >>= 1;
   324  
   325  	j = 0;
   326  
   327  	TMP_SMARK;
   328  	factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
   329  	ASSERT (tn >= FACTORS_PER_LIMB);
   330  
   331  	prod = 1;
   332  #if TUNE_PROGRAM_BUILD
   333  	max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
   334  #else
   335  	max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
   336  #endif
   337  
   338  	ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
   339  	do {
   340  	  i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
   341  	  factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
   342  	  do {
   343  	    FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
   344  	    i += 2;
   345  	  } while (i <= tn);
   346  	  max_prod <<= 1;
   347  	  tn >>= 1;
   348  	} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
   349  
   350  	factors[j++] = prod;
   351  	factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
   352  	factors[j++] = __gmp_oddfac_table[tn >> 1];
   353  	mpz_prodlimbs (x, factors, j);
   354  
   355  	TMP_SFREE;
   356        }
   357  
   358        if (s != 0)
   359  	/* Use the algorithm described by Peter Luschny in "Divide,
   360  	   Swing and Conquer the Factorial!".
   361  
   362  	   Improvement: there are two temporary buffers, factors and
   363  	   square, that are never used together; with a good estimate
   364  	   of the maximal needed size, they could share a single
   365  	   allocation.
   366  	*/
   367  	{
   368  	  mpz_t mswing;
   369  	  mp_ptr sieve;
   370  	  mp_size_t size;
   371  	  TMP_DECL;
   372  
   373  	  TMP_MARK;
   374  
   375  	  flag--;
   376  	  size = n / GMP_NUMB_BITS + 4;
   377  	  ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
   378  	  /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
   379  	     one more can be overwritten by mul, another for the sieve */
   380  	  MPZ_TMP_INIT (mswing, size);
   381  	  /* Initialize size, so that ASSERT can check it correctly. */
   382  	  ASSERT_CODE (SIZ (mswing) = 0);
   383  
   384  	  /* Put the sieve on the second half, it will be overwritten by the last mswing. */
   385  	  sieve = PTR (mswing) + size / 2 + 1;
   386  
   387  	  size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
   388  
   389  	  factors = TMP_ALLOC_LIMBS (size);
   390  	  do {
   391  	    mp_ptr    square, px;
   392  	    mp_size_t nx, ns;
   393  	    mp_limb_t cy;
   394  	    TMP_DECL;
   395  
   396  	    s--;
   397  	    ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
   398  	    mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
   399  
   400  	    TMP_MARK;
   401  	    nx = SIZ (x);
   402  	    if (s == flag) {
   403  	      size = nx;
   404  	      square = TMP_ALLOC_LIMBS (size);
   405  	      MPN_COPY (square, PTR (x), nx);
   406  	    } else {
   407  	      size = nx << 1;
   408  	      square = TMP_ALLOC_LIMBS (size);
   409  	      mpn_sqr (square, PTR (x), nx);
   410  	      size -= (square[size - 1] == 0);
   411  	    }
   412  	    ns = SIZ (mswing);
   413  	    nx = size + ns;
   414  	    px = MPZ_NEWALLOC (x, nx);
   415  	    ASSERT (ns <= size);
   416  	    cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
   417  
   418  	    TMP_FREE;
   419  	    SIZ(x) = nx - (cy == 0);
   420  	  } while (s != 0);
   421  	  TMP_FREE;
   422  	}
   423      }
   424  }
   425  
   426  #undef FACTORS_PER_LIMB
   427  #undef FACTOR_LIST_STORE