github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpz/oddfac_1.c (about) 1 /* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. 6 IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. 7 IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR 8 DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2010-2012 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21 or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27 or both in parallel, as here. 28 29 The GNU MP Library is distributed in the hope that it will be useful, but 30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 for more details. 33 34 You should have received copies of the GNU General Public License and the 35 GNU Lesser General Public License along with the GNU MP Library. If not, 36 see https://www.gnu.org/licenses/. */ 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 #include "longlong.h" 41 42 /* TODO: 43 - split this file in smaller parts with functions that can be recycled for different computations. 44 */ 45 46 /**************************************************************/ 47 /* Section macros: common macros, for mswing/fac/bin (&sieve) */ 48 /**************************************************************/ 49 50 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ 51 if ((PR) > (MAX_PR)) { \ 52 (VEC)[(I)++] = (PR); \ 53 (PR) = 1; \ 54 } 55 56 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ 57 do { \ 58 if ((PR) > (MAX_PR)) { \ 59 (VEC)[(I)++] = (PR); \ 60 (PR) = (P); \ 61 } else \ 62 (PR) *= (P); \ 63 } while (0) 64 65 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ 66 __max_i = (end); \ 67 \ 68 do { \ 69 ++__i; \ 70 if (((sieve)[__index] & __mask) == 0) \ 71 { \ 72 (prime) = id_to_n(__i) 73 74 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ 75 do { \ 76 mp_limb_t __mask, __index, __max_i, __i; \ 77 \ 78 __i = (start)-(off); \ 79 __index = __i / GMP_LIMB_BITS; \ 80 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ 81 __i += (off); \ 82 \ 83 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) 84 85 #define LOOP_ON_SIEVE_STOP \ 86 } \ 87 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ 88 __index += __mask & 1; \ 89 } while (__i <= __max_i) \ 90 91 #define LOOP_ON_SIEVE_END \ 92 LOOP_ON_SIEVE_STOP; \ 93 } while (0) 94 95 /*********************************************************/ 96 /* Section sieve: sieving functions and tools for primes */ 97 /*********************************************************/ 98 99 #if WANT_ASSERT 100 static mp_limb_t 101 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } 102 #endif 103 104 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ 105 static mp_limb_t 106 id_to_n (mp_limb_t id) { return id*3+1+(id&1); } 107 108 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ 109 static mp_limb_t 110 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } 111 112 #if WANT_ASSERT 113 static mp_size_t 114 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } 115 #endif 116 117 /*********************************************************/ 118 /* Section mswing: 2-multiswing factorial */ 119 /*********************************************************/ 120 121 /* Returns an approximation of the sqare root of x. * 122 * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */ 123 static mp_limb_t 124 limb_apprsqrt (mp_limb_t x) 125 { 126 int s; 127 128 ASSERT (x > 2); 129 count_leading_zeros (s, x - 1); 130 s = GMP_LIMB_BITS - 1 - s; 131 return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1)); 132 } 133 134 #if 0 135 /* A count-then-exponentiate variant for SWING_A_PRIME */ 136 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ 137 do { \ 138 mp_limb_t __q, __prime; \ 139 int __exp; \ 140 __prime = (P); \ 141 __exp = 0; \ 142 __q = (N); \ 143 do { \ 144 __q /= __prime; \ 145 __exp += __q & 1; \ 146 } while (__q >= __prime); \ 147 if (__exp) { /* Store $prime^{exp}$ */ \ 148 for (__q = __prime; --__exp; __q *= __prime); \ 149 FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \ 150 }; \ 151 } while (0) 152 #else 153 #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ 154 do { \ 155 mp_limb_t __q, __prime; \ 156 __prime = (P); \ 157 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ 158 __q = (N); \ 159 do { \ 160 __q /= __prime; \ 161 if ((__q & 1) != 0) (PR) *= __prime; \ 162 } while (__q >= __prime); \ 163 } while (0) 164 #endif 165 166 #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ 167 do { \ 168 mp_limb_t __prime; \ 169 __prime = (P); \ 170 if ((((N) / __prime) & 1) != 0) \ 171 FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \ 172 } while (0) 173 174 /* mpz_2multiswing_1 computes the odd part of the 2-multiswing 175 factorial of the parameter n. The result x is an odd positive 176 integer so that multiswing(n,2) = x 2^a. 177 178 Uses the algorithm described by Peter Luschny in "Divide, Swing and 179 Conquer the Factorial!". 180 181 The pointer sieve points to primesieve_size(n) limbs containing a 182 bit-array where primes are marked as 0. 183 Enough (FIXME: explain :-) limbs must be pointed by factors. 184 */ 185 186 static void 187 mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors) 188 { 189 mp_limb_t prod, max_prod; 190 mp_size_t j; 191 192 ASSERT (n >= 26); 193 194 j = 0; 195 prod = -(n & 1); 196 n &= ~ CNST_LIMB(1); /* n-1, if n is odd */ 197 198 prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */ 199 max_prod = GMP_NUMB_MAX / (n-1); 200 201 /* Handle prime = 3 separately. */ 202 SWING_A_PRIME (3, n, prod, max_prod, factors, j); 203 204 /* Swing primes from 5 to n/3 */ 205 { 206 mp_limb_t s; 207 208 { 209 mp_limb_t prime; 210 211 s = limb_apprsqrt(n); 212 ASSERT (s >= 5); 213 s = n_to_bit (s); 214 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); 215 SWING_A_PRIME (prime, n, prod, max_prod, factors, j); 216 LOOP_ON_SIEVE_END; 217 s++; 218 } 219 220 ASSERT (max_prod <= GMP_NUMB_MAX / 3); 221 ASSERT (bit_to_n (s) * bit_to_n (s) > n); 222 ASSERT (s <= n_to_bit (n / 3)); 223 { 224 mp_limb_t prime; 225 mp_limb_t l_max_prod = max_prod * 3; 226 227 LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve); 228 SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j); 229 LOOP_ON_SIEVE_END; 230 } 231 } 232 233 /* Store primes from (n+1)/2 to n */ 234 { 235 mp_limb_t prime; 236 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve); 237 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); 238 LOOP_ON_SIEVE_END; 239 } 240 241 if (LIKELY (j != 0)) 242 { 243 factors[j++] = prod; 244 mpz_prodlimbs (x, factors, j); 245 } 246 else 247 { 248 PTR (x)[0] = prod; 249 SIZ (x) = 1; 250 } 251 } 252 253 #undef SWING_A_PRIME 254 #undef SH_SWING_A_PRIME 255 #undef LOOP_ON_SIEVE_END 256 #undef LOOP_ON_SIEVE_STOP 257 #undef LOOP_ON_SIEVE_BEGIN 258 #undef LOOP_ON_SIEVE_CONTINUE 259 #undef FACTOR_LIST_APPEND 260 261 /*********************************************************/ 262 /* Section oddfac: odd factorial, needed also by binomial*/ 263 /*********************************************************/ 264 265 #if TUNE_PROGRAM_BUILD 266 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1)) 267 #else 268 #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1)) 269 #endif 270 271 /* mpz_oddfac_1 computes the odd part of the factorial of the 272 parameter n. I.e. n! = x 2^a, where x is the returned value: an 273 odd positive integer. 274 275 If flag != 0 a square is skipped in the DSC part, e.g. 276 if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!. 277 278 If n is too small, flag is ignored, and an ASSERT can be triggered. 279 280 TODO: FAC_DSC_THRESHOLD is used here with two different roles: 281 - to decide when prime factorisation is needed, 282 - to stop the recursion, once sieving is done. 283 Maybe two thresholds can do a better job. 284 */ 285 void 286 mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) 287 { 288 ASSERT (n <= GMP_NUMB_MAX); 289 ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD))); 290 291 if (n <= ODD_FACTORIAL_TABLE_LIMIT) 292 { 293 PTR (x)[0] = __gmp_oddfac_table[n]; 294 SIZ (x) = 1; 295 } 296 else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1) 297 { 298 mp_ptr px; 299 300 px = MPZ_NEWALLOC (x, 2); 301 umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]); 302 SIZ (x) = 2; 303 } 304 else 305 { 306 unsigned s; 307 mp_ptr factors; 308 309 s = 0; 310 { 311 mp_limb_t tn; 312 mp_limb_t prod, max_prod, i; 313 mp_size_t j; 314 TMP_SDECL; 315 316 #if TUNE_PROGRAM_BUILD 317 ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD); 318 ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2)); 319 #endif 320 321 /* Compute the number of recursive steps for the DSC algorithm. */ 322 for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++) 323 tn >>= 1; 324 325 j = 0; 326 327 TMP_SMARK; 328 factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB); 329 ASSERT (tn >= FACTORS_PER_LIMB); 330 331 prod = 1; 332 #if TUNE_PROGRAM_BUILD 333 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT; 334 #else 335 max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD; 336 #endif 337 338 ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); 339 do { 340 i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2; 341 factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX; 342 do { 343 FACTOR_LIST_STORE (i, prod, max_prod, factors, j); 344 i += 2; 345 } while (i <= tn); 346 max_prod <<= 1; 347 tn >>= 1; 348 } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); 349 350 factors[j++] = prod; 351 factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1]; 352 factors[j++] = __gmp_oddfac_table[tn >> 1]; 353 mpz_prodlimbs (x, factors, j); 354 355 TMP_SFREE; 356 } 357 358 if (s != 0) 359 /* Use the algorithm described by Peter Luschny in "Divide, 360 Swing and Conquer the Factorial!". 361 362 Improvement: there are two temporary buffers, factors and 363 square, that are never used together; with a good estimate 364 of the maximal needed size, they could share a single 365 allocation. 366 */ 367 { 368 mpz_t mswing; 369 mp_ptr sieve; 370 mp_size_t size; 371 TMP_DECL; 372 373 TMP_MARK; 374 375 flag--; 376 size = n / GMP_NUMB_BITS + 4; 377 ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1)); 378 /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS); 379 one more can be overwritten by mul, another for the sieve */ 380 MPZ_TMP_INIT (mswing, size); 381 /* Initialize size, so that ASSERT can check it correctly. */ 382 ASSERT_CODE (SIZ (mswing) = 0); 383 384 /* Put the sieve on the second half, it will be overwritten by the last mswing. */ 385 sieve = PTR (mswing) + size / 2 + 1; 386 387 size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1; 388 389 factors = TMP_ALLOC_LIMBS (size); 390 do { 391 mp_ptr square, px; 392 mp_size_t nx, ns; 393 mp_limb_t cy; 394 TMP_DECL; 395 396 s--; 397 ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */ 398 mpz_2multiswing_1 (mswing, n >> s, sieve, factors); 399 400 TMP_MARK; 401 nx = SIZ (x); 402 if (s == flag) { 403 size = nx; 404 square = TMP_ALLOC_LIMBS (size); 405 MPN_COPY (square, PTR (x), nx); 406 } else { 407 size = nx << 1; 408 square = TMP_ALLOC_LIMBS (size); 409 mpn_sqr (square, PTR (x), nx); 410 size -= (square[size - 1] == 0); 411 } 412 ns = SIZ (mswing); 413 nx = size + ns; 414 px = MPZ_NEWALLOC (x, nx); 415 ASSERT (ns <= size); 416 cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */ 417 418 TMP_FREE; 419 SIZ(x) = nx - (cy == 0); 420 } while (s != 0); 421 TMP_FREE; 422 } 423 } 424 } 425 426 #undef FACTORS_PER_LIMB 427 #undef FACTOR_LIST_STORE