github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpz/bin_uiui.c (about) 1 /* mpz_bin_uiui - compute n over k. 2 3 Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato. 4 5 Copyright 2010-2012 Free Software Foundation, Inc. 6 7 This file is part of the GNU MP Library. 8 9 The GNU MP Library is free software; you can redistribute it and/or modify 10 it under the terms of either: 11 12 * the GNU Lesser General Public License as published by the Free 13 Software Foundation; either version 3 of the License, or (at your 14 option) any later version. 15 16 or 17 18 * the GNU General Public License as published by the Free Software 19 Foundation; either version 2 of the License, or (at your option) any 20 later version. 21 22 or both in parallel, as here. 23 24 The GNU MP Library is distributed in the hope that it will be useful, but 25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27 for more details. 28 29 You should have received copies of the GNU General Public License and the 30 GNU Lesser General Public License along with the GNU MP Library. If not, 31 see https://www.gnu.org/licenses/. */ 32 33 #include "gmp.h" 34 #include "gmp-impl.h" 35 #include "longlong.h" 36 37 #ifndef BIN_GOETGHELUCK_THRESHOLD 38 #define BIN_GOETGHELUCK_THRESHOLD 1000 39 #endif 40 #ifndef BIN_UIUI_ENABLE_SMALLDC 41 #define BIN_UIUI_ENABLE_SMALLDC 1 42 #endif 43 #ifndef BIN_UIUI_RECURSIVE_SMALLDC 44 #define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32) 45 #endif 46 47 /* Algorithm: 48 49 Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8) 50 which are then accumulated into mpn numbers. The first inner loop 51 accumulates divisor factors, the 2nd inner loop accumulates exactly the same 52 number of dividend factors. We avoid accumulating more for the divisor, 53 even with its smaller factors, since we else cannot guarantee divisibility. 54 55 Since we know each division will yield an integer, we compute the quotient 56 using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod 57 2^t. 58 59 Improvements: 60 61 (1) An obvious improvement to this code would be to compute mod 2^t 62 everywhere. Unfortunately, we cannot determine t beforehand, unless we 63 invoke some approximation, such as Stirling's formula. Of course, we don't 64 need t to be tight. However, it is not clear that this would help much, 65 our numbers are kept reasonably small already. 66 67 (2) Compute nmax/kmax semi-accurately, without scalar division or a loop. 68 Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index, 69 would make it both reasonably accurate and fast. (We could use a table 70 stored into a limb, perhaps.) The table should take the removed factors of 71 2 into account (those done on-the-fly in mulN). 72 73 (3) The first time in the loop we compute the odd part of a 74 factorial in kp, we might use oddfac_1 for this task. 75 */ 76 77 /* This threshold determines how large divisor to accumulate before we call 78 bdiv. Perhaps we should never call bdiv, and accumulate all we are told, 79 since we are just basecase code anyway? Presumably, this depends on the 80 relative speed of the asymptotically fast code and this code. */ 81 #define SOME_THRESHOLD 20 82 83 /* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME: 84 All versions of MAXFACS don't take this 2 removal into account now, meaning 85 that then, shifting just adds some overhead. (We remove factors from the 86 completed limb anyway.) */ 87 88 static mp_limb_t 89 mul1 (mp_limb_t m) 90 { 91 return m; 92 } 93 94 static mp_limb_t 95 mul2 (mp_limb_t m) 96 { 97 /* We need to shift before multiplying, to avoid an overflow. */ 98 mp_limb_t m01 = (m | 1) * ((m + 1) >> 1); 99 return m01; 100 } 101 102 static mp_limb_t 103 mul3 (mp_limb_t m) 104 { 105 mp_limb_t m01 = (m + 0) * (m + 1) >> 1; 106 mp_limb_t m2 = (m + 2); 107 return m01 * m2; 108 } 109 110 static mp_limb_t 111 mul4 (mp_limb_t m) 112 { 113 mp_limb_t m01 = (m + 0) * (m + 1) >> 1; 114 mp_limb_t m23 = (m + 2) * (m + 3) >> 1; 115 return m01 * m23; 116 } 117 118 static mp_limb_t 119 mul5 (mp_limb_t m) 120 { 121 mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1; 122 mp_limb_t m34 = (m + 3) * (m + 4) >> 1; 123 return m012 * m34; 124 } 125 126 static mp_limb_t 127 mul6 (mp_limb_t m) 128 { 129 mp_limb_t m01 = (m + 0) * (m + 1); 130 mp_limb_t m23 = (m + 2) * (m + 3); 131 mp_limb_t m45 = (m + 4) * (m + 5) >> 1; 132 mp_limb_t m0123 = m01 * m23 >> 3; 133 return m0123 * m45; 134 } 135 136 static mp_limb_t 137 mul7 (mp_limb_t m) 138 { 139 mp_limb_t m01 = (m + 0) * (m + 1); 140 mp_limb_t m23 = (m + 2) * (m + 3); 141 mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1; 142 mp_limb_t m0123 = m01 * m23 >> 3; 143 return m0123 * m456; 144 } 145 146 static mp_limb_t 147 mul8 (mp_limb_t m) 148 { 149 mp_limb_t m01 = (m + 0) * (m + 1); 150 mp_limb_t m23 = (m + 2) * (m + 3); 151 mp_limb_t m45 = (m + 4) * (m + 5); 152 mp_limb_t m67 = (m + 6) * (m + 7); 153 mp_limb_t m0123 = m01 * m23 >> 3; 154 mp_limb_t m4567 = m45 * m67 >> 3; 155 return m0123 * m4567; 156 } 157 158 typedef mp_limb_t (* mulfunc_t) (mp_limb_t); 159 160 static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8}; 161 #define M (numberof(mulfunc)) 162 163 /* Number of factors-of-2 removed by the corresponding mulN function. */ 164 static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6}; 165 166 #if 1 167 /* This variant is inaccurate but share the code with other functions. */ 168 #define MAXFACS(max,l) \ 169 do { \ 170 (max) = log_n_max (l); \ 171 } while (0) 172 #else 173 174 /* This variant is exact(?) but uses a loop. It takes the 2 removal 175 of mulN into account. */ 176 static const unsigned long ftab[] = 177 #if GMP_NUMB_BITS == 64 178 /* 1 to 8 factors per iteration */ 179 {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x100000000),0x32cbfe,0x16a0b,0x24c4,0xa16,0x34b,0x1b2 /*,0xdf,0x8d */}; 180 #endif 181 #if GMP_NUMB_BITS == 32 182 /* 1 to 7 factors per iteration */ 183 {0xffffffff,0x10000,0x801,0x16b,0x71,0x42,0x26 /* ,0x1e */}; 184 #endif 185 186 #define MAXFACS(max,l) \ 187 do { \ 188 int __i; \ 189 for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \ 190 ; \ 191 (max) = __i + 1; \ 192 } while (0) 193 #endif 194 195 /* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis 196 is an odd integer. */ 197 static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE }; 198 199 static void 200 mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 201 { 202 int nmax, kmax, nmaxnow, numfac; 203 mp_ptr np, kp; 204 mp_size_t nn, kn, alloc; 205 mp_limb_t i, j, t, iii, jjj, cy, dinv; 206 mp_bitcnt_t i2cnt, j2cnt; 207 int cnt; 208 mp_size_t maxn; 209 TMP_DECL; 210 211 ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT); 212 TMP_MARK; 213 214 maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */ 215 216 /* FIXME: This allocation might be insufficient, but is usually way too 217 large. */ 218 alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD); 219 alloc = MIN (alloc, k) + 1; 220 np = TMP_ALLOC_LIMBS (alloc); 221 kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1); 222 223 MAXFACS (nmax, n); 224 ASSERT (nmax <= M); 225 MAXFACS (kmax, k); 226 ASSERT (kmax <= M); 227 ASSERT (k >= M); 228 229 i = n - k + 1; 230 231 np[0] = 1; nn = 1; 232 233 i2cnt = 0; /* total low zeros in dividend */ 234 j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1]; 235 /* total low zeros in divisor */ 236 237 numfac = 1; 238 j = ODD_FACTORIAL_TABLE_LIMIT + 1; 239 jjj = ODD_FACTORIAL_TABLE_MAX; 240 ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX); 241 242 while (1) 243 { 244 kp[0] = jjj; /* store new factors */ 245 kn = 1; 246 t = k - j + 1; 247 kmax = MIN (kmax, t); 248 249 while (kmax != 0 && kn < SOME_THRESHOLD) 250 { 251 jjj = mulfunc[kmax - 1] (j); 252 j += kmax; /* number of factors used */ 253 count_trailing_zeros (cnt, jjj); /* count low zeros */ 254 jjj >>= cnt; /* remove remaining low zeros */ 255 j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */ 256 cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */ 257 kp[kn] = cy; 258 kn += cy != 0; 259 t = k - j + 1; 260 kmax = MIN (kmax, t); 261 } 262 numfac = j - numfac; 263 264 while (numfac != 0) 265 { 266 nmaxnow = MIN (nmax, numfac); 267 iii = mulfunc[nmaxnow - 1] (i); 268 i += nmaxnow; /* number of factors used */ 269 count_trailing_zeros (cnt, iii); /* count low zeros */ 270 iii >>= cnt; /* remove remaining low zeros */ 271 i2cnt += tcnttab[nmaxnow - 1] + cnt; /* update low zeros count */ 272 cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */ 273 np[nn] = cy; 274 nn += cy != 0; 275 numfac -= nmaxnow; 276 } 277 278 ASSERT (nn < alloc); 279 280 binvert_limb (dinv, kp[0]); 281 nn += (np[nn - 1] >= kp[kn - 1]); 282 nn -= kn; 283 mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv); 284 285 if (kmax == 0) 286 break; 287 numfac = j; 288 289 jjj = mulfunc[kmax - 1] (j); 290 j += kmax; /* number of factors used */ 291 count_trailing_zeros (cnt, jjj); /* count low zeros */ 292 jjj >>= cnt; /* remove remaining low zeros */ 293 j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */ 294 } 295 296 /* Put back the right number of factors of 2. */ 297 cnt = i2cnt - j2cnt; 298 if (cnt != 0) 299 { 300 ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */ 301 cy = mpn_lshift (np, np, nn, cnt); 302 np[nn] = cy; 303 nn += cy != 0; 304 } 305 306 nn -= np[nn - 1] == 0; /* normalisation */ 307 308 kp = MPZ_NEWALLOC (r, nn); 309 SIZ(r) = nn; 310 MPN_COPY (kp, np, nn); 311 TMP_FREE; 312 } 313 314 static void 315 mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 316 { 317 int nmax, numfac; 318 mp_ptr rp; 319 mp_size_t rn, alloc; 320 mp_limb_t i, iii, cy; 321 mp_bitcnt_t i2cnt, cnt; 322 323 count_leading_zeros (cnt, (mp_limb_t) n); 324 cnt = GMP_LIMB_BITS - cnt; 325 alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */ 326 rp = MPZ_NEWALLOC (r, alloc); 327 328 MAXFACS (nmax, n); 329 nmax = MIN (nmax, M); 330 331 i = n - k + 1; 332 333 nmax = MIN (nmax, k); 334 rp[0] = mulfunc[nmax - 1] (i); 335 rn = 1; 336 i += nmax; /* number of factors used */ 337 i2cnt = tcnttab[nmax - 1]; /* low zeros count */ 338 numfac = k - nmax; 339 while (numfac != 0) 340 { 341 nmax = MIN (nmax, numfac); 342 iii = mulfunc[nmax - 1] (i); 343 i += nmax; /* number of factors used */ 344 i2cnt += tcnttab[nmax - 1]; /* update low zeros count */ 345 cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */ 346 rp[rn] = cy; 347 rn += cy != 0; 348 numfac -= nmax; 349 } 350 351 ASSERT (rn < alloc); 352 353 mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2], 354 __gmp_fac2cnt_table[k / 2 - 1] - i2cnt); 355 /* A two-fold, branch-free normalisation is possible :*/ 356 /* rn -= rp[rn - 1] == 0; */ 357 /* rn -= rp[rn - 1] == 0; */ 358 MPN_NORMALIZE_NOT_ZERO (rp, rn); 359 360 SIZ(r) = rn; 361 } 362 363 /* Algorithm: 364 365 Plain and simply multiply things together. 366 367 We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such 368 that k!/2^t is odd). 369 370 */ 371 372 static mp_limb_t 373 bc_bin_uiui (unsigned int n, unsigned int k) 374 { 375 return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2]) 376 << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1])) 377 & GMP_NUMB_MASK; 378 } 379 380 /* Algorithm: 381 382 Recursively exploit the relation 383 bin(n,k) = bin(n,k>>1)*bin(n-k>>1,k-k>>1)/bin(k,k>>1) . 384 385 Values for binomial(k,k>>1) that fit in a limb are precomputed 386 (with inverses). 387 */ 388 389 /* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] = 390 binomial(i*2,i)/2^t (where t is chosen so that it is odd). */ 391 static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE }; 392 393 /* bin2kkinv[i] = bin2kk[i]^-1 mod B */ 394 static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE }; 395 396 /* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */ 397 static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE }; 398 399 static void 400 mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 401 { 402 mp_ptr rp; 403 mp_size_t rn; 404 unsigned long int hk; 405 406 hk = k >> 1; 407 408 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT) 409 mpz_smallk_bin_uiui (r, n, hk); 410 else 411 mpz_smallkdc_bin_uiui (r, n, hk); 412 k -= hk; 413 n -= hk; 414 if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { 415 mp_limb_t cy; 416 rn = SIZ (r); 417 rp = MPZ_REALLOC (r, rn + 1); 418 cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k)); 419 rp [rn] = cy; 420 rn += cy != 0; 421 } else { 422 mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3]; 423 mpz_t t; 424 425 ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3; 426 PTR (t) = buffer; 427 if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT) 428 mpz_smallk_bin_uiui (t, n, k); 429 else 430 mpz_smallkdc_bin_uiui (t, n, k); 431 mpz_mul (r, r, t); 432 rp = PTR (r); 433 rn = SIZ (r); 434 } 435 436 mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET], 437 bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET], 438 fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk)); 439 /* A two-fold, branch-free normalisation is possible :*/ 440 /* rn -= rp[rn - 1] == 0; */ 441 /* rn -= rp[rn - 1] == 0; */ 442 MPN_NORMALIZE_NOT_ZERO (rp, rn); 443 444 SIZ(r) = rn; 445 } 446 447 /* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K). 448 * 449 * Contributed to the GNU project by Marco Bodrato. 450 * 451 * Implementation of the algorithm by P. Goetgheluck, "Computing 452 * Binomial Coefficients", The American Mathematical Monthly, Vol. 94, 453 * No. 4 (April 1987), pp. 360-365. 454 * 455 * Acknowledgment: Peter Luschny did spot the slowness of the previous 456 * code and suggested the reference. 457 */ 458 459 /* TODO: Remove duplicated constants / macros / static functions... 460 */ 461 462 /*************************************************************/ 463 /* Section macros: common macros, for swing/fac/bin (&sieve) */ 464 /*************************************************************/ 465 466 #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ 467 if ((PR) > (MAX_PR)) { \ 468 (VEC)[(I)++] = (PR); \ 469 (PR) = 1; \ 470 } 471 472 #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ 473 do { \ 474 if ((PR) > (MAX_PR)) { \ 475 (VEC)[(I)++] = (PR); \ 476 (PR) = (P); \ 477 } else \ 478 (PR) *= (P); \ 479 } while (0) 480 481 #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ 482 __max_i = (end); \ 483 \ 484 do { \ 485 ++__i; \ 486 if (((sieve)[__index] & __mask) == 0) \ 487 { \ 488 (prime) = id_to_n(__i) 489 490 #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ 491 do { \ 492 mp_limb_t __mask, __index, __max_i, __i; \ 493 \ 494 __i = (start)-(off); \ 495 __index = __i / GMP_LIMB_BITS; \ 496 __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ 497 __i += (off); \ 498 \ 499 LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) 500 501 #define LOOP_ON_SIEVE_STOP \ 502 } \ 503 __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ 504 __index += __mask & 1; \ 505 } while (__i <= __max_i) \ 506 507 #define LOOP_ON_SIEVE_END \ 508 LOOP_ON_SIEVE_STOP; \ 509 } while (0) 510 511 /*********************************************************/ 512 /* Section sieve: sieving functions and tools for primes */ 513 /*********************************************************/ 514 515 #if WANT_ASSERT 516 static mp_limb_t 517 bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } 518 #endif 519 520 /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ 521 static mp_limb_t 522 id_to_n (mp_limb_t id) { return id*3+1+(id&1); } 523 524 /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ 525 static mp_limb_t 526 n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } 527 528 static mp_size_t 529 primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } 530 531 /*********************************************************/ 532 /* Section binomial: fast binomial implementation */ 533 /*********************************************************/ 534 535 #define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ 536 do { \ 537 mp_limb_t __a, __b, __prime, __ma,__mb; \ 538 __prime = (P); \ 539 __a = (N); __b = (K); __mb = 0; \ 540 FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ 541 do { \ 542 __mb += __b % __prime; __b /= __prime; \ 543 __ma = __a % __prime; __a /= __prime; \ 544 if (__ma < __mb) { \ 545 __mb = 1; (PR) *= __prime; \ 546 } else __mb = 0; \ 547 } while (__a >= __prime); \ 548 } while (0) 549 550 #define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \ 551 do { \ 552 mp_limb_t __prime; \ 553 __prime = (P); \ 554 if (((N) % __prime) < ((K) % __prime)) { \ 555 FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \ 556 } \ 557 } while (0) 558 559 /* Returns an approximation of the sqare root of x. * 560 * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */ 561 static mp_limb_t 562 limb_apprsqrt (mp_limb_t x) 563 { 564 int s; 565 566 ASSERT (x > 2); 567 count_leading_zeros (s, x - 1); 568 s = GMP_LIMB_BITS - 1 - s; 569 return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1)); 570 } 571 572 static void 573 mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 574 { 575 mp_limb_t *sieve, *factors, count; 576 mp_limb_t prod, max_prod, j; 577 TMP_DECL; 578 579 ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13); 580 ASSERT (n >= 25); 581 582 TMP_MARK; 583 sieve = TMP_ALLOC_LIMBS (primesieve_size (n)); 584 585 count = gmp_primesieve (sieve, n) + 1; 586 factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1); 587 588 max_prod = GMP_NUMB_MAX / n; 589 590 /* Handle primes = 2, 3 separately. */ 591 popc_limb (count, n - k); 592 popc_limb (j, k); 593 count += j; 594 popc_limb (j, n); 595 count -= j; 596 prod = CNST_LIMB(1) << count; 597 598 j = 0; 599 COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j); 600 601 /* Accumulate prime factors from 5 to n/2 */ 602 { 603 mp_limb_t s; 604 605 { 606 mp_limb_t prime; 607 s = limb_apprsqrt(n); 608 s = n_to_bit (s); 609 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); 610 COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); 611 LOOP_ON_SIEVE_END; 612 s++; 613 } 614 615 ASSERT (max_prod <= GMP_NUMB_MAX / 2); 616 max_prod <<= 1; 617 ASSERT (bit_to_n (s) * bit_to_n (s) > n); 618 ASSERT (s <= n_to_bit (n >> 1)); 619 { 620 mp_limb_t prime; 621 622 LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve); 623 SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j); 624 LOOP_ON_SIEVE_END; 625 } 626 max_prod >>= 1; 627 } 628 629 /* Store primes from (n-k)+1 to n */ 630 ASSERT (n_to_bit (n - k) < n_to_bit (n)); 631 { 632 mp_limb_t prime; 633 LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve); 634 FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); 635 LOOP_ON_SIEVE_END; 636 } 637 638 if (LIKELY (j != 0)) 639 { 640 factors[j++] = prod; 641 mpz_prodlimbs (r, factors, j); 642 } 643 else 644 { 645 PTR (r)[0] = prod; 646 SIZ (r) = 1; 647 } 648 TMP_FREE; 649 } 650 651 #undef COUNT_A_PRIME 652 #undef SH_COUNT_A_PRIME 653 #undef LOOP_ON_SIEVE_END 654 #undef LOOP_ON_SIEVE_STOP 655 #undef LOOP_ON_SIEVE_BEGIN 656 #undef LOOP_ON_SIEVE_CONTINUE 657 658 /*********************************************************/ 659 /* End of implementation of Goetgheluck's algorithm */ 660 /*********************************************************/ 661 662 void 663 mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k) 664 { 665 if (UNLIKELY (n < k)) { 666 SIZ (r) = 0; 667 #if BITS_PER_ULONG > GMP_NUMB_BITS 668 } else if (UNLIKELY (n > GMP_NUMB_MAX)) { 669 mpz_t tmp; 670 671 mpz_init_set_ui (tmp, n); 672 mpz_bin_ui (r, tmp, k); 673 mpz_clear (tmp); 674 #endif 675 } else { 676 ASSERT (n <= GMP_NUMB_MAX); 677 /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */ 678 k = MIN (k, n - k); 679 if (k < 2) { 680 PTR(r)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */ 681 SIZ(r) = 1; 682 } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */ 683 PTR(r)[0] = bc_bin_uiui (n, k); 684 SIZ(r) = 1; 685 } else if (k <= ODD_FACTORIAL_TABLE_LIMIT) 686 mpz_smallk_bin_uiui (r, n, k); 687 else if (BIN_UIUI_ENABLE_SMALLDC && 688 k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2) 689 mpz_smallkdc_bin_uiui (r, n, k); 690 else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) && 691 k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */ 692 mpz_goetgheluck_bin_uiui (r, n, k); 693 else 694 mpz_bdiv_bin_uiui (r, n, k); 695 } 696 }