github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tests/refmpn.c

/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.

Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.

This file is part of the GNU MP Library test suite.

The GNU MP Library test suite is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 3 of the License,
or (at your option) any later version.

The GNU MP Library test suite is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along with
the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */


/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside these ranges (eg. for size==0), but the assertions are present to
   pick up bad parameters passed here that are about to be passed the same
   to a real mpn routine being compared.  */

/* always do assertion checking */
#define WANT_ASSERT  1

#include <stdio.h>  /* for NULL */
#include <stdlib.h> /* for malloc */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "tests.h"



/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
   in bytes.  */
int
byte_overlap_p (const void *v_xp, mp_size_t xsize,
                const void *v_yp, mp_size_t ysize)
{
  const char *xp = (const char *) v_xp;
  const char *yp = (const char *) v_yp;

  ASSERT (xsize >= 0);
  ASSERT (ysize >= 0);

  /* no wraparounds */
  ASSERT (xp+xsize >= xp);
  ASSERT (yp+ysize >= yp);

  if (xp + xsize <= yp)
    return 0;

  if (yp + ysize <= xp)
    return 0;

  return 1;
}

/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap.  */
int
refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
{
  return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
                         yp, ysize * GMP_LIMB_BYTES);
}

/* Check overlap for a routine defined to work low to high.  */
int
refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a routine defined to work high to low.  */
int
refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a standard routine requiring equal or separate.  */
int
refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
}
int
refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                               mp_size_t size)
{
  return (refmpn_overlap_fullonly_p (dst, src1, size)
          && refmpn_overlap_fullonly_p (dst, src2, size));
}


mp_ptr
refmpn_malloc_limbs (mp_size_t size)
{
  mp_ptr p;
  ASSERT (size >= 0);
  if (size == 0)
    size = 1;
  p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
  ASSERT (p != NULL);
  return p;
}

/* Free limbs allocated by refmpn_malloc_limbs.  NOTE: Can't free
 * memory allocated by refmpn_malloc_limbs_aligned.  */
void
refmpn_free_limbs (mp_ptr p)
{
  free (p);
}

mp_ptr
refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
{
  mp_ptr p;
  p = refmpn_malloc_limbs (size);
  refmpn_copyi (p, ptr, size);
  return p;
}

/* malloc n limbs on a multiple of m bytes boundary */
mp_ptr
refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
{
  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
}


void
refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
{
  mp_size_t i;
  ASSERT (size >= 0);
  for (i = 0; i < size; i++)
    ptr[i] = value;
}

void
refmpn_zero (mp_ptr ptr, mp_size_t size)
{
  refmpn_fill (ptr, size, CNST_LIMB(0));
}

void
refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
{
  ASSERT (newsize >= oldsize);
  refmpn_zero (ptr+oldsize, newsize-oldsize);
}

int
refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
{
  mp_size_t i;
  for (i = 0; i < size; i++)
    if (ptr[i] != 0)
      return 0;
  return 1;
}

mp_size_t
refmpn_normalize (mp_srcptr ptr, mp_size_t size)
{
  ASSERT (size >= 0);
  while (size > 0 && ptr[size-1] == 0)
    size--;
  return size;
}

/* the highest one bit in x */
mp_limb_t
refmpn_msbone (mp_limb_t x)
{
  mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);

  while (n != 0)
    {
      if (x & n)
        break;
      n >>= 1;
    }
  return n;
}

/* a mask of the highest one bit plus all bits below */
mp_limb_t
refmpn_msbone_mask (mp_limb_t x)
{
  if (x == 0)
    return 0;

  return (refmpn_msbone (x) << 1) - 1;
}

/* How many digits in the given base will fit in a limb.
   Notice that the product b is allowed to be equal to the limit
   2^GMP_NUMB_BITS, this ensures the result for base==2 will be
   GMP_NUMB_BITS (and similarly other powers of 2).  */
int
refmpn_chars_per_limb (int base)
{
  mp_limb_t limit[2], b[2];
  int chars_per_limb;

  ASSERT (base >= 2);

  limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
  limit[1] = 1;
  b[0] = 1;      /* b = 1 */
  b[1] = 0;

  chars_per_limb = 0;
  for (;;)
    {
      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
        break;
      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
        break;
      chars_per_limb++;
    }
  return chars_per_limb;
}
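/* A worked example of the loop above (illustrative figures, added comment,
   not part of the original file): with 32-bit limbs and no nails, base 10
   gives chars_per_limb = 9, since 10^9 < 2^32 but 10^10 > 2^32; base 2
   gives exactly 32 because the product is allowed to reach the limit 2^32
   itself.  refmpn_big_base (below) then returns base^chars_per_limb,
   e.g. 10^9 for base 10.  */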
/* The biggest value base**n which fits in GMP_NUMB_BITS.  */
mp_limb_t
refmpn_big_base (int base)
{
  int chars_per_limb = refmpn_chars_per_limb (base);
  int i;
  mp_limb_t bb;

  ASSERT (base >= 2);
  bb = 1;
  for (i = 0; i < chars_per_limb; i++)
    bb *= base;
  return bb;
}


void
refmpn_setbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
}

void
refmpn_clrbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
}

#define REFMPN_TSTBIT(ptr,bit) \
  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)

int
refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
{
  return REFMPN_TSTBIT (ptr, bit);
}

unsigned long
refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) != 0)
    bit++;
  return bit;
}

unsigned long
refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) == 0)
    bit++;
  return bit;
}

void
refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  refmpn_copyi (rp, sp, size);
}

void
refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    rp[i] = sp[i];
}

void
refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = size-1; i >= 0; i--)
    rp[i] = sp[i];
}

/* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
   zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
void
refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
{
  ASSERT (wsize >= 0);
  ASSERT (xsize >= 0);

  /* high part of x if x bigger than w */
  if (xsize > wsize)
    {
      xp += xsize - wsize;
      xsize = wsize;
    }

  refmpn_copy (wp + wsize-xsize, xp, xsize);
  refmpn_zero (wp, wsize-xsize);
}

int
refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t i;

  ASSERT (size >= 1);
  ASSERT_MPN (xp, size);
  ASSERT_MPN (yp, size);

  for (i = size-1; i >= 0; i--)
    {
      if (xp[i] > yp[i])  return 1;
      if (xp[i] < yp[i])  return -1;
    }
  return 0;
}

int
refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  if (size == 0)
    return 0;
  else
    return refmpn_cmp (xp, yp, size);
}

int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                     mp_srcptr yp, mp_size_t ysize)
{
  int opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}
int
refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t i;
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    if (xp[i] != yp[i])
      return 0;
  return 1;
}


#define LOGOPS(operation) \
  { \
    mp_size_t i; \
    \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
    \
    for (i = 0; i < size; i++) \
      rp[i] = operation; \
  }

void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}


/* set *dh,*dl to mh:ml - sh:sl, in full limbs */
void
refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
                   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
{
  *dl = ml - sl;
  *dh = mh - sh - (ml < sl);
}


/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  *w = sum;
  cy = (sum < x);
#else
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}

/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  *w = diff;
  cy = (diff > x);
#else
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}

/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
mp_limb_t
adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = ref_addc_limb (w, x, y);
  return r + ref_addc_limb (w, *w, c);
}

/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
mp_limb_t
sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = ref_subc_limb (w, x, y);
  return r + ref_subc_limb (w, *w, c);
}


#define AORS_1(operation) \
  { \
    mp_size_t i; \
    \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
    ASSERT (size >= 1); \
    ASSERT_MPN (sp, size); \
    ASSERT_LIMB (n); \
    \
    for (i = 0; i < size; i++) \
      n = operation (&rp[i], sp[i], n); \
    return n; \
  }
mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_addc_limb);
}
mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_subc_limb);
}

#define AORS_NC(operation) \
  { \
    mp_size_t i; \
    \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (carry == 0 || carry == 1); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
    \
    for (i = 0; i < size; i++) \
      carry = operation (&rp[i], s1p[i], s2p[i], carry); \
    return carry; \
  }

mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (adc);
}
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (sbb);
}


mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}

mp_limb_t
refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  if (cnd != 0)
    return refmpn_add_n (rp, s1p, s2p, size);
  else
    {
      refmpn_copyi (rp, s1p, size);
      return 0;
    }
}
mp_limb_t
refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  if (cnd != 0)
    return refmpn_sub_n (rp, s1p, s2p, size);
  else
    {
      refmpn_copyi (rp, s1p, size);
      return 0;
    }
}

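/* Note on the _err1/_err2/_err3 variants below (explanatory comment added
   here, not in the original file): each time the add/sub at position i
   produces a carry or borrow of 1, the limb yp[size-1-i] (resp. y1p, y2p,
   y3p) is accumulated into a two-limb counter in ep.  The result limbs in
   rp and the returned carry are exactly those of the plain add/sub.  */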
#define AORS_ERR1_N(operation) \
  { \
    mp_size_t i; \
    mp_limb_t carry2; \
    \
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
    ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \
    ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \
    ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \
    \
    ASSERT (carry == 0 || carry == 1); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
    ASSERT_MPN (yp, size); \
    \
    ep[0] = ep[1] = CNST_LIMB(0); \
    \
    for (i = 0; i < size; i++) \
      { \
        carry = operation (&rp[i], s1p[i], s2p[i], carry); \
        if (carry == 1) \
          { \
            carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \
            carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
            ASSERT (carry2 == 0); \
          } \
      } \
    return carry; \
  }

mp_limb_t
refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
                   mp_ptr ep, mp_srcptr yp,
                   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR1_N (adc);
}
mp_limb_t
refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
                   mp_ptr ep, mp_srcptr yp,
                   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR1_N (sbb);
}


#define AORS_ERR2_N(operation) \
  { \
    mp_size_t i; \
    mp_limb_t carry2; \
    \
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
    ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \
    ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 4, rp, size)); \
    \
    ASSERT (carry == 0 || carry == 1); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
    ASSERT_MPN (y1p, size); \
    ASSERT_MPN (y2p, size); \
    \
    ep[0] = ep[1] = CNST_LIMB(0); \
    ep[2] = ep[3] = CNST_LIMB(0); \
    \
    for (i = 0; i < size; i++) \
      { \
        carry = operation (&rp[i], s1p[i], s2p[i], carry); \
        if (carry == 1) \
          { \
            carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \
            carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
            ASSERT (carry2 == 0); \
            carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \
            carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \
            ASSERT (carry2 == 0); \
          } \
      } \
    return carry; \
  }

mp_limb_t
refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
                   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
                   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR2_N (adc);
}
mp_limb_t
refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
                   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
                   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR2_N (sbb);
}


#define AORS_ERR3_N(operation) \
  { \
    mp_size_t i; \
    mp_limb_t carry2; \
    \
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
    ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \
    ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \
    ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \
    ASSERT (! refmpn_overlap_p (ep, 6, rp, size)); \
    \
    ASSERT (carry == 0 || carry == 1); \
    ASSERT (size >= 1); \
    ASSERT_MPN (s1p, size); \
    ASSERT_MPN (s2p, size); \
    ASSERT_MPN (y1p, size); \
    ASSERT_MPN (y2p, size); \
    ASSERT_MPN (y3p, size); \
    \
    ep[0] = ep[1] = CNST_LIMB(0); \
    ep[2] = ep[3] = CNST_LIMB(0); \
    ep[4] = ep[5] = CNST_LIMB(0); \
    \
    for (i = 0; i < size; i++) \
      { \
        carry = operation (&rp[i], s1p[i], s2p[i], carry); \
        if (carry == 1) \
          { \
            carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \
            carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
            ASSERT (carry2 == 0); \
            carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \
            carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \
            ASSERT (carry2 == 0); \
            carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \
            carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \
            ASSERT (carry2 == 0); \
          } \
      } \
    return carry; \
  }

mp_limb_t
refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
                   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
                   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR3_N (adc);
}
mp_limb_t
refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
                   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
                   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR3_N (sbb);
}


mp_limb_t
refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                 mp_size_t n, unsigned int s)
{
  mp_limb_t cy;
  mp_ptr tp;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT (0 < s && s < GMP_NUMB_BITS);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  tp = refmpn_malloc_limbs (n);
  cy  = refmpn_lshift (tp, vp, n, s);
  cy += refmpn_add_n (rp, up, tp, n);
  free (tp);
  return cy;
}
mp_limb_t
refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, up, vp, n, 1);
}
mp_limb_t
refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, up, vp, n, 2);
}
mp_limb_t
refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_addlsh_n (rp, rp, vp, n, s);
}
mp_limb_t
refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, rp, vp, n, 1);
}
mp_limb_t
refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, rp, vp, n, 2);
}
mp_limb_t
refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_addlsh_n (rp, vp, rp, n, s);
}
mp_limb_t
refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, vp, rp, n, 1);
}
mp_limb_t
refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, vp, rp, n, 2);
}
mp_limb_t
refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                  mp_size_t n, unsigned int s, mp_limb_t carry)
{
  mp_limb_t cy;

  ASSERT (carry <= (CNST_LIMB(1) << s));

  cy  = refmpn_addlsh_n (rp, up, vp, n, s);
  cy += refmpn_add_1 (rp, rp, n, carry);
  return cy;
}
mp_limb_t
refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
}
mp_limb_t
refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
}

mp_limb_t
refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                 mp_size_t n, unsigned int s)
{
  mp_limb_t cy;
  mp_ptr tp;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT (0 < s && s < GMP_NUMB_BITS);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  tp = refmpn_malloc_limbs (n);
  cy  = mpn_lshift (tp, vp, n, s);
  cy += mpn_sub_n (rp, up, tp, n);
  free (tp);
  return cy;
}
mp_limb_t
refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, up, vp, n, 1);
}
mp_limb_t
refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, up, vp, n, 2);
}
mp_limb_t
refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_sublsh_n (rp, rp, vp, n, s);
}
mp_limb_t
refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, rp, vp, n, 1);
}
mp_limb_t
refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, rp, vp, n, 2);
}
mp_limb_t
refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_sublsh_n (rp, vp, rp, n, s);
}
mp_limb_t
refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, vp, rp, n, 1);
}
mp_limb_t
refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, vp, rp, n, 2);
}
mp_limb_t
refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                  mp_size_t n, unsigned int s, mp_limb_t carry)
{
  mp_limb_t cy;

  ASSERT (carry <= (CNST_LIMB(1) << s));

  cy  = refmpn_sublsh_n (rp, up, vp, n, s);
  cy += refmpn_sub_1 (rp, rp, n, carry);
  return cy;
}
mp_limb_t
refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
}
mp_limb_t
refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
}

mp_limb_signed_t
refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                 mp_size_t n, unsigned int s)
{
  mp_limb_signed_t cy;
  mp_ptr tp;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT (0 < s && s < GMP_NUMB_BITS);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  tp = refmpn_malloc_limbs (n);
  cy  = mpn_lshift (tp, vp, n, s);
  cy -= mpn_sub_n (rp, tp, up, n);
  free (tp);
  return cy;
}
mp_limb_signed_t
refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_rsblsh_n (rp, up, vp, n, 1);
}
mp_limb_signed_t
refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_rsblsh_n (rp, up, vp, n, 2);
}
mp_limb_signed_t
refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                  mp_size_t n, unsigned int s, mp_limb_signed_t carry)
{
  mp_limb_signed_t cy;

  ASSERT (carry == -1 || (carry >> s) == 0);

  cy = refmpn_rsblsh_n (rp, up, vp, n, s);
  if (carry > 0)
    cy += refmpn_add_1 (rp, rp, n, carry);
  else
    cy -= refmpn_sub_1 (rp, rp, n, -carry);
  return cy;
}
mp_limb_signed_t
refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
{
  return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
}
mp_limb_signed_t
refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
{
  return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
}

mp_limb_t
refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_add_n (rp, up, vp, n);
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  return cys;
}
mp_limb_t
refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_sub_n (rp, up, vp, n);
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  return cys;
}

/* Twos complement, return borrow. */
mp_limb_t
refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr zeros;
  mp_limb_t ret;

  ASSERT (size >= 1);

  zeros = refmpn_malloc_limbs (size);
  refmpn_fill (zeros, size, CNST_LIMB(0));
  ret = refmpn_sub_n (dst, zeros, src, size);
  free (zeros);
  return ret;
}


#define AORS(aors_n, aors_1) \
  { \
    mp_limb_t c; \
    ASSERT (s1size >= s2size); \
    ASSERT (s2size >= 1); \
    c = aors_n (rp, s1p, s2p, s2size); \
    if (s1size-s2size != 0) \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \
    return c; \
  }
mp_limb_t
refmpn_add (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
mp_limb_t
refmpn_sub (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}


#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
#define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)

#define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)

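/* Illustration of the half-limb scheme used by refmpn_umul_ppmm below
   (added comment, not in the original file): writing B = GMP_LIMB_BITS and
   splitting x = xh*2^(B/2) + xl, y = yh*2^(B/2) + yl, the full product is
     x*y = xh*yh*2^B + (xh*yl + xl*yh)*2^(B/2) + xl*yl,
   so the high limb is xh*yh plus the high halves of the two cross terms,
   plus the carries generated when their shifted low halves are added
   into *lo.  */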
/* Set return:*lo to x*y, using full limbs not nails.  */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t hi, s;

  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  return hi;
}

mp_limb_t
refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
{
  return refmpn_umul_ppmm (lo, x, y);
}

mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
               mp_limb_t carry)
{
  mp_size_t i;
  mp_limb_t hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      lo >>= GMP_NAIL_BITS;
      ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}

mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}


mp_limb_t
refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
              mp_srcptr mult, mp_size_t msize)
{
  mp_ptr src_copy;
  mp_limb_t ret;
  mp_size_t i;

  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  ASSERT (size >= msize);
  ASSERT_MPN (mult, msize);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
  for (i = 1; i < msize-1; i++)
    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);

  free (src_copy);
  return ret;
}

mp_limb_t
refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
}
mp_limb_t
refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
}
mp_limb_t
refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
}
mp_limb_t
refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
}
mp_limb_t
refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
}

#define AORSMUL_1C(operation_n) \
  { \
    mp_ptr p; \
    mp_limb_t ret; \
    \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
    \
    p = refmpn_malloc_limbs (size); \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry); \
    ret += operation_n (rp, rp, p, size); \
    \
    free (p); \
    return ret; \
  }
mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}
mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}


mp_limb_t
refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
mp_limb_t
refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}


mp_limb_t
refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
                 mp_srcptr mult, mp_size_t msize)
{
  mp_ptr src_copy;
  mp_limb_t ret;
  mp_size_t i;

  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  ASSERT (size >= msize);
  ASSERT_MPN (mult, msize);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  for (i = 0; i < msize-1; i++)
    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);

  free (src_copy);
  return ret;
}

mp_limb_t
refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
}
mp_limb_t
refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
}
mp_limb_t
refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
}
mp_limb_t
refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
}
mp_limb_t
refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
}
mp_limb_t
refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
}
mp_limb_t
refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
}

mp_limb_t
refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
                     mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                     mp_limb_t carry)
{
  mp_ptr p;
  mp_limb_t acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* in case r1p==s1p or r1p==s2p */
  p = refmpn_malloc_limbs (size);

  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);

  free (p);
  return 2 * acy + scy;
}

mp_limb_t
refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
                    mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
}


/* Right shift hi,lo and return the low limb of the result.
   Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
mp_limb_t
rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return lo;
  else
    return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
}

/* Left shift hi,lo and return the high limb of the result.
   Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
mp_limb_t
lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return hi;
  else
    return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
}


mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t ret;
  mp_size_t i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = rshift_make (sp[0], CNST_LIMB(0), shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  return ret;
}

mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t ret;
  mp_size_t i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  return ret;
}

void
refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  /* We work downwards since mpn_lshiftc needs that. */
  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));

  for (i = size - 1; i >= 0; i--)
    rp[i] = (~sp[i]) & GMP_NUMB_MASK;
}

mp_limb_t
refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t res;

  /* No asserts here, refmpn_lshift will assert what we need.  */

  res = refmpn_lshift (rp, sp, size, shift);
  refmpn_com (rp, rp, size);
  return res;
}

/* accepting shift==0 and doing a plain copyi or copyd in that case */
mp_limb_t
refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyi (rp, sp, size);
      return 0;
    }
  else
    {
      return refmpn_rshift (rp, sp, size, shift);
    }
}
mp_limb_t
refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyd (rp, sp, size);
      return 0;
    }
  else
    {
      return refmpn_lshift (rp, sp, size, shift);
    }
}

/* accepting size==0 too */
mp_limb_t
refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                           unsigned shift)
{
  return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
}
mp_limb_t
refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                           unsigned shift)
{
  return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
}

/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t q, r;
  int n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  n = refmpn_count_leading_zeros (d);
  d <<= n;

  if (n != 0)
    {
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  r >>= n;
  *rp = r;
  return q;
}

mp_limb_t
refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
{
  return refmpn_udiv_qrnnd (rp, h, l, d);
}

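/* Note on refmpn_udiv_qrnnd above (added comment, not in the original
   file): the divisor is first shifted left by n = count_leading_zeros(d)
   so that it is normalized (high bit set), with h,l shifted by the same
   amount, since __udiv_qrnnd_c expects a normalized divisor; the remainder
   is then shifted back right by n before being stored through *rp.  */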
/* This little subroutine avoids some bad code generation from i386 gcc 3.0
   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
static mp_limb_t
refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                             mp_limb_t divisor, mp_limb_t carry)
{
  mp_size_t i;
  mp_limb_t rem[1];
  for (i = size-1; i >= 0; i--)
    {
      rp[i] = refmpn_udiv_qrnnd (rem, carry,
                                 sp[i] << GMP_NAIL_BITS,
                                 divisor << GMP_NAIL_BITS);
      carry = *rem >> GMP_NAIL_BITS;
    }
  return carry;
}

mp_limb_t
refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t divisor, mp_limb_t carry)
{
  mp_ptr sp_orig;
  mp_ptr prod;
  mp_limb_t carry_orig;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 0);
  ASSERT (carry < divisor);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (divisor);
  ASSERT_LIMB (carry);

  if (size == 0)
    return carry;

  sp_orig = refmpn_memdup_limbs (sp, size);
  prod = refmpn_malloc_limbs (size);
  carry_orig = carry;

  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);

  /* check by multiplying back */
#if 0
  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
          size, divisor, carry_orig, carry);
  mpn_trace("s",sp_copy,size);
  mpn_trace("r",rp,size);
  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
  mpn_trace("p",prod,size);
#endif
  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
  free (sp_orig);
  free (prod);

  return carry;
}

mp_limb_t
refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
}


mp_limb_t
refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
               mp_limb_t carry)
{
  mp_ptr p = refmpn_malloc_limbs (size);
  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  free (p);
  return carry;
}

mp_limb_t
refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                     mp_limb_t inverse)
{
  ASSERT (divisor & GMP_NUMB_HIGHBIT);
  ASSERT (inverse == refmpn_invert_limb (divisor));
  return refmpn_mod_1 (sp, size, divisor);
}

/* This implementation will be rather slow, but has the advantage of being
   in a different style than the libgmp versions.  */
mp_limb_t
refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
{
  ASSERT ((GMP_NUMB_BITS % 4) == 0);
  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
}


mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                  mp_limb_t carry)
{
  mp_ptr z;

  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, CNST_LIMB(0));

  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}

mp_limb_t
refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
                        mp_srcptr sp, mp_size_t size,
                        mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
{
  ASSERT (size >= 0);
  ASSERT (shift == refmpn_count_leading_zeros (divisor));
  ASSERT (inverse == refmpn_invert_limb (divisor << shift));

  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
}

mp_limb_t
refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
                 mp_ptr np, mp_size_t nn,
                 mp_srcptr dp)
{
  mp_ptr tp;
  mp_limb_t qh;

  tp = refmpn_malloc_limbs (nn + qxn);
  refmpn_zero (tp, qxn);
  refmpn_copyi (tp + qxn, np, nn);
  qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
  refmpn_copyi (np, tp, 2);
  free (tp);
  return qh;
}

/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
   paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note that -d-1 < d
   since d has the high bit set. */

mp_limb_t
refmpn_invert_limb (mp_limb_t d)
{
  mp_limb_t r;
  ASSERT (d & GMP_LIMB_HIGHBIT);
  return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
}

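/* Worked toy example for refmpn_invert_limb above (added comment, not in
   the original file): floor((b*(b-d)-1)/d) equals floor((b*b-1)/d) - b.
   With an imaginary 4-bit limb, b = 16 and d = 13 (high bit set), the
   inverse is floor((16*3 - 1)/13) = floor(47/13) = 3, the same as
   floor(255/13) - 16 = 19 - 16 = 3.  */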
void
refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr qp, tp;
  TMP_DECL;
  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (2 * n);
  qp = TMP_ALLOC_LIMBS (n + 1);

  MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);

  refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
  refmpn_copyi (rp, qp, n);

  TMP_FREE;
}

void
refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr tp;
  mp_limb_t binv;
  TMP_DECL;
  TMP_MARK;

  /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
     code.  To make up for it, we check that the inverse is correct using a
     multiply.  */

  tp = TMP_ALLOC_LIMBS (2 * n);

  MPN_ZERO (tp, n);
  tp[0] = 1;
  binvert_limb (binv, up[0]);
  mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);

  refmpn_mul_n (tp, rp, up, n);
  ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));

  TMP_FREE;
}

/* The aim is to produce a dst quotient and return a remainder c, satisfying
   c*b^n + src-i == 3*dst, where i is the incoming carry.

   Some value c==0, c==1 or c==2 will satisfy, so just try each.

   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
   remainder from the first division attempt determines the correct
   remainder (3-c), but don't bother with that, since we can't guarantee
   anything about GMP_NUMB_BITS when using nails.

   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
   complement negative, ie. b^n+a-i, and the calculation produces c1
   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
   means it's enough to just add any borrow back at the end.

   A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
   mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
   or 1 respectively, so with 1 added the final return value is still in the
   prescribed range 0 to 2. */

mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr spcopy;
  mp_limb_t c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  c += cs;
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}

mp_limb_t
refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
}


/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
void
refmpn_mul_basecase (mp_ptr prodp,
                     mp_srcptr up, mp_size_t usize,
                     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t i;

  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}


/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
void
refmpn_mulmid_basecase (mp_ptr rp,
                        mp_srcptr up, mp_size_t un,
                        mp_srcptr vp, mp_size_t vn)
{
  mp_limb_t cy;
  mp_size_t i;

  ASSERT (un >= vn);
  ASSERT (vn >= 1);
  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
  ASSERT_MPN (up, un);
  ASSERT_MPN (vp, vn);

  rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
  rp[un - vn + 2] = CNST_LIMB (0);
  for (i = 1; i < vn; i++)
    {
      cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
      cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
      cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
      ASSERT (cy == 0);
    }
}

void
refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
                      mp_ptr scratch)
{
  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
}

void
refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
     off products near the middle product region boundary */
  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
}

void
refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
               mp_srcptr vp, mp_size_t vn)
{
  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
     off products near the middle product region boundary */
  refmpn_mulmid_basecase (rp, up, un, vp, vn);
}



#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
#if WANT_FFT
#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
#else
#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
#endif

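/* How refmpn_mul below proceeds (added comment, not in the original file):
   for large enough vn it multiplies successive vn-limb chunks of {up,un}
   by {vp,vn} with one of the mpn_toomNN_mul routines and accumulates each
   2*vn-limb partial product into wp at the matching offset; any leftover
   un mod vn limbs are handled by a recursive call with the operands
   swapped, so only balanced toom calls are ever made.  */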
void
refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp, rp;
  mp_size_t tn;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */
      if (vn != 0)
        refmpn_mul_basecase (wp, up, un, vp, vn);
      else
        MPN_ZERO (wp, un);
      return;
    }

  MPN_ZERO (wp, vn);
  rp = refmpn_malloc_limbs (2 * vn);

  if (vn < TOOM4_THRESHOLD)
    tn = mpn_toom22_mul_itch (vn, vn);
  else if (vn < TOOM6_THRESHOLD)
    tn = mpn_toom33_mul_itch (vn, vn);
  else if (vn < FFT_THRESHOLD)
    tn = mpn_toom44_mul_itch (vn, vn);
  else
    tn = mpn_toom6h_mul_itch (vn, vn);
  tp = refmpn_malloc_limbs (tn);

  while (un >= vn)
    {
      if (vn < TOOM4_THRESHOLD)
        /* In the toom3 range, use mpn_toom22_mul.  */
        mpn_toom22_mul (rp, up, vn, vp, vn, tp);
      else if (vn < TOOM6_THRESHOLD)
        /* In the toom4 range, use mpn_toom33_mul.  */
        mpn_toom33_mul (rp, up, vn, vp, vn, tp);
      else if (vn < FFT_THRESHOLD)
        /* In the toom6 range, use mpn_toom44_mul.  */
        mpn_toom44_mul (rp, up, vn, vp, vn, tp);
      else
        /* For the largest operands, use mpn_toom6h_mul.  */
        mpn_toom6h_mul (rp, up, vn, vp, vn, tp);

      ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
      wp += vn;

      up += vn;
      un -= vn;
    }

  free (tp);

  if (un != 0)
    {
      refmpn_mul (rp, vp, vn, up, un);
      ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
    }
  free (rp);
}

void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  refmpn_mul (prodp, up, size, vp, size);
}

void
refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_ptr tp = refmpn_malloc_limbs (2*size);
  refmpn_mul (tp, up, size, vp, size);
  refmpn_copyi (prodp, tp, size);
  free (tp);
}

void
refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mul (dst, src, size, src, size);
}

void
refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mullo_n (dst, src, src, size);
}

/* Allowing usize<vsize, usize==0 or vsize==0. */
void
refmpn_mul_any (mp_ptr prodp,
                mp_srcptr up, mp_size_t usize,
                mp_srcptr vp, mp_size_t vsize)
{
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= 0);
  ASSERT (vsize >= 0);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  if (usize == 0)
    {
      refmpn_fill (prodp, vsize, CNST_LIMB(0));
      return;
    }

  if (vsize == 0)
    {
      refmpn_fill (prodp, usize, CNST_LIMB(0));
      return;
    }

  if (usize >= vsize)
    refmpn_mul (prodp, up, usize, vp, vsize);
  else
    refmpn_mul (prodp, vp, vsize, up, usize);
}


mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t x;
  int twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));
  ASSERT_MPN (xp, xsize);
  ASSERT_LIMB (y);

  x = refmpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;

  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  for (;;)
    {
      while ((x & 1) == 0)  x >>= 1;
      while ((y & 1) == 0)  y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}


/* Based on the full limb x, not nails. */
unsigned
refmpn_count_leading_zeros (mp_limb_t x)
{
  unsigned n = 0;

  ASSERT (x != 0);

  while ((x & GMP_LIMB_HIGHBIT) == 0)
    {
      x <<= 1;
      n++;
    }
  return n;
}

/* Full limbs allowed, not limited to nails. */
unsigned
refmpn_count_trailing_zeros (mp_limb_t x)
{
  unsigned n = 0;

  ASSERT (x != 0);
  ASSERT_LIMB (x);

  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

/* Strip factors of two (low zero bits) from {p,size} by right shifting.
   The return value is the number of twos stripped.  */
mp_size_t
refmpn_strip_twos (mp_ptr p, mp_size_t size)
{
  mp_size_t limbs;
  unsigned shift;

  ASSERT (size >= 1);
  ASSERT (! refmpn_zero_p (p, size));
  ASSERT_MPN (p, size);

  for (limbs = 0; p[0] == 0; limbs++)
    {
      refmpn_copyi (p, p+1, size-1);
      p[size-1] = 0;
    }

  shift = refmpn_count_trailing_zeros (p[0]);
  if (shift)
    refmpn_rshift (p, p, size, shift);

  return limbs*GMP_NUMB_BITS + shift;
}

mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;
}

unsigned long
ref_popc_limb (mp_limb_t src)
{
  unsigned long count;
  int i;

  count = 0;
  for (i = 0; i < GMP_LIMB_BITS; i++)
    {
      count += (src & 1);
      src >>= 1;
    }
  return count;
}

unsigned long
refmpn_popcount (mp_srcptr sp, mp_size_t size)
{
  unsigned long count = 0;
  mp_size_t i;

  ASSERT (size >= 0);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    count += ref_popc_limb (sp[i]);
  return count;
}

unsigned long
refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr d;
  unsigned long count;

  ASSERT (size >= 0);
  ASSERT_MPN (s1p, size);
  ASSERT_MPN (s2p, size);

  if (size == 0)
    return 0;

  d = refmpn_malloc_limbs (size);
  refmpn_xor_n (d, s1p, s2p, size);
  count = refmpn_popcount (d, size);
  free (d);
  return count;
}


/* set r to a%d */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t D[2];
  int n;

  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
        break;
      refmpn_lshift (D, D, (mp_size_t) 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  while (n >= 0)
    {
      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
      refmpn_rshift (D, D, (mp_size_t) 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
}


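/* Note on refmpn_sb_div_qr below (added comment, not in the original file):
   each quotient limb is estimated from the top two remaining numerator
   limbs divided by the top divisor limb only, the classical schoolbook
   estimate; with a normalized divisor this estimate can exceed the true
   quotient limb by at most 2, which the loop repairs by adding the divisor
   back once or twice and decrementing q.  */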
*/ 2213 mp_limb_t 2214 refmpn_sb_div_qr (mp_ptr qp, 2215 mp_ptr np, mp_size_t nsize, 2216 mp_srcptr dp, mp_size_t dsize) 2217 { 2218 mp_limb_t retval = 0; 2219 mp_size_t i; 2220 mp_limb_t d1 = dp[dsize-1]; 2221 mp_ptr np_orig = refmpn_memdup_limbs (np, nsize); 2222 2223 ASSERT (nsize >= dsize); 2224 /* ASSERT (dsize > 2); */ 2225 ASSERT (dsize >= 2); 2226 ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT); 2227 ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np); 2228 ASSERT_MPN (np, nsize); 2229 ASSERT_MPN (dp, dsize); 2230 2231 i = nsize-dsize; 2232 if (refmpn_cmp (np+i, dp, dsize) >= 0) 2233 { 2234 ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize)); 2235 retval = 1; 2236 } 2237 2238 for (i--; i >= 0; i--) 2239 { 2240 mp_limb_t n0 = np[i+dsize]; 2241 mp_limb_t n1 = np[i+dsize-1]; 2242 mp_limb_t q, dummy_r; 2243 2244 ASSERT (n0 <= d1); 2245 if (n0 == d1) 2246 q = GMP_NUMB_MAX; 2247 else 2248 q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS, 2249 d1 << GMP_NAIL_BITS); 2250 2251 n0 -= refmpn_submul_1 (np+i, dp, dsize, q); 2252 ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX); 2253 if (n0) 2254 { 2255 q--; 2256 if (! refmpn_add_n (np+i, np+i, dp, dsize)) 2257 { 2258 q--; 2259 ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize)); 2260 } 2261 } 2262 np[i+dsize] = 0; 2263 2264 qp[i] = q; 2265 } 2266 2267 /* remainder < divisor */ 2268 #if 0 /* ASSERT triggers gcc 4.2.1 bug */ 2269 ASSERT (refmpn_cmp (np, dp, dsize) < 0); 2270 #endif 2271 2272 /* multiply back to original */ 2273 { 2274 mp_ptr mp = refmpn_malloc_limbs (nsize); 2275 2276 refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize); 2277 if (retval) 2278 ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize)); 2279 ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize)); 2280 ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0); 2281 2282 free (mp); 2283 } 2284 2285 free (np_orig); 2286 return retval; 2287 } 2288 2289 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in 2290 particular the trial quotient is allowed to be 2 too big. 
*/ 2291 void 2292 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, 2293 mp_ptr np, mp_size_t nsize, 2294 mp_srcptr dp, mp_size_t dsize) 2295 { 2296 ASSERT (qxn == 0); 2297 ASSERT_MPN (np, nsize); 2298 ASSERT_MPN (dp, dsize); 2299 ASSERT (dsize > 0); 2300 ASSERT (dp[dsize-1] != 0); 2301 2302 if (dsize == 1) 2303 { 2304 rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]); 2305 return; 2306 } 2307 else 2308 { 2309 mp_ptr n2p = refmpn_malloc_limbs (nsize+1); 2310 mp_ptr d2p = refmpn_malloc_limbs (dsize); 2311 int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS; 2312 2313 n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm); 2314 ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm)); 2315 2316 refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize); 2317 refmpn_rshift_or_copy (rp, n2p, dsize, norm); 2318 2319 /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */ 2320 free (n2p); 2321 free (d2p); 2322 } 2323 } 2324 2325 mp_limb_t 2326 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm) 2327 { 2328 mp_size_t j; 2329 mp_limb_t cy; 2330 2331 ASSERT_MPN (up, 2*n); 2332 /* ASSERT about directed overlap rp, up */ 2333 /* ASSERT about overlap rp, mp */ 2334 /* ASSERT about overlap up, mp */ 2335 2336 for (j = n - 1; j >= 0; j--) 2337 { 2338 up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK); 2339 up++; 2340 } 2341 cy = mpn_add_n (rp, up, up - n, n); 2342 return cy; 2343 } 2344 2345 size_t 2346 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size) 2347 { 2348 unsigned char *d; 2349 size_t dsize; 2350 2351 ASSERT (size >= 0); 2352 ASSERT (base >= 2); 2353 ASSERT (base < numberof (mp_bases)); 2354 ASSERT (size == 0 || src[size-1] != 0); 2355 ASSERT_MPN (src, size); 2356 2357 MPN_SIZEINBASE (dsize, src, size, base); 2358 ASSERT (dsize >= 1); 2359 ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES)); 2360 2361 if (size == 0) 2362 { 2363 dst[0] = 0; 2364 return 1; 2365 } 2366 2367 /* don't clobber input for power of 2 bases */ 2368 if (POW2_P (base)) 2369 src = refmpn_memdup_limbs (src, size); 2370 2371 d = dst + dsize; 2372 do 2373 { 2374 d--; 2375 ASSERT (d >= dst); 2376 *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base); 2377 size -= (src[size-1] == 0); 2378 } 2379 while (size != 0); 2380 2381 /* Move result back and decrement dsize if we didn't generate 2382 the maximum possible digits. */ 2383 if (d != dst) 2384 { 2385 size_t i; 2386 dsize -= d - dst; 2387 for (i = 0; i < dsize; i++) 2388 dst[i] = d[i]; 2389 } 2390 2391 if (POW2_P (base)) 2392 free (src); 2393 2394 return dsize; 2395 } 2396 2397 2398 mp_limb_t 2399 ref_bswap_limb (mp_limb_t src) 2400 { 2401 mp_limb_t dst; 2402 int i; 2403 2404 dst = 0; 2405 for (i = 0; i < GMP_LIMB_BYTES; i++) 2406 { 2407 dst = (dst << 8) + (src & 0xFF); 2408 src >>= 8; 2409 } 2410 return dst; 2411 } 2412 2413 2414 /* These random functions are mostly for transitional purposes while adding 2415 nail support, since they're independent of the normal mpn routines. They 2416 can probably be removed when those normal routines are reliable, though 2417 perhaps something independent would still be useful at times. 
*/ 2418 2419 #if GMP_LIMB_BITS == 32 2420 #define RAND_A CNST_LIMB(0x29CF535) 2421 #endif 2422 #if GMP_LIMB_BITS == 64 2423 #define RAND_A CNST_LIMB(0xBAECD515DAF0B49D) 2424 #endif 2425 2426 mp_limb_t refmpn_random_seed; 2427 2428 mp_limb_t 2429 refmpn_random_half (void) 2430 { 2431 refmpn_random_seed = refmpn_random_seed * RAND_A + 1; 2432 return (refmpn_random_seed >> GMP_LIMB_BITS/2); 2433 } 2434 2435 mp_limb_t 2436 refmpn_random_limb (void) 2437 { 2438 return ((refmpn_random_half () << (GMP_LIMB_BITS/2)) 2439 | refmpn_random_half ()) & GMP_NUMB_MASK; 2440 } 2441 2442 void 2443 refmpn_random (mp_ptr ptr, mp_size_t size) 2444 { 2445 mp_size_t i; 2446 if (GMP_NAIL_BITS == 0) 2447 { 2448 mpn_random (ptr, size); 2449 return; 2450 } 2451 2452 for (i = 0; i < size; i++) 2453 ptr[i] = refmpn_random_limb (); 2454 } 2455 2456 void 2457 refmpn_random2 (mp_ptr ptr, mp_size_t size) 2458 { 2459 mp_size_t i; 2460 mp_limb_t bit, mask, limb; 2461 int run; 2462 2463 if (GMP_NAIL_BITS == 0) 2464 { 2465 mpn_random2 (ptr, size); 2466 return; 2467 } 2468 2469 #define RUN_MODULUS 32 2470 2471 /* start with ones at a random pos in the high limb */ 2472 bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS); 2473 mask = 0; 2474 run = 0; 2475 2476 for (i = size-1; i >= 0; i--) 2477 { 2478 limb = 0; 2479 do 2480 { 2481 if (run == 0) 2482 { 2483 run = (refmpn_random_half () % RUN_MODULUS) + 1; 2484 mask = ~mask; 2485 } 2486 2487 limb |= (bit & mask); 2488 bit >>= 1; 2489 run--; 2490 } 2491 while (bit != 0); 2492 2493 ptr[i] = limb; 2494 bit = GMP_NUMB_HIGHBIT; 2495 } 2496 } 2497 2498 /* This is a simple bitwise algorithm working high to low across "s" and 2499 testing each time whether setting the bit would make s^2 exceed n. */ 2500 mp_size_t 2501 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize) 2502 { 2503 mp_ptr tp, dp; 2504 mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs; 2505 unsigned ibit; 2506 long i; 2507 mp_limb_t c; 2508 2509 ASSERT (nsize >= 0); 2510 2511 /* If n==0, then s=0 and r=0. */ 2512 if (nsize == 0) 2513 return 0; 2514 2515 ASSERT (np[nsize - 1] != 0); 2516 ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize)); 2517 ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize)); 2518 ASSERT (! 
MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize)); 2519 2520 /* root */ 2521 ssize = (nsize+1)/2; 2522 refmpn_zero (sp, ssize); 2523 2524 /* the remainder so far */ 2525 dp = refmpn_memdup_limbs (np, nsize); 2526 dsize = nsize; 2527 2528 /* temporary */ 2529 talloc = 2*ssize + 1; 2530 tp = refmpn_malloc_limbs (talloc); 2531 2532 for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--) 2533 { 2534 /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i 2535 is added to it */ 2536 2537 ilimbs = (i+1) / GMP_NUMB_BITS; 2538 ibit = (i+1) % GMP_NUMB_BITS; 2539 refmpn_zero (tp, ilimbs); 2540 c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit); 2541 tsize = ilimbs + ssize; 2542 tp[tsize] = c; 2543 tsize += (c != 0); 2544 2545 ilimbs = (2*i) / GMP_NUMB_BITS; 2546 ibit = (2*i) % GMP_NUMB_BITS; 2547 if (ilimbs + 1 > tsize) 2548 { 2549 refmpn_zero_extend (tp, tsize, ilimbs + 1); 2550 tsize = ilimbs + 1; 2551 } 2552 c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs, 2553 CNST_LIMB(1) << ibit); 2554 ASSERT (tsize < talloc); 2555 tp[tsize] = c; 2556 tsize += (c != 0); 2557 2558 if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0) 2559 { 2560 /* set this bit in s and subtract from the remainder */ 2561 refmpn_setbit (sp, i); 2562 2563 ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize)); 2564 dsize = refmpn_normalize (dp, dsize); 2565 } 2566 } 2567 2568 if (rp == NULL) 2569 { 2570 ret = ! refmpn_zero_p (dp, dsize); 2571 } 2572 else 2573 { 2574 ASSERT (dsize == 0 || dp[dsize-1] != 0); 2575 refmpn_copy (rp, dp, dsize); 2576 ret = dsize; 2577 } 2578 2579 free (dp); 2580 free (tp); 2581 return ret; 2582 }
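
Editorial note: the comment before refmpn_sqrtrem describes the algorithm abstractly, and the multi-limb bookkeeping (tp, dp, ilimbs, ibit) can obscure it. The following single-word sketch is an editorial illustration, not part of refmpn.c; it applies the same high-to-low bit test to a 32-bit operand, with r playing the role of the remainder {dp,dsize} and t the role of the increment {tp,tsize} that s^2 would grow by if bit 2^i were set.

#include <stdint.h>

/* floor(sqrt(n)) and remainder for a 32-bit n, mirroring the structure
   of refmpn_sqrtrem: r holds n - s*s so far, and t is the amount s*s
   would increase by if 2^i were added to s.  (Illustrative sketch only.) */
static uint32_t
word_sqrtrem (uint32_t n, uint32_t *rem)
{
  uint64_t r = n, t;
  uint32_t s = 0;
  int i;

  for (i = 15; i >= 0; i--)              /* the root has at most 16 bits */
    {
      t = (2 * (uint64_t) s << i) + ((uint64_t) 1 << (2 * i));
      if (r >= t)                        /* same test as the dp/tp compare */
        {
          s |= (uint32_t) 1 << i;        /* refmpn_setbit (sp, i) */
          r -= t;                        /* refmpn_sub_n (dp, dp, tp, ...) */
        }
    }
  *rem = (uint32_t) r;
  return s;
}

For example, word_sqrtrem (10, &r) yields 3 with r == 1, the same root/remainder pair refmpn_sqrtrem produces for the one-limb operand {10}.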
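
Since this file exists to back the test suite, the typical consumer is a driver that cross-checks a reference routine against the corresponding mpn routine on random operands. The sketch below is an editorial illustration under the assumption that it is compiled inside the GMP test tree and linked with the test support library, so that gmp-impl.h and tests.h supply the refmpn_* prototypes; it is not taken from the GMP sources.

#include <stdlib.h>

#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"

int
main (void)
{
  mp_size_t  size = 10;
  mp_ptr     xp;
  int        i;

  tests_start ();

  xp = refmpn_malloc_limbs (size);

  for (i = 0; i < 100; i++)
    {
      refmpn_random (xp, size);

      /* reference routine and real routine must agree */
      if (refmpn_popcount (xp, size) != mpn_popcount (xp, size))
        abort ();

      /* gcd_1 requires a non-zero operand on both sides */
      if (! refmpn_zero_p (xp, size)
          && refmpn_gcd_1 (xp, size, CNST_LIMB(12345))
             != mpn_gcd_1 (xp, size, CNST_LIMB(12345)))
        abort ();
    }

  refmpn_free_limbs (xp);
  tests_end ();
  return 0;
}

The real programs under tests/mpn follow the same compare-against-reference pattern, typically looping over a range of operand sizes and bit patterns rather than a single fixed size.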