github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tests/refmpn.c

     1  /* Reference mpn functions, designed to be simple, portable and independent
     2     of the normal gmp code.  Speed isn't a consideration.
     3  
     4  Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.
     5  
     6  This file is part of the GNU MP Library test suite.
     7  
     8  The GNU MP Library test suite is free software; you can redistribute it
     9  and/or modify it under the terms of the GNU General Public License as
    10  published by the Free Software Foundation; either version 3 of the License,
    11  or (at your option) any later version.
    12  
    13  The GNU MP Library test suite is distributed in the hope that it will be
    14  useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
    15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
    16  Public License for more details.
    17  
    18  You should have received a copy of the GNU General Public License along with
    19  the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
    20  
    21  
    22  /* Most routines have assertions representing what the mpn routines are
    23     supposed to accept.  Many of these reference routines do sensible things
    24     outside these ranges (e.g. for size==0), but the assertions are present
    25     to pick up bad parameters passed here that are about to be passed
    26     unchanged to the real mpn routine being compared against.  */
    27  
    28  /* always do assertion checking */
    29  #define WANT_ASSERT  1
    30  
    31  #include <stdio.h>  /* for NULL */
    32  #include <stdlib.h> /* for malloc */
    33  
    34  #include "gmp.h"
    35  #include "gmp-impl.h"
    36  #include "longlong.h"
    37  
    38  #include "tests.h"
    39  
    40  
    41  
    42  /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
    43     in bytes. */
    44  int
    45  byte_overlap_p (const void *v_xp, mp_size_t xsize,
    46  		const void *v_yp, mp_size_t ysize)
    47  {
    48    const char *xp = (const char *) v_xp;
    49    const char *yp = (const char *) v_yp;
    50  
    51    ASSERT (xsize >= 0);
    52    ASSERT (ysize >= 0);
    53  
    54    /* no wraparounds */
    55    ASSERT (xp+xsize >= xp);
    56    ASSERT (yp+ysize >= yp);
    57  
    58    if (xp + xsize <= yp)
    59      return 0;
    60  
    61    if (yp + ysize <= xp)
    62      return 0;
    63  
    64    return 1;
    65  }
    66  
    67  /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
    68  int
    69  refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
    70  {
    71    return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
    72  			 yp, ysize * GMP_LIMB_BYTES);
    73  }
    74  
    75  /* Check overlap for a routine defined to work low to high. */
    76  int
    77  refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
    78  {
    79    return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
    80  }
    81  
    82  /* Check overlap for a routine defined to work high to low. */
    83  int
    84  refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
    85  {
    86    return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
    87  }
    88  
    89  /* Check overlap for a standard routine requiring equal or separate. */
    90  int
    91  refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
    92  {
    93    return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
    94  }
    95  int
    96  refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
    97  			       mp_size_t size)
    98  {
    99    return (refmpn_overlap_fullonly_p (dst, src1, size)
   100  	  && refmpn_overlap_fullonly_p (dst, src2, size));
   101  }
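
        /* Illustrative sketch, not from the GMP sources: the "fullonly" rule
           accepts operands that are exactly equal or fully disjoint, and rejects
           any partial overlap.  The check function below is a hypothetical
           self-test.  */
        #if 0
        static void
        check_overlap_rules (void)
        {
          mp_limb_t  a[4];
          ASSERT (refmpn_overlap_fullonly_p (a, a, 4));        /* same region: ok */
          ASSERT (! refmpn_overlap_fullonly_p (a + 1, a, 3));  /* partial: rejected */
        }
        #endif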
   102  
   103  
   104  mp_ptr
   105  refmpn_malloc_limbs (mp_size_t size)
   106  {
   107    mp_ptr  p;
   108    ASSERT (size >= 0);
   109    if (size == 0)
   110      size = 1;
   111    p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
   112    ASSERT (p != NULL);
   113    return p;
   114  }
   115  
   116  /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
   117   * memory allocated by refmpn_malloc_limbs_aligned. */
   118  void
   119  refmpn_free_limbs (mp_ptr p)
   120  {
   121    free (p);
   122  }
   123  
   124  mp_ptr
   125  refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
   126  {
   127    mp_ptr  p;
   128    p = refmpn_malloc_limbs (size);
   129    refmpn_copyi (p, ptr, size);
   130    return p;
   131  }
   132  
   133  /* malloc n limbs, aligned on a boundary that is a multiple of m bytes */
   134  mp_ptr
   135  refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
   136  {
   137    return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
   138  }
   139  
   140  
   141  void
   142  refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
   143  {
   144    mp_size_t  i;
   145    ASSERT (size >= 0);
   146    for (i = 0; i < size; i++)
   147      ptr[i] = value;
   148  }
   149  
   150  void
   151  refmpn_zero (mp_ptr ptr, mp_size_t size)
   152  {
   153    refmpn_fill (ptr, size, CNST_LIMB(0));
   154  }
   155  
   156  void
   157  refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
   158  {
   159    ASSERT (newsize >= oldsize);
   160    refmpn_zero (ptr+oldsize, newsize-oldsize);
   161  }
   162  
   163  int
   164  refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
   165  {
   166    mp_size_t  i;
   167    for (i = 0; i < size; i++)
   168      if (ptr[i] != 0)
   169        return 0;
   170    return 1;
   171  }
   172  
   173  mp_size_t
   174  refmpn_normalize (mp_srcptr ptr, mp_size_t size)
   175  {
   176    ASSERT (size >= 0);
   177    while (size > 0 && ptr[size-1] == 0)
   178      size--;
   179    return size;
   180  }
   181  
   182  /* the highest one bit in x */
   183  mp_limb_t
   184  refmpn_msbone (mp_limb_t x)
   185  {
   186    mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
   187  
   188    while (n != 0)
   189      {
   190        if (x & n)
   191  	break;
   192        n >>= 1;
   193      }
   194    return n;
   195  }
   196  
   197  /* a mask of the highest one bit and all bits below */
   198  mp_limb_t
   199  refmpn_msbone_mask (mp_limb_t x)
   200  {
   201    if (x == 0)
   202      return 0;
   203  
   204    return (refmpn_msbone (x) << 1) - 1;
   205  }
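
        /* Illustrative sketch, not from the GMP sources: for x = 0x16 (binary
           10110) the highest one bit is 0x10 and the mask covering it and all
           bits below is 0x1f.  Hypothetical self-test: */
        #if 0
        static void
        check_msbone (void)
        {
          ASSERT (refmpn_msbone (CNST_LIMB(0x16)) == CNST_LIMB(0x10));
          ASSERT (refmpn_msbone_mask (CNST_LIMB(0x16)) == CNST_LIMB(0x1f));
        }
        #endif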
   206  
   207  /* How many digits in the given base will fit in a limb.
   208     Notice that the product b is allowed to be equal to the limit
   209     2^GMP_NUMB_BITS; this ensures the result for base==2 will be
   210     GMP_NUMB_BITS (and similarly other powers of 2).  */
   211  int
   212  refmpn_chars_per_limb (int base)
   213  {
   214    mp_limb_t  limit[2], b[2];
   215    int        chars_per_limb;
   216  
   217    ASSERT (base >= 2);
   218  
   219    limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
   220    limit[1] = 1;
   221    b[0] = 1;      /* b = 1 */
   222    b[1] = 0;
   223  
   224    chars_per_limb = 0;
   225    for (;;)
   226      {
   227        if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
   228  	break;
   229        if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
   230  	break;
   231        chars_per_limb++;
   232      }
   233    return chars_per_limb;
   234  }
   235  
   236  /* The biggest value base**n which fits in GMP_NUMB_BITS bits. */
   237  mp_limb_t
   238  refmpn_big_base (int base)
   239  {
   240    int        chars_per_limb = refmpn_chars_per_limb (base);
   241    int        i;
   242    mp_limb_t  bb;
   243  
   244    ASSERT (base >= 2);
   245    bb = 1;
   246    for (i = 0; i < chars_per_limb; i++)
   247      bb *= base;
   248    return bb;
   249  }
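
        /* Illustrative sketch, not from the GMP sources: assuming
           GMP_NUMB_BITS == 64 and no nails, 10^19 < 2^64 <= 10^20, so 19 decimal
           digits fit in a limb and the big base is 10^19; for base 2 the product
           is allowed to reach the limit exactly, giving all GMP_NUMB_BITS digits.
           Hypothetical self-test: */
        #if 0
        static void
        check_chars_per_limb (void)
        {
          ASSERT (refmpn_chars_per_limb (2) == GMP_NUMB_BITS);
          ASSERT (refmpn_chars_per_limb (10) == 19);
          ASSERT (refmpn_big_base (10) == CNST_LIMB(10000000000000000000));
        }
        #endif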
   250  
   251  
   252  void
   253  refmpn_setbit (mp_ptr ptr, unsigned long bit)
   254  {
   255    ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
   256  }
   257  
   258  void
   259  refmpn_clrbit (mp_ptr ptr, unsigned long bit)
   260  {
   261    ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
   262  }
   263  
   264  #define REFMPN_TSTBIT(ptr,bit) \
   265    (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
   266  
   267  int
   268  refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
   269  {
   270    return REFMPN_TSTBIT (ptr, bit);
   271  }
   272  
   273  unsigned long
   274  refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
   275  {
   276    while (REFMPN_TSTBIT (ptr, bit) != 0)
   277      bit++;
   278    return bit;
   279  }
   280  
   281  unsigned long
   282  refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
   283  {
   284    while (REFMPN_TSTBIT (ptr, bit) == 0)
   285      bit++;
   286    return bit;
   287  }
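
        /* Illustrative sketch, not from the GMP sources: bits are numbered from
           the least significant limb upwards, so bit GMP_NUMB_BITS is bit 0 of
           ptr[1].  Hypothetical self-test: */
        #if 0
        static void
        check_bit_indexing (void)
        {
          mp_limb_t  x[2] = { 0, 0 };
          refmpn_setbit (x, GMP_NUMB_BITS);
          ASSERT (refmpn_tstbit (x, GMP_NUMB_BITS));
          ASSERT (refmpn_scan1 (x, 0) == GMP_NUMB_BITS);
        }
        #endif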
   288  
   289  void
   290  refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
   291  {
   292    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
   293    refmpn_copyi (rp, sp, size);
   294  }
   295  
   296  void
   297  refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
   298  {
   299    mp_size_t i;
   300  
   301    ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
   302    ASSERT (size >= 0);
   303  
   304    for (i = 0; i < size; i++)
   305      rp[i] = sp[i];
   306  }
   307  
   308  void
   309  refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
   310  {
   311    mp_size_t i;
   312  
   313    ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
   314    ASSERT (size >= 0);
   315  
   316    for (i = size-1; i >= 0; i--)
   317      rp[i] = sp[i];
   318  }
   319  
   320  /* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
   321     zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
   322  void
   323  refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
   324  {
   325    ASSERT (wsize >= 0);
   326    ASSERT (xsize >= 0);
   327  
   328    /* high part of x if x bigger than w */
   329    if (xsize > wsize)
   330      {
   331        xp += xsize - wsize;
   332        xsize = wsize;
   333      }
   334  
   335    refmpn_copy (wp + wsize-xsize, xp, xsize);
   336    refmpn_zero (wp, wsize-xsize);
   337  }
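
        /* Illustrative sketch, not from the GMP sources: note the zero padding
           goes in the low limbs, so extending {1,2} to 4 limbs gives {0,0,1,2},
           and shrinking {1,2,3} to 2 limbs keeps the high part {2,3}.
           Hypothetical self-test: */
        #if 0
        static void
        check_copy_extend (void)
        {
          mp_limb_t  x[3] = { 1, 2, 3 }, w[4];
          refmpn_copy_extend (w, 4, x, 2);   /* w = {0,0,1,2} */
          refmpn_copy_extend (w, 2, x, 3);   /* w = {2,3} */
        }
        #endif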
   338  
   339  int
   340  refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   341  {
   342    mp_size_t  i;
   343  
   344    ASSERT (size >= 1);
   345    ASSERT_MPN (xp, size);
   346    ASSERT_MPN (yp, size);
   347  
   348    for (i = size-1; i >= 0; i--)
   349      {
   350        if (xp[i] > yp[i])  return 1;
   351        if (xp[i] < yp[i])  return -1;
   352      }
   353    return 0;
   354  }
   355  
   356  int
   357  refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   358  {
   359    if (size == 0)
   360      return 0;
   361    else
   362      return refmpn_cmp (xp, yp, size);
   363  }
   364  
   365  int
   366  refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
   367  		     mp_srcptr yp, mp_size_t ysize)
   368  {
   369    int  opp, cmp;
   370  
   371    ASSERT_MPN (xp, xsize);
   372    ASSERT_MPN (yp, ysize);
   373  
   374    opp = (xsize < ysize);
   375    if (opp)
   376      MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
   377  
   378    if (! refmpn_zero_p (xp+ysize, xsize-ysize))
   379      cmp = 1;
   380    else
   381      cmp = refmpn_cmp (xp, yp, ysize);
   382  
   383    return (opp ? -cmp : cmp);
   384  }
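
        /* Illustrative sketch, not from the GMP sources: a longer operand is
           bigger only if its extra high limbs are non-zero; otherwise the
           comparison falls through to the common low limbs.  Hypothetical
           self-test: */
        #if 0
        static void
        check_cmp_twosizes (void)
        {
          mp_limb_t  x[2] = { 5, 0 }, y[1] = { 7 };
          ASSERT (refmpn_cmp_twosizes (x, 2, y, 1) < 0);   /* 5 < 7 */
          x[1] = 1;
          ASSERT (refmpn_cmp_twosizes (x, 2, y, 1) > 0);   /* b+5 > 7 */
        }
        #endif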
   385  
   386  int
   387  refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   388  {
   389    mp_size_t  i;
   390    ASSERT (size >= 0);
   391  
   392    for (i = 0; i < size; i++)
   393        if (xp[i] != yp[i])
   394  	return 0;
   395    return 1;
   396  }
   397  
   398  
   399  #define LOGOPS(operation)                                               \
   400    {                                                                     \
   401      mp_size_t  i;                                                       \
   402  									\
   403      ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
   404      ASSERT (size >= 1);                                                 \
   405      ASSERT_MPN (s1p, size);                                             \
   406      ASSERT_MPN (s2p, size);                                             \
   407  									\
   408      for (i = 0; i < size; i++)                                          \
   409        rp[i] = operation;                                                \
   410    }
   411  
   412  void
   413  refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   414  {
   415    LOGOPS (s1p[i] & s2p[i]);
   416  }
   417  void
   418  refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   419  {
   420    LOGOPS (s1p[i] & ~s2p[i]);
   421  }
   422  void
   423  refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   424  {
   425    LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
   426  }
   427  void
   428  refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   429  {
   430    LOGOPS (s1p[i] | s2p[i]);
   431  }
   432  void
   433  refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   434  {
   435    LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
   436  }
   437  void
   438  refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   439  {
   440    LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
   441  }
   442  void
   443  refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   444  {
   445    LOGOPS (s1p[i] ^ s2p[i]);
   446  }
   447  void
   448  refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   449  {
   450    LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
   451  }
   452  
   453  
   454  /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
   455  void
   456  refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
   457  		   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
   458  {
   459    *dl = ml - sl;
   460    *dh = mh - sh - (ml < sl);
   461  }
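
        /* Illustrative sketch, not from the GMP sources: subtracting 0:1 from
           1:0 borrows out of the low limb via the (ml < sl) test, giving
           0:MP_LIMB_T_MAX.  Hypothetical self-test: */
        #if 0
        static void
        check_sub_ddmmss (void)
        {
          mp_limb_t  dh, dl;
          refmpn_sub_ddmmss (&dh, &dl, CNST_LIMB(1), CNST_LIMB(0),
                             CNST_LIMB(0), CNST_LIMB(1));
          ASSERT (dh == 0 && dl == MP_LIMB_T_MAX);
        }
        #endif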
   462  
   463  
   464  /* set *w to x+y, return 0 or 1 carry */
   465  mp_limb_t
   466  ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
   467  {
   468    mp_limb_t  sum, cy;
   469  
   470    ASSERT_LIMB (x);
   471    ASSERT_LIMB (y);
   472  
   473    sum = x + y;
   474  #if GMP_NAIL_BITS == 0
   475    *w = sum;
   476    cy = (sum < x);
   477  #else
   478    *w = sum & GMP_NUMB_MASK;
   479    cy = (sum >> GMP_NUMB_BITS);
   480  #endif
   481    return cy;
   482  }
   483  
   484  /* set *w to x-y, return 0 or 1 borrow */
   485  mp_limb_t
   486  ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
   487  {
   488    mp_limb_t  diff, cy;
   489  
   490    ASSERT_LIMB (x);
   491    ASSERT_LIMB (y);
   492  
   493    diff = x - y;
   494  #if GMP_NAIL_BITS == 0
   495    *w = diff;
   496    cy = (diff > x);
   497  #else
   498    *w = diff & GMP_NUMB_MASK;
   499    cy = (diff >> GMP_NUMB_BITS) & 1;
   500  #endif
   501    return cy;
   502  }
   503  
   504  /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
   505  mp_limb_t
   506  adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
   507  {
   508    mp_limb_t  r;
   509  
   510    ASSERT_LIMB (x);
   511    ASSERT_LIMB (y);
   512    ASSERT (c == 0 || c == 1);
   513  
   514    r = ref_addc_limb (w, x, y);
   515    return r + ref_addc_limb (w, *w, c);
   516  }
   517  
   518  /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
   519  mp_limb_t
   520  sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
   521  {
   522    mp_limb_t  r;
   523  
   524    ASSERT_LIMB (x);
   525    ASSERT_LIMB (y);
   526    ASSERT (c == 0 || c == 1);
   527  
   528    r = ref_subc_limb (w, x, y);
   529    return r + ref_subc_limb (w, *w, c);
   530  }
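
        /* Illustrative sketch, not from the GMP sources: ref_addc_limb wraps
           modulo the numb size and reports the wrap as a carry, so
           GMP_NUMB_MAX + 1 gives limb 0 and carry 1; adc and sbb then chain
           that carry into the next limb.  Hypothetical self-test: */
        #if 0
        static void
        check_addc_limb (void)
        {
          mp_limb_t  w;
          ASSERT (ref_addc_limb (&w, GMP_NUMB_MAX, CNST_LIMB(1)) == 1);
          ASSERT (w == 0);
          ASSERT (adc (&w, GMP_NUMB_MAX, CNST_LIMB(0), CNST_LIMB(1)) == 1);
          ASSERT (w == 0);
        }
        #endif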
   531  
   532  
   533  #define AORS_1(operation)                               \
   534    {                                                     \
   535      mp_size_t  i;                                       \
   536  							\
   537      ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
   538      ASSERT (size >= 1);                                 \
   539      ASSERT_MPN (sp, size);                              \
   540      ASSERT_LIMB (n);                                    \
   541  							\
   542      for (i = 0; i < size; i++)                          \
   543        n = operation (&rp[i], sp[i], n);                 \
   544      return n;                                           \
   545    }
   546  
   547  mp_limb_t
   548  refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
   549  {
   550    AORS_1 (ref_addc_limb);
   551  }
   552  mp_limb_t
   553  refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
   554  {
   555    AORS_1 (ref_subc_limb);
   556  }
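
        /* Illustrative sketch, not from the GMP sources: the AORS_1 loop threads
           the single-limb carry through the whole vector, so adding 1 to an
           all-ones operand zeros it and carries out of the top.  Hypothetical
           self-test: */
        #if 0
        static void
        check_add_1 (void)
        {
          mp_limb_t  s[2] = { GMP_NUMB_MAX, GMP_NUMB_MAX }, r[2];
          ASSERT (refmpn_add_1 (r, s, 2, CNST_LIMB(1)) == 1);
          ASSERT (refmpn_zero_p (r, 2));
        }
        #endif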
   557  
   558  #define AORS_NC(operation)                                              \
   559    {                                                                     \
   560      mp_size_t  i;                                                       \
   561  									\
   562      ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
   563      ASSERT (carry == 0 || carry == 1);                                  \
   564      ASSERT (size >= 1);                                                 \
   565      ASSERT_MPN (s1p, size);                                             \
   566      ASSERT_MPN (s2p, size);                                             \
   567  									\
   568      for (i = 0; i < size; i++)                                          \
   569        carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
   570      return carry;                                                       \
   571    }
   572  
   573  mp_limb_t
   574  refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
   575  	       mp_limb_t carry)
   576  {
   577    AORS_NC (adc);
   578  }
   579  mp_limb_t
   580  refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
   581  	       mp_limb_t carry)
   582  {
   583    AORS_NC (sbb);
   584  }
   585  
   586  
   587  mp_limb_t
   588  refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   589  {
   590    return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
   591  }
   592  mp_limb_t
   593  refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   594  {
   595    return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
   596  }
   597  
   598  mp_limb_t
   599  refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   600  {
   601    if (cnd != 0)
   602      return refmpn_add_n (rp, s1p, s2p, size);
   603    else
   604      {
   605        refmpn_copyi (rp, s1p, size);
   606        return 0;
   607      }
   608  }
   609  mp_limb_t
   610  refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
   611  {
   612    if (cnd != 0)
   613      return refmpn_sub_n (rp, s1p, s2p, size);
   614    else
   615      {
   616        refmpn_copyi (rp, s1p, size);
   617        return 0;
   618      }
   619  }
   620  
   621  
   622  #define AORS_ERR1_N(operation)						\
   623    {                                                                     \
   624      mp_size_t  i;                                                       \
   625      mp_limb_t carry2;							\
   626  									\
   627      ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
   628      ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
   629      ASSERT (! refmpn_overlap_p (rp, size, yp, size));			\
   630      ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));			\
   631      ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));			\
   632      ASSERT (! refmpn_overlap_p (ep, 2, yp, size));			\
   633      ASSERT (! refmpn_overlap_p (ep, 2, rp, size));			\
   634  									\
   635      ASSERT (carry == 0 || carry == 1);					\
   636      ASSERT (size >= 1);							\
   637      ASSERT_MPN (s1p, size);						\
   638      ASSERT_MPN (s2p, size);						\
   639      ASSERT_MPN (yp, size);						\
   640  									\
   641      ep[0] = ep[1] = CNST_LIMB(0);					\
   642  									\
   643      for (i = 0; i < size; i++)                                          \
   644        {									\
   645  	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
   646  	if (carry == 1)							\
   647  	  {								\
   648  	    carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);	\
   649  	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
   650  	    ASSERT (carry2 == 0);					\
   651  	  }								\
   652        }									\
   653      return carry;                                                       \
   654    }
   655  
   656  mp_limb_t
   657  refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
   658  		   mp_ptr ep, mp_srcptr yp,
   659  		   mp_size_t size, mp_limb_t carry)
   660  {
   661    AORS_ERR1_N (adc);
   662  }
   663  mp_limb_t
   664  refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
   665  		   mp_ptr ep, mp_srcptr yp,
   666  		   mp_size_t size, mp_limb_t carry)
   667  {
   668    AORS_ERR1_N (sbb);
   669  }
   670  
   671  
   672  #define AORS_ERR2_N(operation)						\
   673    {                                                                     \
   674      mp_size_t  i;                                                       \
   675      mp_limb_t carry2;							\
   676  									\
   677      ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
   678      ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
   679      ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
   680      ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
   681      ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));			\
   682      ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));			\
   683      ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));			\
   684      ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));			\
   685      ASSERT (! refmpn_overlap_p (ep, 4, rp, size));			\
   686  									\
   687      ASSERT (carry == 0 || carry == 1);					\
   688      ASSERT (size >= 1);							\
   689      ASSERT_MPN (s1p, size);						\
   690      ASSERT_MPN (s2p, size);						\
   691      ASSERT_MPN (y1p, size);						\
   692      ASSERT_MPN (y2p, size);						\
   693  									\
   694      ep[0] = ep[1] = CNST_LIMB(0);					\
   695      ep[2] = ep[3] = CNST_LIMB(0);					\
   696  									\
   697      for (i = 0; i < size; i++)                                          \
   698        {									\
   699  	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
   700  	if (carry == 1)							\
   701  	  {								\
   702  	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
   703  	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
   704  	    ASSERT (carry2 == 0);					\
   705  	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
   706  	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
   707  	    ASSERT (carry2 == 0);					\
   708  	  }								\
   709        }									\
   710      return carry;                                                       \
   711    }
   712  
   713  mp_limb_t
   714  refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
   715  		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
   716  		   mp_size_t size, mp_limb_t carry)
   717  {
   718    AORS_ERR2_N (adc);
   719  }
   720  mp_limb_t
   721  refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
   722  		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
   723  		   mp_size_t size, mp_limb_t carry)
   724  {
   725    AORS_ERR2_N (sbb);
   726  }
   727  
   728  
   729  #define AORS_ERR3_N(operation)						\
   730    {                                                                     \
   731      mp_size_t  i;                                                       \
   732      mp_limb_t carry2;							\
   733  									\
   734      ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
   735      ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
   736      ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
   737      ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
   738      ASSERT (! refmpn_overlap_p (rp, size, y3p, size));			\
   739      ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));			\
   740      ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));			\
   741      ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));			\
   742      ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));			\
   743      ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));			\
   744      ASSERT (! refmpn_overlap_p (ep, 6, rp, size));			\
   745  									\
   746      ASSERT (carry == 0 || carry == 1);					\
   747      ASSERT (size >= 1);							\
   748      ASSERT_MPN (s1p, size);						\
   749      ASSERT_MPN (s2p, size);						\
   750      ASSERT_MPN (y1p, size);						\
   751      ASSERT_MPN (y2p, size);						\
   752      ASSERT_MPN (y3p, size);						\
   753  									\
   754      ep[0] = ep[1] = CNST_LIMB(0);					\
   755      ep[2] = ep[3] = CNST_LIMB(0);					\
   756      ep[4] = ep[5] = CNST_LIMB(0);					\
   757  									\
   758      for (i = 0; i < size; i++)                                          \
   759        {									\
   760  	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
   761  	if (carry == 1)							\
   762  	  {								\
   763  	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
   764  	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
   765  	    ASSERT (carry2 == 0);					\
   766  	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
   767  	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
   768  	    ASSERT (carry2 == 0);					\
   769  	    carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);	\
   770  	    carry2 = ref_addc_limb (&ep[5], ep[5], carry2);		\
   771  	    ASSERT (carry2 == 0);					\
   772  	  }								\
   773        }									\
   774      return carry;                                                       \
   775    }
   776  
   777  mp_limb_t
   778  refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
   779  		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
   780  		   mp_size_t size, mp_limb_t carry)
   781  {
   782    AORS_ERR3_N (adc);
   783  }
   784  mp_limb_t
   785  refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
   786  		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
   787  		   mp_size_t size, mp_limb_t carry)
   788  {
   789    AORS_ERR3_N (sbb);
   790  }
   791  
   792  
   793  mp_limb_t
   794  refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
   795  		 mp_size_t n, unsigned int s)
   796  {
   797    mp_limb_t cy;
   798    mp_ptr tp;
   799  
   800    ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
   801    ASSERT (n >= 1);
   802    ASSERT (0 < s && s < GMP_NUMB_BITS);
   803    ASSERT_MPN (up, n);
   804    ASSERT_MPN (vp, n);
   805  
   806    tp = refmpn_malloc_limbs (n);
   807    cy  = refmpn_lshift (tp, vp, n, s);
   808    cy += refmpn_add_n (rp, up, tp, n);
   809    free (tp);
   810    return cy;
   811  }
   812  mp_limb_t
   813  refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   814  {
   815    return refmpn_addlsh_n (rp, up, vp, n, 1);
   816  }
   817  mp_limb_t
   818  refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   819  {
   820    return refmpn_addlsh_n (rp, up, vp, n, 2);
   821  }
   822  mp_limb_t
   823  refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
   824  {
   825    return refmpn_addlsh_n (rp, rp, vp, n, s);
   826  }
   827  mp_limb_t
   828  refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   829  {
   830    return refmpn_addlsh_n (rp, rp, vp, n, 1);
   831  }
   832  mp_limb_t
   833  refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   834  {
   835    return refmpn_addlsh_n (rp, rp, vp, n, 2);
   836  }
   837  mp_limb_t
   838  refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
   839  {
   840    return refmpn_addlsh_n (rp, vp, rp, n, s);
   841  }
   842  mp_limb_t
   843  refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   844  {
   845    return refmpn_addlsh_n (rp, vp, rp, n, 1);
   846  }
   847  mp_limb_t
   848  refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   849  {
   850    return refmpn_addlsh_n (rp, vp, rp, n, 2);
   851  }
   852  mp_limb_t
   853  refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
   854  		  mp_size_t n, unsigned int s, mp_limb_t carry)
   855  {
   856    mp_limb_t cy;
   857  
   858    ASSERT (carry <= (CNST_LIMB(1) << s));
   859  
   860    cy = refmpn_addlsh_n (rp, up, vp, n, s);
   861    cy += refmpn_add_1 (rp, rp, n, carry);
   862    return cy;
   863  }
   864  mp_limb_t
   865  refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
   866  {
   867    return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
   868  }
   869  mp_limb_t
   870  refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
   871  {
   872    return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
   873  }
   874  
   875  mp_limb_t
   876  refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
   877  		 mp_size_t n, unsigned int s)
   878  {
   879    mp_limb_t cy;
   880    mp_ptr tp;
   881  
   882    ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
   883    ASSERT (n >= 1);
   884    ASSERT (0 < s && s < GMP_NUMB_BITS);
   885    ASSERT_MPN (up, n);
   886    ASSERT_MPN (vp, n);
   887  
   888    tp = refmpn_malloc_limbs (n);
   889    cy  = mpn_lshift (tp, vp, n, s);
   890    cy += mpn_sub_n (rp, up, tp, n);
   891    free (tp);
   892    return cy;
   893  }
   894  mp_limb_t
   895  refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   896  {
   897    return refmpn_sublsh_n (rp, up, vp, n, 1);
   898  }
   899  mp_limb_t
   900  refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   901  {
   902    return refmpn_sublsh_n (rp, up, vp, n, 2);
   903  }
   904  mp_limb_t
   905  refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
   906  {
   907    return refmpn_sublsh_n (rp, rp, vp, n, s);
   908  }
   909  mp_limb_t
   910  refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   911  {
   912    return refmpn_sublsh_n (rp, rp, vp, n, 1);
   913  }
   914  mp_limb_t
   915  refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   916  {
   917    return refmpn_sublsh_n (rp, rp, vp, n, 2);
   918  }
   919  mp_limb_t
   920  refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
   921  {
   922    return refmpn_sublsh_n (rp, vp, rp, n, s);
   923  }
   924  mp_limb_t
   925  refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   926  {
   927    return refmpn_sublsh_n (rp, vp, rp, n, 1);
   928  }
   929  mp_limb_t
   930  refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
   931  {
   932    return refmpn_sublsh_n (rp, vp, rp, n, 2);
   933  }
   934  mp_limb_t
   935  refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
   936  		  mp_size_t n, unsigned int s, mp_limb_t carry)
   937  {
   938    mp_limb_t cy;
   939  
   940    ASSERT (carry <= (CNST_LIMB(1) << s));
   941  
   942    cy = refmpn_sublsh_n (rp, up, vp, n, s);
   943    cy += refmpn_sub_1 (rp, rp, n, carry);
   944    return cy;
   945  }
   946  mp_limb_t
   947  refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
   948  {
   949    return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
   950  }
   951  mp_limb_t
   952  refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
   953  {
   954    return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
   955  }
   956  
   957  mp_limb_signed_t
   958  refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
   959  		 mp_size_t n, unsigned int s)
   960  {
   961    mp_limb_signed_t cy;
   962    mp_ptr tp;
   963  
   964    ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
   965    ASSERT (n >= 1);
   966    ASSERT (0 < s && s < GMP_NUMB_BITS);
   967    ASSERT_MPN (up, n);
   968    ASSERT_MPN (vp, n);
   969  
   970    tp = refmpn_malloc_limbs (n);
   971    cy  = mpn_lshift (tp, vp, n, s);
   972    cy -= mpn_sub_n (rp, tp, up, n);
   973    free (tp);
   974    return cy;
   975  }
   976  mp_limb_signed_t
   977  refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   978  {
   979    return refmpn_rsblsh_n (rp, up, vp, n, 1);
   980  }
   981  mp_limb_signed_t
   982  refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   983  {
   984    return refmpn_rsblsh_n (rp, up, vp, n, 2);
   985  }
   986  mp_limb_signed_t
   987  refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
   988  		  mp_size_t n, unsigned int s, mp_limb_signed_t carry)
   989  {
   990    mp_limb_signed_t cy;
   991  
   992    ASSERT (carry == -1 || (carry >> s) == 0);
   993  
   994    cy = refmpn_rsblsh_n (rp, up, vp, n, s);
   995    if (carry > 0)
   996      cy += refmpn_add_1 (rp, rp, n, carry);
   997    else
   998      cy -= refmpn_sub_1 (rp, rp, n, -carry);
   999    return cy;
  1000  }
  1001  mp_limb_signed_t
  1002  refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
  1003  {
  1004    return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
  1005  }
  1006  mp_limb_signed_t
  1007  refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
  1008  {
  1009    return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
  1010  }
  1011  
  1012  mp_limb_t
  1013  refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
  1014  {
  1015    mp_limb_t cya, cys;
  1016  
  1017    ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  1018    ASSERT (n >= 1);
  1019    ASSERT_MPN (up, n);
  1020    ASSERT_MPN (vp, n);
  1021  
  1022    cya = mpn_add_n (rp, up, vp, n);
  1023    cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  1024    rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  1025    return cys;
  1026  }
  1027  mp_limb_t
  1028  refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
  1029  {
  1030    mp_limb_t cya, cys;
  1031  
  1032    ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  1033    ASSERT (n >= 1);
  1034    ASSERT_MPN (up, n);
  1035    ASSERT_MPN (vp, n);
  1036  
  1037    cya = mpn_sub_n (rp, up, vp, n);
  1038    cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  1039    rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  1040    return cys;
  1041  }
  1042  
  1043  /* Twos complement negation (0 - src), return borrow. */
  1044  mp_limb_t
  1045  refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
  1046  {
  1047    mp_ptr     zeros;
  1048    mp_limb_t  ret;
  1049  
  1050    ASSERT (size >= 1);
  1051  
  1052    zeros = refmpn_malloc_limbs (size);
  1053    refmpn_fill (zeros, size, CNST_LIMB(0));
  1054    ret = refmpn_sub_n (dst, zeros, src, size);
  1055    free (zeros);
  1056    return ret;
  1057  }
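
        /* Illustrative sketch, not from the GMP sources: 0 - src wraps modulo
           b^size, so any non-zero src returns a borrow of 1 and leaves
           b^size - src in dst.  Hypothetical self-test: */
        #if 0
        static void
        check_neg (void)
        {
          mp_limb_t  s[1] = { 5 }, d[1];
          ASSERT (refmpn_neg (d, s, 1) == 1);
          ASSERT (d[0] == GMP_NUMB_MAX - 4);   /* b - 5 */
        }
        #endif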
  1058  
  1059  
  1060  #define AORS(aors_n, aors_1)                                    \
  1061    {                                                             \
  1062      mp_limb_t  c;                                               \
  1063      ASSERT (s1size >= s2size);                                  \
  1064      ASSERT (s2size >= 1);                                       \
  1065      c = aors_n (rp, s1p, s2p, s2size);                          \
  1066      if (s1size-s2size != 0)                                     \
  1067        c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
  1068      return c;                                                   \
  1069    }
  1070  mp_limb_t
  1071  refmpn_add (mp_ptr rp,
  1072  	    mp_srcptr s1p, mp_size_t s1size,
  1073  	    mp_srcptr s2p, mp_size_t s2size)
  1074  {
  1075    AORS (refmpn_add_n, refmpn_add_1);
  1076  }
  1077  mp_limb_t
  1078  refmpn_sub (mp_ptr rp,
  1079  	    mp_srcptr s1p, mp_size_t s1size,
  1080  	    mp_srcptr s2p, mp_size_t s2size)
  1081  {
  1082    AORS (refmpn_sub_n, refmpn_sub_1);
  1083  }
  1084  
  1085  
  1086  #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
  1087  #define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
  1088  
  1089  #define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
  1090  #define HIGHMASK  SHIFTHIGH(LOWMASK)
  1091  
  1092  #define LOWPART(x)   ((x) & LOWMASK)
  1093  #define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
  1094  
  1095  /* Set return (high) : *lo (low) to x*y, using full limbs not nails. */
  1096  mp_limb_t
  1097  refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
  1098  {
  1099    mp_limb_t  hi, s;
  1100  
  1101    *lo = LOWPART(x) * LOWPART(y);
  1102    hi = HIGHPART(x) * HIGHPART(y);
  1103  
  1104    s = LOWPART(x) * HIGHPART(y);
  1105    hi += HIGHPART(s);
  1106    s = SHIFTHIGH(LOWPART(s));
  1107    *lo += s;
  1108    hi += (*lo < s);
  1109  
  1110    s = HIGHPART(x) * LOWPART(y);
  1111    hi += HIGHPART(s);
  1112    s = SHIFTHIGH(LOWPART(s));
  1113    *lo += s;
  1114    hi += (*lo < s);
  1115  
  1116    return hi;
  1117  }
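
        /* Illustrative sketch, not from the GMP sources: the routine above is
           the schoolbook identity
               x*y = xh*yh*2^L + (xh*yl + xl*yh)*2^(L/2) + xl*yl,  L = GMP_LIMB_BITS,
           with each cross product split into halves so it can be accumulated
           into hi and *lo, the (*lo < s) tests catching the carries.
           Hypothetical self-test: */
        #if 0
        static void
        check_umul_ppmm (void)
        {
          mp_limb_t  lo, hi;
          hi = refmpn_umul_ppmm (&lo, MP_LIMB_T_MAX, CNST_LIMB(2));
          ASSERT (hi == 1 && lo == MP_LIMB_T_MAX - 1);  /* (2^L-1)*2 == 2^(L+1)-2 */
        }
        #endif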
  1118  
  1119  mp_limb_t
  1120  refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
  1121  {
  1122    return refmpn_umul_ppmm (lo, x, y);
  1123  }
  1124  
  1125  mp_limb_t
  1126  refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
  1127  	       mp_limb_t carry)
  1128  {
  1129    mp_size_t  i;
  1130    mp_limb_t  hi, lo;
  1131  
  1132    ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  1133    ASSERT (size >= 1);
  1134    ASSERT_MPN (sp, size);
  1135    ASSERT_LIMB (multiplier);
  1136    ASSERT_LIMB (carry);
  1137  
  1138    multiplier <<= GMP_NAIL_BITS;
  1139    for (i = 0; i < size; i++)
  1140      {
  1141        hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
  1142        lo >>= GMP_NAIL_BITS;
  1143        ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
  1144        rp[i] = lo;
  1145        carry = hi;
  1146      }
  1147    return carry;
  1148  }
  1149  
  1150  mp_limb_t
  1151  refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
  1152  {
  1153    return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
  1154  }
  1155  
  1156  
  1157  mp_limb_t
  1158  refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
  1159  	      mp_srcptr mult, mp_size_t msize)
  1160  {
  1161    mp_ptr     src_copy;
  1162    mp_limb_t  ret;
  1163    mp_size_t  i;
  1164  
  1165    ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
  1166    ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  1167    ASSERT (size >= msize);
  1168    ASSERT_MPN (mult, msize);
  1169  
  1170    /* in case dst==src */
  1171    src_copy = refmpn_malloc_limbs (size);
  1172    refmpn_copyi (src_copy, src, size);
  1173    src = src_copy;
  1174  
  1175    dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
  1176    for (i = 1; i < msize-1; i++)
  1177      dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  1178    ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  1179  
  1180    free (src_copy);
  1181    return ret;
  1182  }
  1183  
  1184  mp_limb_t
  1185  refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1186  {
  1187    return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
  1188  }
  1189  mp_limb_t
  1190  refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1191  {
  1192    return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
  1193  }
  1194  mp_limb_t
  1195  refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1196  {
  1197    return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
  1198  }
  1199  mp_limb_t
  1200  refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1201  {
  1202    return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
  1203  }
  1204  mp_limb_t
  1205  refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1206  {
  1207    return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
  1208  }
  1209  
  1210  #define AORSMUL_1C(operation_n)                                 \
  1211    {                                                             \
  1212      mp_ptr     p;                                               \
  1213      mp_limb_t  ret;                                             \
  1214  								\
  1215      ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
  1216  								\
  1217      p = refmpn_malloc_limbs (size);                             \
  1218      ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
  1219      ret += operation_n (rp, rp, p, size);                       \
  1220  								\
  1221      free (p);                                                   \
  1222      return ret;                                                 \
  1223    }
  1224  
  1225  mp_limb_t
  1226  refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
  1227  		  mp_limb_t multiplier, mp_limb_t carry)
  1228  {
  1229    AORSMUL_1C (refmpn_add_n);
  1230  }
  1231  mp_limb_t
  1232  refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
  1233  		  mp_limb_t multiplier, mp_limb_t carry)
  1234  {
  1235    AORSMUL_1C (refmpn_sub_n);
  1236  }
  1237  
  1238  
  1239  mp_limb_t
  1240  refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
  1241  {
  1242    return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
  1243  }
  1244  mp_limb_t
  1245  refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
  1246  {
  1247    return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
  1248  }
  1249  
  1250  
  1251  mp_limb_t
  1252  refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
  1253  		 mp_srcptr mult, mp_size_t msize)
  1254  {
  1255    mp_ptr     src_copy;
  1256    mp_limb_t  ret;
  1257    mp_size_t  i;
  1258  
  1259    ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
  1260    ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  1261    ASSERT (size >= msize);
  1262    ASSERT_MPN (mult, msize);
  1263  
  1264    /* in case dst==src */
  1265    src_copy = refmpn_malloc_limbs (size);
  1266    refmpn_copyi (src_copy, src, size);
  1267    src = src_copy;
  1268  
  1269    for (i = 0; i < msize-1; i++)
  1270      dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  1271    ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  1272  
  1273    free (src_copy);
  1274    return ret;
  1275  }
  1276  
  1277  mp_limb_t
  1278  refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1279  {
  1280    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
  1281  }
  1282  mp_limb_t
  1283  refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1284  {
  1285    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
  1286  }
  1287  mp_limb_t
  1288  refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1289  {
  1290    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
  1291  }
  1292  mp_limb_t
  1293  refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1294  {
  1295    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
  1296  }
  1297  mp_limb_t
  1298  refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1299  {
  1300    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
  1301  }
  1302  mp_limb_t
  1303  refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1304  {
  1305    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
  1306  }
  1307  mp_limb_t
  1308  refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
  1309  {
  1310    return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
  1311  }
  1312  
  1313  mp_limb_t
  1314  refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
  1315  		  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
  1316  		  mp_limb_t carry)
  1317  {
  1318    mp_ptr p;
  1319    mp_limb_t acy, scy;
  1320  
  1321    /* Destinations can't overlap. */
  1322    ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  1323    ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  1324    ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  1325    ASSERT (size >= 1);
  1326  
  1327    /* in case r1p==s1p or r1p==s2p */
  1328    p = refmpn_malloc_limbs (size);
  1329  
  1330    acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  1331    scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  1332    refmpn_copyi (r1p, p, size);
  1333  
  1334    free (p);
  1335    return 2 * acy + scy;
  1336  }
  1337  
  1338  mp_limb_t
  1339  refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
  1340  		 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
  1341  {
  1342    return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
  1343  }
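
        /* Illustrative sketch, not from the GMP sources: the return value packs
           the add carry into bit 1 and the sub borrow into bit 0, matching how
           the incoming carry is split with carry>>1 and carry&1 above.
           Hypothetical self-test: */
        #if 0
        static void
        check_add_n_sub_n (void)
        {
          mp_limb_t  x[1] = { GMP_NUMB_MAX }, y[1] = { 1 }, sum[1], diff[1];
          /* the add carries, the sub doesn't borrow: 2*1 + 0 */
          ASSERT (refmpn_add_n_sub_n (sum, diff, x, y, 1) == 2);
        }
        #endif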
  1344  
  1345  
  1346  /* Right shift hi,lo and return the low limb of the result.
  1347     Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
  1348  mp_limb_t
  1349  rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
  1350  {
  1351    ASSERT (shift < GMP_NUMB_BITS);
  1352    if (shift == 0)
  1353      return lo;
  1354    else
  1355      return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
  1356  }
  1357  
  1358  /* Left shift hi,lo and return the high limb of the result.
  1359     Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
  1360  mp_limb_t
  1361  lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
  1362  {
  1363    ASSERT (shift < GMP_NUMB_BITS);
  1364    if (shift == 0)
  1365      return hi;
  1366    else
  1367      return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
  1368  }
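
        /* Illustrative sketch, not from the GMP sources: rshift_make drags the
           low s bits of hi into the top of lo, so hi=1, lo=0, s=1 yields just
           the high numb bit, and lshift_make is the mirror image.  Hypothetical
           self-test: */
        #if 0
        static void
        check_shift_make (void)
        {
          ASSERT (rshift_make (CNST_LIMB(1), CNST_LIMB(0), 1) == GMP_NUMB_HIGHBIT);
          ASSERT (lshift_make (CNST_LIMB(0), GMP_NUMB_HIGHBIT, 1) == CNST_LIMB(1));
        }
        #endif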
  1369  
  1370  
  1371  mp_limb_t
  1372  refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
  1373  {
  1374    mp_limb_t  ret;
  1375    mp_size_t  i;
  1376  
  1377    ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  1378    ASSERT (size >= 1);
  1379    ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  1380    ASSERT_MPN (sp, size);
  1381  
  1382    ret = rshift_make (sp[0], CNST_LIMB(0), shift);
  1383  
  1384    for (i = 0; i < size-1; i++)
  1385      rp[i] = rshift_make (sp[i+1], sp[i], shift);
  1386  
  1387    rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  1388    return ret;
  1389  }
  1390  
  1391  mp_limb_t
  1392  refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
  1393  {
  1394    mp_limb_t  ret;
  1395    mp_size_t  i;
  1396  
  1397    ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  1398    ASSERT (size >= 1);
  1399    ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  1400    ASSERT_MPN (sp, size);
  1401  
  1402    ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
  1403  
  1404    for (i = size-2; i >= 0; i--)
  1405      rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
  1406  
  1407    rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  1408    return ret;
  1409  }
  1410  
  1411  void
  1412  refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
  1413  {
  1414    mp_size_t i;
  1415  
  1416    /* We work downwards since mpn_lshiftc needs that. */
  1417    ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  1418  
  1419    for (i = size - 1; i >= 0; i--)
  1420      rp[i] = (~sp[i]) & GMP_NUMB_MASK;
  1421  }
  1422  
  1423  mp_limb_t
  1424  refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
  1425  {
  1426    mp_limb_t res;
  1427  
  1428    /* No asserts here, refmpn_lshift will assert what we need. */
  1429  
  1430    res = refmpn_lshift (rp, sp, size, shift);
  1431    refmpn_com (rp, rp, size);
  1432    return res;
  1433  }
  1434  
  1435  /* accepting shift==0 and doing a plain copyi or copyd in that case */
  1436  mp_limb_t
  1437  refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
  1438  {
  1439    if (shift == 0)
  1440      {
  1441        refmpn_copyi (rp, sp, size);
  1442        return 0;
  1443      }
  1444    else
  1445      {
  1446        return refmpn_rshift (rp, sp, size, shift);
  1447      }
  1448  }
  1449  mp_limb_t
  1450  refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
  1451  {
  1452    if (shift == 0)
  1453      {
  1454        refmpn_copyd (rp, sp, size);
  1455        return 0;
  1456      }
  1457    else
  1458      {
  1459        return refmpn_lshift (rp, sp, size, shift);
  1460      }
  1461  }
  1462  
  1463  /* accepting size==0 too */
  1464  mp_limb_t
  1465  refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
  1466  			   unsigned shift)
  1467  {
  1468    return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
  1469  }
  1470  mp_limb_t
  1471  refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
  1472  			   unsigned shift)
  1473  {
  1474    return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
  1475  }
  1476  
  1477  /* Divide h,l by d, return quotient, store remainder to *rp.
  1478     Operates on full limbs, not nails.
  1479     Must have h < d.
  1480     __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
  1481  mp_limb_t
  1482  refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
  1483  {
  1484    mp_limb_t  q, r;
  1485    int  n;
  1486  
  1487    ASSERT (d != 0);
  1488    ASSERT (h < d);
  1489  
  1490  #if 0
  1491    udiv_qrnnd (q, r, h, l, d);
  1492    *rp = r;
  1493    return q;
  1494  #endif
  1495  
  1496    n = refmpn_count_leading_zeros (d);
  1497    d <<= n;
  1498  
  1499    if (n != 0)
  1500      {
  1501        h = (h << n) | (l >> (GMP_LIMB_BITS - n));
  1502        l <<= n;
  1503      }
  1504  
  1505    __udiv_qrnnd_c (q, r, h, l, d);
  1506    r >>= n;
  1507    *rp = r;
  1508    return q;
  1509  }
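
        /* Illustrative sketch, not from the GMP sources: __udiv_qrnnd_c needs a
           normalized (high bit set) divisor, hence the shift of d by its leading
           zero count, the matching shift of h:l, and the shift of the remainder
           back down at the end.  Hypothetical self-test: */
        #if 0
        static void
        check_udiv_qrnnd (void)
        {
          mp_limb_t  r;
          ASSERT (refmpn_udiv_qrnnd (&r, CNST_LIMB(0), CNST_LIMB(7),
                                     CNST_LIMB(3)) == 2);
          ASSERT (r == 1);   /* 7 == 2*3 + 1 */
        }
        #endif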
  1510  
  1511  mp_limb_t
  1512  refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
  1513  {
  1514    return refmpn_udiv_qrnnd (rp, h, l, d);
  1515  }
  1516  
  1517  /* This little subroutine avoids some bad code generation from i386 gcc 3.0
  1518     -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
  1519  static mp_limb_t
  1520  refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
  1521  			     mp_limb_t divisor, mp_limb_t carry)
  1522  {
  1523    mp_size_t  i;
  1524    mp_limb_t rem[1];
  1525    for (i = size-1; i >= 0; i--)
  1526      {
  1527        rp[i] = refmpn_udiv_qrnnd (rem, carry,
  1528  				 sp[i] << GMP_NAIL_BITS,
  1529  				 divisor << GMP_NAIL_BITS);
  1530        carry = *rem >> GMP_NAIL_BITS;
  1531      }
  1532    return carry;
  1533  }
  1534  
  1535  mp_limb_t
  1536  refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
  1537  		  mp_limb_t divisor, mp_limb_t carry)
  1538  {
  1539    mp_ptr     sp_orig;
  1540    mp_ptr     prod;
  1541    mp_limb_t  carry_orig;
  1542  
  1543    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  1544    ASSERT (size >= 0);
  1545    ASSERT (carry < divisor);
  1546    ASSERT_MPN (sp, size);
  1547    ASSERT_LIMB (divisor);
  1548    ASSERT_LIMB (carry);
  1549  
  1550    if (size == 0)
  1551      return carry;
  1552  
  1553    sp_orig = refmpn_memdup_limbs (sp, size);
  1554    prod = refmpn_malloc_limbs (size);
  1555    carry_orig = carry;
  1556  
  1557    carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
  1558  
  1559    /* check by multiplying back */
  1560  #if 0
  1561    printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
  1562  	  size, divisor, carry_orig, carry);
  1563    mpn_trace("s",sp_orig,size);
  1564    mpn_trace("r",rp,size);
  1565    printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
  1566    mpn_trace("p",prod,size);
  1567  #endif
  1568    ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
  1569    ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
  1570    free (sp_orig);
  1571    free (prod);
  1572  
  1573    return carry;
  1574  }
  1575  
  1576  mp_limb_t
  1577  refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
  1578  {
  1579    return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
  1580  }
  1581  
  1582  
  1583  mp_limb_t
  1584  refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
  1585  	       mp_limb_t carry)
  1586  {
  1587    mp_ptr  p = refmpn_malloc_limbs (size);
  1588    carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  1589    free (p);
  1590    return carry;
  1591  }
  1592  
  1593  mp_limb_t
  1594  refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
  1595  {
  1596    return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
  1597  }
  1598  
  1599  mp_limb_t
  1600  refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
  1601  		     mp_limb_t inverse)
  1602  {
  1603    ASSERT (divisor & GMP_NUMB_HIGHBIT);
  1604    ASSERT (inverse == refmpn_invert_limb (divisor));
  1605    return refmpn_mod_1 (sp, size, divisor);
  1606  }
  1607  
  1608  /* This implementation will be rather slow, but has the advantage of being
  1609     in a different style from the libgmp versions.  */
  1610  mp_limb_t
  1611  refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
  1612  {
  1613    ASSERT ((GMP_NUMB_BITS % 4) == 0);
  1614    return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
  1615  }
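
        /* Illustrative sketch, not from the GMP sources: the modulus is
           2^(3*GMP_NUMB_BITS/4) - 1 (2^48 - 1 for 64-bit limbs), chosen so the
           real mpn_mod_34lsub1 can fold three-quarter-limb pieces with plain
           additions; this reference simply divides.  Hypothetical self-test: */
        #if 0
        static void
        check_mod_34lsub1 (void)
        {
          mp_limb_t  p[1] = { CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4) };
          ASSERT (refmpn_mod_34lsub1 (p, 1) == 1);   /* 2^(3k) mod (2^(3k)-1) */
        }
        #endif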
  1616  
  1617  
  1618  mp_limb_t
  1619  refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
  1620  		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
  1621  		  mp_limb_t carry)
  1622  {
  1623    mp_ptr  z;
  1624  
  1625    z = refmpn_malloc_limbs (xsize);
  1626    refmpn_fill (z, xsize, CNST_LIMB(0));
  1627  
  1628    carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  1629    carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
  1630  
  1631    free (z);
  1632    return carry;
  1633  }
  1634  
  1635  mp_limb_t
  1636  refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
  1637  		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
  1638  {
  1639    return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
  1640  }
  1641  
  1642  mp_limb_t
  1643  refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
  1644  			mp_srcptr sp, mp_size_t size,
  1645  			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
  1646  {
  1647    ASSERT (size >= 0);
  1648    ASSERT (shift == refmpn_count_leading_zeros (divisor));
  1649    ASSERT (inverse == refmpn_invert_limb (divisor << shift));
  1650  
  1651    return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
  1652  }
  1653  
  1654  mp_limb_t
  1655  refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
  1656  		 mp_ptr np, mp_size_t nn,
  1657  		 mp_srcptr dp)
  1658  {
  1659    mp_ptr tp;
  1660    mp_limb_t qh;
  1661  
  1662    tp = refmpn_malloc_limbs (nn + qxn);
  1663    refmpn_zero (tp, qxn);
  1664    refmpn_copyi (tp + qxn, np, nn);
  1665    qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
  1666    refmpn_copyi (np, tp, 2);
  1667    free (tp);
  1668    return qh;
  1669  }
  1670  
  1671  /* Inverse is floor((b*(b-d)-1) / d), per the "division by invariant
  1672     integers" paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note that
  1673     -d-1 < d since d has the high bit set. */
  1674  
  1675  mp_limb_t
  1676  refmpn_invert_limb (mp_limb_t d)
  1677  {
  1678    mp_limb_t r;
  1679    ASSERT (d & GMP_LIMB_HIGHBIT);
  1680    return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
  1681  }
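
        /* Illustrative sketch, not from the GMP sources: for the smallest valid
           divisor d = 2^(GMP_LIMB_BITS-1), floor((b*(b-d)-1)/d) with
           b = 2^GMP_LIMB_BITS works out to b-1, an all-ones inverse.
           Hypothetical self-test: */
        #if 0
        static void
        check_invert_limb (void)
        {
          ASSERT (refmpn_invert_limb (GMP_LIMB_HIGHBIT) == MP_LIMB_T_MAX);
        }
        #endif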
  1682  
  1683  void
  1684  refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
  1685  {
  1686    mp_ptr qp, tp;
  1687    TMP_DECL;
  1688    TMP_MARK;
  1689  
  1690    tp = TMP_ALLOC_LIMBS (2 * n);
  1691    qp = TMP_ALLOC_LIMBS (n + 1);
  1692  
  1693    MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);
  1694  
  1695    refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
  1696    refmpn_copyi (rp, qp, n);
  1697  
  1698    TMP_FREE;
  1699  }
  1700  
  1701  void
  1702  refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
  1703  {
  1704    mp_ptr tp;
  1705    mp_limb_t binv;
  1706    TMP_DECL;
  1707    TMP_MARK;
  1708  
  1709    /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
  1710       code.  To make up for it, we check that the inverse is correct using a
  1711       multiply.  */
  1712  
  1713    tp = TMP_ALLOC_LIMBS (2 * n);
  1714  
  1715    MPN_ZERO (tp, n);
  1716    tp[0] = 1;
  1717    binvert_limb (binv, up[0]);
  1718    mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
  1719  
  1720    refmpn_mul_n (tp, rp, up, n);
  1721    ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
  1722  
  1723    TMP_FREE;
  1724  }
  1725  
  1726  /* The aim is to produce a dst quotient and return a remainder c, satisfying
  1727     c*b^n + src-i == 3*dst, where i is the incoming carry.
  1728  
  1729     Some value c==0, c==1 or c==2 will satisfy, so just try each.
  1730  
  1731     If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
  1732     remainder from the first division attempt determines the correct
  1733     remainder (3-c), but don't bother with that, since we can't guarantee
  1734     anything about GMP_NUMB_BITS when using nails.
  1735  
  1736     If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
  1737     complement negative, ie. b^n+src-i, and the calculation produces c1
  1738     satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
  1739     means it's enough to just add any borrow back at the end.
  1740  
  1741     A borrow only occurs when src==0 or src==1, and, by the same reasoning as in
  1742     mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
  1743     or 1 respectively, so with 1 added the final return value is still in the
  1744     prescribed range 0 to 2. */
  1745  
  1746  mp_limb_t
  1747  refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
  1748  {
  1749    mp_ptr     spcopy;
  1750    mp_limb_t  c, cs;
  1751  
  1752    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  1753    ASSERT (size >= 1);
  1754    ASSERT (carry <= 2);
  1755    ASSERT_MPN (sp, size);
  1756  
  1757    spcopy = refmpn_malloc_limbs (size);
  1758    cs = refmpn_sub_1 (spcopy, sp, size, carry);
  1759  
  1760    for (c = 0; c <= 2; c++)
  1761      if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
  1762        goto done;
  1763    ASSERT_FAIL (no value of c satisfies);
  1764  
  1765   done:
  1766    c += cs;
  1767    ASSERT (c <= 2);
  1768  
  1769    free (spcopy);
  1770    return c;
  1771  }
  1772  
  1773  mp_limb_t
  1774  refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
  1775  {
  1776    return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
  1777  }
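
        /* A worked example (hypothetical): 3*b + 9 = 3*(b + 3) with b =
           2^GMP_NUMB_BITS, so the quotient limbs are {3, 1} and the returned
           remainder is 0.  */
        #if 0
        static void
        example_divexact_by3 (void)
        {
          mp_limb_t  src[2] = { 9, 3 };
          mp_limb_t  q[2];
          ASSERT_ALWAYS (refmpn_divexact_by3 (q, src, (mp_size_t) 2) == 0);
          ASSERT_ALWAYS (q[0] == 3 && q[1] == 1);
        }
        #endif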
  1778  
  1779  
  1780  /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
  1781  void
  1782  refmpn_mul_basecase (mp_ptr prodp,
  1783  		     mp_srcptr up, mp_size_t usize,
  1784  		     mp_srcptr vp, mp_size_t vsize)
  1785  {
  1786    mp_size_t i;
  1787  
  1788    ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  1789    ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  1790    ASSERT (usize >= vsize);
  1791    ASSERT (vsize >= 1);
  1792    ASSERT_MPN (up, usize);
  1793    ASSERT_MPN (vp, vsize);
  1794  
  1795    prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  1796    for (i = 1; i < vsize; i++)
  1797      prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
  1798  }
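
        /* A small schoolbook example (ours): (b+1)^2 = b^2 + 2b + 1, with b =
           2^GMP_NUMB_BITS, giving product limbs {1, 2, 1, 0}.  */
        #if 0
        static void
        example_mul_basecase (void)
        {
          mp_limb_t  u[2] = { 1, 1 };
          mp_limb_t  p[4];
          refmpn_mul_basecase (p, u, (mp_size_t) 2, u, (mp_size_t) 2);
          ASSERT_ALWAYS (p[0] == 1 && p[1] == 2 && p[2] == 1 && p[3] == 0);
        }
        #endif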
  1799  
  1800  
  1801  /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
  1802  void
  1803  refmpn_mulmid_basecase (mp_ptr rp,
  1804  			mp_srcptr up, mp_size_t un,
  1805  			mp_srcptr vp, mp_size_t vn)
  1806  {
  1807    mp_limb_t cy;
  1808    mp_size_t i;
  1809  
  1810    ASSERT (un >= vn);
  1811    ASSERT (vn >= 1);
  1812    ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
  1813    ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
  1814    ASSERT_MPN (up, un);
  1815    ASSERT_MPN (vp, vn);
  1816  
  1817    rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
  1818    rp[un - vn + 2] = CNST_LIMB (0);
  1819    for (i = 1; i < vn; i++)
  1820      {
  1821        cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
  1822        cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
  1823        cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
  1824        ASSERT (cy == 0);
  1825      }
  1826  }
  1827  
  1828  void
  1829  refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
  1830  		      mp_ptr scratch)
  1831  {
  1832    refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
  1833  }
  1834  
  1835  void
  1836  refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
  1837  {
  1838    /* FIXME: this could be made faster by using refmpn_mul and then subtracting
  1839       off products near the middle product region boundary */
  1840    refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
  1841  }
  1842  
  1843  void
  1844  refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
  1845  	       mp_srcptr vp, mp_size_t vn)
  1846  {
  1847    /* FIXME: this could be made faster by using refmpn_mul and then subtracting
  1848       off products near the middle product region boundary */
  1849    refmpn_mulmid_basecase (rp, up, un, vp, vn);
  1850  }
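
        /* A degenerate check (hypothetical): with vn == 1 the middle product
           window covers the whole product, so refmpn_mulmid reduces to
           refmpn_mul_1 plus zeros in the two extra high limbs.  */
        #if 0
        static void
        example_mulmid (void)
        {
          mp_limb_t  u[3] = { 1, 2, 3 };
          mp_limb_t  v[1] = { 7 };
          mp_limb_t  r[5];   /* un - vn + 3 limbs */
          refmpn_mulmid (r, u, (mp_size_t) 3, v, (mp_size_t) 1);
          ASSERT_ALWAYS (r[0] == 7 && r[1] == 14 && r[2] == 21
                         && r[3] == 0 && r[4] == 0);
        }
        #endif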
  1851  
  1852  
  1853  
  1854  #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
  1855  #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
  1856  #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
  1857  #if WANT_FFT
  1858  #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
  1859  #else
  1860  #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom6h here */
  1861  #endif
  1862  
  1863  void
  1864  refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
  1865  {
  1866    mp_ptr tp, rp;
  1867    mp_size_t tn;
  1868  
  1869    if (vn < TOOM3_THRESHOLD)
  1870      {
  1871        /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */
  1872        if (vn != 0)
  1873  	refmpn_mul_basecase (wp, up, un, vp, vn);
  1874        else
  1875  	MPN_ZERO (wp, un);
  1876        return;
  1877      }
  1878  
  1879    MPN_ZERO (wp, vn);
  1880    rp = refmpn_malloc_limbs (2 * vn);
  1881  
  1882    if (vn < TOOM4_THRESHOLD)
  1883      tn = mpn_toom22_mul_itch (vn, vn);
  1884    else if (vn < TOOM6_THRESHOLD)
  1885      tn = mpn_toom33_mul_itch (vn, vn);
  1886    else if (vn < FFT_THRESHOLD)
  1887      tn = mpn_toom44_mul_itch (vn, vn);
  1888    else
  1889      tn = mpn_toom6h_mul_itch (vn, vn);
  1890    tp = refmpn_malloc_limbs (tn);
  1891  
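          /* Slice {up,un} into vn-limb chunks, form each balanced vn x vn
             product with a toom routine, and accumulate the products at
             successive vn-limb offsets of wp.  */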
  1892    while (un >= vn)
  1893      {
  1894        if (vn < TOOM4_THRESHOLD)
  1895  	/* In the toom3 range, use mpn_toom22_mul.  */
  1896  	mpn_toom22_mul (rp, up, vn, vp, vn, tp);
  1897        else if (vn < TOOM6_THRESHOLD)
  1898  	/* In the toom4 range, use mpn_toom33_mul.  */
  1899  	mpn_toom33_mul (rp, up, vn, vp, vn, tp);
  1900        else if (vn < FFT_THRESHOLD)
  1901  	/* In the toom6 range, use mpn_toom44_mul.  */
  1902  	mpn_toom44_mul (rp, up, vn, vp, vn, tp);
  1903        else
  1904  	/* For the largest operands, use mpn_toom6h_mul.  */
  1905  	mpn_toom6h_mul (rp, up, vn, vp, vn, tp);
  1906  
  1907        ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
  1908        wp += vn;
  1909  
  1910        up += vn;
  1911        un -= vn;
  1912      }
  1913  
  1914    free (tp);
  1915  
  1916    if (un != 0)
  1917      {
  1918        refmpn_mul (rp, vp, vn, up, un);
  1919        ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
  1920      }
  1921    free (rp);
  1922  }
  1923  
  1924  void
  1925  refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  1926  {
  1927    refmpn_mul (prodp, up, size, vp, size);
  1928  }
  1929  
  1930  void
  1931  refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  1932  {
  1933    mp_ptr tp = refmpn_malloc_limbs (2*size);
  1934    refmpn_mul (tp, up, size, vp, size);
  1935    refmpn_copyi (prodp, tp, size);
  1936    free (tp);
  1937  }
  1938  
  1939  void
  1940  refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
  1941  {
  1942    refmpn_mul (dst, src, size, src, size);
  1943  }
  1944  
  1945  void
  1946  refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
  1947  {
  1948    refmpn_mullo_n (dst, src, src, size);
  1949  }
  1950  
  1951  /* Allowing usize<vsize, usize==0 or vsize==0. */
  1952  void
  1953  refmpn_mul_any (mp_ptr prodp,
  1954                  mp_srcptr up, mp_size_t usize,
  1955                  mp_srcptr vp, mp_size_t vsize)
  1956  {
  1957    ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  1958    ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  1959    ASSERT (usize >= 0);
  1960    ASSERT (vsize >= 0);
  1961    ASSERT_MPN (up, usize);
  1962    ASSERT_MPN (vp, vsize);
  1963  
  1964    if (usize == 0)
  1965      {
  1966        refmpn_fill (prodp, vsize, CNST_LIMB(0));
  1967        return;
  1968      }
  1969  
  1970    if (vsize == 0)
  1971      {
  1972        refmpn_fill (prodp, usize, CNST_LIMB(0));
  1973        return;
  1974      }
  1975  
  1976    if (usize >= vsize)
  1977      refmpn_mul (prodp, up, usize, vp, vsize);
  1978    else
  1979      refmpn_mul (prodp, vp, vsize, up, usize);
  1980  }
  1981  
  1982  
  1983  mp_limb_t
  1984  refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
  1985  {
  1986    mp_limb_t  x;
  1987    int  twos;
  1988  
  1989    ASSERT (y != 0);
  1990    ASSERT (! refmpn_zero_p (xp, xsize));
  1991    ASSERT_MPN (xp, xsize);
  1992    ASSERT_LIMB (y);
  1993  
  1994    x = refmpn_mod_1 (xp, xsize, y);
  1995    if (x == 0)
  1996      return y;
  1997  
  1998    twos = 0;
  1999    while ((x & 1) == 0 && (y & 1) == 0)
  2000      {
  2001        x >>= 1;
  2002        y >>= 1;
  2003        twos++;
  2004      }
  2005  
  2006    for (;;)
  2007      {
  2008        while ((x & 1) == 0)  x >>= 1;
  2009        while ((y & 1) == 0)  y >>= 1;
  2010  
  2011        if (x < y)
  2012  	MP_LIMB_T_SWAP (x, y);
  2013  
  2014        x -= y;
  2015        if (x == 0)
  2016  	break;
  2017      }
  2018  
  2019    return y << twos;
  2020  }
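
        /* A binary-GCD example (ours): gcd(12, 18) = 6; the common factor of 2
           is stripped first, then the odd-odd subtract/shift loop runs.  */
        #if 0
        static void
        example_gcd_1 (void)
        {
          mp_limb_t  x[1] = { 12 };
          ASSERT_ALWAYS (refmpn_gcd_1 (x, (mp_size_t) 1, CNST_LIMB(18)) == 6);
        }
        #endif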
  2021  
  2022  
  2023  /* Based on the full limb x, not nails. */
  2024  unsigned
  2025  refmpn_count_leading_zeros (mp_limb_t x)
  2026  {
  2027    unsigned  n = 0;
  2028  
  2029    ASSERT (x != 0);
  2030  
  2031    while ((x & GMP_LIMB_HIGHBIT) == 0)
  2032      {
  2033        x <<= 1;
  2034        n++;
  2035      }
  2036    return n;
  2037  }
  2038  
  2039  /* Full limbs allowed, not limited to nails. */
  2040  unsigned
  2041  refmpn_count_trailing_zeros (mp_limb_t x)
  2042  {
  2043    unsigned  n = 0;
  2044  
  2045    ASSERT (x != 0);
  2046    ASSERT_LIMB (x);
  2047  
  2048    while ((x & 1) == 0)
  2049      {
  2050        x >>= 1;
  2051        n++;
  2052      }
  2053    return n;
  2054  }
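
        /* Quick examples (ours): 8 = 2^3 has 3 trailing zeros and
           GMP_LIMB_BITS-4 leading zeros.  */
        #if 0
        static void
        example_count_zeros (void)
        {
          ASSERT_ALWAYS (refmpn_count_trailing_zeros (CNST_LIMB(8)) == 3);
          ASSERT_ALWAYS (refmpn_count_leading_zeros (CNST_LIMB(8))
                         == GMP_LIMB_BITS - 4);
        }
        #endif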
  2055  
  2056  /* Strip factors of two (low zero bits) from {p,size} by right shifting.
  2057     The return value is the number of twos stripped.  */
  2058  mp_size_t
  2059  refmpn_strip_twos (mp_ptr p, mp_size_t size)
  2060  {
  2061    mp_size_t  limbs;
  2062    unsigned   shift;
  2063  
  2064    ASSERT (size >= 1);
  2065    ASSERT (! refmpn_zero_p (p, size));
  2066    ASSERT_MPN (p, size);
  2067  
  2068    for (limbs = 0; p[0] == 0; limbs++)
  2069      {
  2070        refmpn_copyi (p, p+1, size-1);
  2071        p[size-1] = 0;
  2072      }
  2073  
  2074    shift = refmpn_count_trailing_zeros (p[0]);
  2075    if (shift)
  2076      refmpn_rshift (p, p, size, shift);
  2077  
  2078    return limbs*GMP_NUMB_BITS + shift;
  2079  }
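
        /* A worked example (hypothetical): {0, 12} = 3 * 2^(GMP_NUMB_BITS+2),
           so GMP_NUMB_BITS+2 twos are stripped, leaving {3, 0}.  */
        #if 0
        static void
        example_strip_twos (void)
        {
          mp_limb_t  p[2] = { 0, 12 };
          ASSERT_ALWAYS (refmpn_strip_twos (p, (mp_size_t) 2)
                         == GMP_NUMB_BITS + 2);
          ASSERT_ALWAYS (p[0] == 3 && p[1] == 0);
        }
        #endif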
  2080  
  2081  mp_limb_t
  2082  refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
  2083  {
  2084    int       cmp;
  2085  
  2086    ASSERT (ysize >= 1);
  2087    ASSERT (xsize >= ysize);
  2088    ASSERT ((xp[0] & 1) != 0);
  2089    ASSERT ((yp[0] & 1) != 0);
  2090    /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  2091    ASSERT (yp[ysize-1] != 0);
  2092    ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  2093    ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  2094    ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  2095    if (xsize == ysize)
  2096      ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  2097    ASSERT_MPN (xp, xsize);
  2098    ASSERT_MPN (yp, ysize);
  2099  
  2100    refmpn_strip_twos (xp, xsize);
  2101    MPN_NORMALIZE (xp, xsize);
  2102    MPN_NORMALIZE (yp, ysize);
  2103  
  2104    for (;;)
  2105      {
  2106        cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
  2107        if (cmp == 0)
  2108  	break;
  2109        if (cmp < 0)
  2110  	MPN_PTR_SWAP (xp,xsize, yp,ysize);
  2111  
  2112        ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
  2113  
  2114        refmpn_strip_twos (xp, xsize);
  2115        MPN_NORMALIZE (xp, xsize);
  2116      }
  2117  
  2118    refmpn_copyi (gp, xp, xsize);
  2119    return xsize;
  2120  }
  2121  
  2122  unsigned long
  2123  ref_popc_limb (mp_limb_t src)
  2124  {
  2125    unsigned long  count;
  2126    int  i;
  2127  
  2128    count = 0;
  2129    for (i = 0; i < GMP_LIMB_BITS; i++)
  2130      {
  2131        count += (src & 1);
  2132        src >>= 1;
  2133      }
  2134    return count;
  2135  }
  2136  
  2137  unsigned long
  2138  refmpn_popcount (mp_srcptr sp, mp_size_t size)
  2139  {
  2140    unsigned long  count = 0;
  2141    mp_size_t  i;
  2142  
  2143    ASSERT (size >= 0);
  2144    ASSERT_MPN (sp, size);
  2145  
  2146    for (i = 0; i < size; i++)
  2147      count += ref_popc_limb (sp[i]);
  2148    return count;
  2149  }
  2150  
  2151  unsigned long
  2152  refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
  2153  {
  2154    mp_ptr  d;
  2155    unsigned long  count;
  2156  
  2157    ASSERT (size >= 0);
  2158    ASSERT_MPN (s1p, size);
  2159    ASSERT_MPN (s2p, size);
  2160  
  2161    if (size == 0)
  2162      return 0;
  2163  
  2164    d = refmpn_malloc_limbs (size);
  2165    refmpn_xor_n (d, s1p, s2p, size);
  2166    count = refmpn_popcount (d, size);
  2167    free (d);
  2168    return count;
  2169  }
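
        /* Small examples (ours): {3,5} has 2+2 set bits, and {3,5} vs {0,5}
           differ in exactly the two low bits of the first limb.  */
        #if 0
        static void
        example_popcount_hamdist (void)
        {
          mp_limb_t  a[2] = { 3, 5 };
          mp_limb_t  b[2] = { 0, 5 };
          ASSERT_ALWAYS (refmpn_popcount (a, (mp_size_t) 2) == 4);
          ASSERT_ALWAYS (refmpn_hamdist (a, b, (mp_size_t) 2) == 2);
        }
        #endif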
  2170  
  2171  
  2172  /* set r to a%d */
  2173  void
  2174  refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
  2175  {
  2176    mp_limb_t  D[2];
  2177    int        n;
  2178  
  2179    ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  2180    ASSERT_MPN (a, 2);
  2181    ASSERT_MPN (d, 2);
  2182  
  2183    D[1] = d[1], D[0] = d[0];
  2184    r[1] = a[1], r[0] = a[0];
  2185    n = 0;
  2186  
  2187    for (;;)
  2188      {
  2189        if (D[1] & GMP_NUMB_HIGHBIT)
  2190  	break;
  2191        if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
  2192  	break;
  2193        refmpn_lshift (D, D, (mp_size_t) 2, 1);
  2194        n++;
  2195        ASSERT (n <= GMP_NUMB_BITS);
  2196      }
  2197  
  2198    while (n >= 0)
  2199      {
  2200        if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
  2201  	ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
  2202        refmpn_rshift (D, D, (mp_size_t) 2, 1);
  2203        n--;
  2204      }
  2205  
  2206    ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
  2207  }
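
        /* A shift-and-subtract example (hypothetical): a = 5*b + 7 reduced mod
           d = b, with b = 2^GMP_NUMB_BITS, leaves the remainder {7, 0}.  */
        #if 0
        static void
        example_mod2 (void)
        {
          mp_limb_t  a[2] = { 7, 5 };
          mp_limb_t  d[2] = { 0, 1 };
          mp_limb_t  r[2];
          refmpn_mod2 (r, a, d);
          ASSERT_ALWAYS (r[0] == 7 && r[1] == 0);
        }
        #endif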
  2208  
  2209  
  2210  
  2211  /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
  2212     particular the trial quotient is allowed to be 2 too big. */
  2213  mp_limb_t
  2214  refmpn_sb_div_qr (mp_ptr qp,
  2215  		  mp_ptr np, mp_size_t nsize,
  2216  		  mp_srcptr dp, mp_size_t dsize)
  2217  {
  2218    mp_limb_t  retval = 0;
  2219    mp_size_t  i;
  2220    mp_limb_t  d1 = dp[dsize-1];
  2221    mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);
  2222  
  2223    ASSERT (nsize >= dsize);
  2224    /* ASSERT (dsize > 2); */
  2225    ASSERT (dsize >= 2);
  2226    ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
  2227    ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  2228    ASSERT_MPN (np, nsize);
  2229    ASSERT_MPN (dp, dsize);
  2230  
  2231    i = nsize-dsize;
  2232    if (refmpn_cmp (np+i, dp, dsize) >= 0)
  2233      {
  2234        ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
  2235        retval = 1;
  2236      }
  2237  
  2238    for (i--; i >= 0; i--)
  2239      {
  2240        mp_limb_t  n0 = np[i+dsize];
  2241        mp_limb_t  n1 = np[i+dsize-1];
  2242        mp_limb_t  q, dummy_r;
  2243  
  2244        ASSERT (n0 <= d1);
  2245        if (n0 == d1)
  2246  	q = GMP_NUMB_MAX;
  2247        else
  2248  	q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
  2249  			       d1 << GMP_NAIL_BITS);
  2250  
  2251        n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
  2252        ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
  2253        if (n0)
  2254  	{
  2255  	  q--;
  2256  	  if (! refmpn_add_n (np+i, np+i, dp, dsize))
  2257  	    {
  2258  	      q--;
  2259  	      ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
  2260  	    }
  2261  	}
  2262        np[i+dsize] = 0;
  2263  
  2264        qp[i] = q;
  2265      }
  2266  
  2267    /* remainder < divisor */
  2268  #if 0		/* ASSERT triggers gcc 4.2.1 bug */
  2269    ASSERT (refmpn_cmp (np, dp, dsize) < 0);
  2270  #endif
  2271  
  2272    /* multiply back to original */
  2273    {
  2274      mp_ptr  mp = refmpn_malloc_limbs (nsize);
  2275  
  2276      refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
  2277      if (retval)
  2278        ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
  2279      ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
  2280      ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
  2281  
  2282      free (mp);
  2283    }
  2284  
  2285    free (np_orig);
  2286    return retval;
  2287  }
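
        /* An exact-division example (ours): n = d * b with d normalized to the
           high bit, so the returned high quotient limb is 1, the stored
           quotient limb is 0, and the remainder is zero.  */
        #if 0
        static void
        example_sb_div_qr (void)
        {
          mp_limb_t  np[3] = { 0, 0, GMP_NUMB_HIGHBIT };
          mp_limb_t  dp[2] = { 0, GMP_NUMB_HIGHBIT };
          mp_limb_t  qp[1];
          ASSERT_ALWAYS (refmpn_sb_div_qr (qp, np, (mp_size_t) 3,
                                           dp, (mp_size_t) 2) == 1);
          ASSERT_ALWAYS (qp[0] == 0 && np[0] == 0 && np[1] == 0);
        }
        #endif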
  2288  
  2289  /* Normalize the divisor so its high bit is set, divide with
  2290     refmpn_sb_div_qr, then shift the remainder back down. */
  2291  void
  2292  refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
  2293  		mp_ptr np, mp_size_t nsize,
  2294  		mp_srcptr dp, mp_size_t dsize)
  2295  {
  2296    ASSERT (qxn == 0);
  2297    ASSERT_MPN (np, nsize);
  2298    ASSERT_MPN (dp, dsize);
  2299    ASSERT (dsize > 0);
  2300    ASSERT (dp[dsize-1] != 0);
  2301  
  2302    if (dsize == 1)
  2303      {
  2304        rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
  2305        return;
  2306      }
  2307    else
  2308      {
  2309        mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
  2310        mp_ptr  d2p = refmpn_malloc_limbs (dsize);
  2311        int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
  2312  
  2313        n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
  2314        ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
  2315  
  2316        refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
  2317        refmpn_rshift_or_copy (rp, n2p, dsize, norm);
  2318  
  2319        /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
  2320        free (n2p);
  2321        free (d2p);
  2322      }
  2323  }
  2324  
  2325  mp_limb_t
  2326  refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
  2327  {
  2328    mp_size_t j;
  2329    mp_limb_t cy;
  2330  
  2331    ASSERT_MPN (up, 2*n);
  2332    /* ASSERT about directed overlap rp, up */
  2333    /* ASSERT about overlap rp, mp */
  2334    /* ASSERT about overlap up, mp */
  2335  
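          /* Each pass chooses q = up[0]*invm mod b so that adding q*{mp,n}
             clears the low limb; invm is thus assumed to be -1/mp[0] mod
             2^GMP_NUMB_BITS.  The addmul carry replaces the cleared limb and
             up advances, dividing by b = 2^GMP_NUMB_BITS each time round.  */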
  2336    for (j = n - 1; j >= 0; j--)
  2337      {
  2338        up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
  2339        up++;
  2340      }
  2341    cy = mpn_add_n (rp, up, up - n, n);
  2342    return cy;
  2343  }
  2344  
  2345  size_t
  2346  refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
  2347  {
  2348    unsigned char  *d;
  2349    size_t  dsize;
  2350  
  2351    ASSERT (size >= 0);
  2352    ASSERT (base >= 2);
  2353    ASSERT (base < numberof (mp_bases));
  2354    ASSERT (size == 0 || src[size-1] != 0);
  2355    ASSERT_MPN (src, size);
  2356  
  2357    MPN_SIZEINBASE (dsize, src, size, base);
  2358    ASSERT (dsize >= 1);
  2359    ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));
  2360  
  2361    if (size == 0)
  2362      {
  2363        dst[0] = 0;
  2364        return 1;
  2365      }
  2366  
  2367    /* don't clobber input for power of 2 bases */
  2368    if (POW2_P (base))
  2369      src = refmpn_memdup_limbs (src, size);
  2370  
  2371    d = dst + dsize;
  2372    do
  2373      {
  2374        d--;
  2375        ASSERT (d >= dst);
  2376        *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
  2377        size -= (src[size-1] == 0);
  2378      }
  2379    while (size != 0);
  2380  
  2381    /* Move result back and decrement dsize if we didn't generate
  2382       the maximum possible digits.  */
  2383    if (d != dst)
  2384      {
  2385        size_t i;
  2386        dsize -= d - dst;
  2387        for (i = 0; i < dsize; i++)
  2388  	dst[i] = d[i];
  2389      }
  2390  
  2391    if (POW2_P (base))
  2392      free (src);
  2393  
  2394    return dsize;
  2395  }
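
        /* A usage sketch (hypothetical, assuming GMP_NUMB_BITS >= 8): 255 in
           base 10 comes back as the digit values {2, 5, 5}, not ASCII, and the
           source is clobbered.  */
        #if 0
        static void
        example_get_str (void)
        {
          mp_limb_t      n[1] = { 255 };
          unsigned char  str[8];   /* comfortably over the base-10 estimate */
          ASSERT_ALWAYS (refmpn_get_str (str, 10, n, (mp_size_t) 1) == 3);
          ASSERT_ALWAYS (str[0] == 2 && str[1] == 5 && str[2] == 5);
        }
        #endif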
  2396  
  2397  
  2398  mp_limb_t
  2399  ref_bswap_limb (mp_limb_t src)
  2400  {
  2401    mp_limb_t  dst;
  2402    int        i;
  2403  
  2404    dst = 0;
  2405    for (i = 0; i < GMP_LIMB_BYTES; i++)
  2406      {
  2407        dst = (dst << 8) + (src & 0xFF);
  2408        src >>= 8;
  2409      }
  2410    return dst;
  2411  }
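
        /* A quick example (ours): byte-reversing 1 moves it to the top byte.  */
        #if 0
        static void
        example_bswap_limb (void)
        {
          ASSERT_ALWAYS (ref_bswap_limb (CNST_LIMB(1))
                         == CNST_LIMB(1) << (GMP_LIMB_BITS - 8));
        }
        #endif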
  2412  
  2413  
  2414  /* These random functions are mostly for transitional purposes while adding
  2415     nail support, since they're independent of the normal mpn routines.  They
  2416     can probably be removed when those normal routines are reliable, though
  2417     perhaps something independent would still be useful at times.  */
  2418  
  2419  #if GMP_LIMB_BITS == 32
  2420  #define RAND_A  CNST_LIMB(0x29CF535)
  2421  #endif
  2422  #if GMP_LIMB_BITS == 64
  2423  #define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
  2424  #endif
  2425  
  2426  mp_limb_t  refmpn_random_seed;
  2427  
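        /* The state is stepped as seed = seed*RAND_A + 1 mod 2^GMP_LIMB_BITS
           and the top half is returned; for a power-of-two modulus LCG the low
           bits have short periods, so only the better-mixed high bits are
           used.  */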
  2428  mp_limb_t
  2429  refmpn_random_half (void)
  2430  {
  2431    refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  2432    return (refmpn_random_seed >> GMP_LIMB_BITS/2);
  2433  }
  2434  
  2435  mp_limb_t
  2436  refmpn_random_limb (void)
  2437  {
  2438    return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
  2439  	   | refmpn_random_half ()) & GMP_NUMB_MASK;
  2440  }
  2441  
  2442  void
  2443  refmpn_random (mp_ptr ptr, mp_size_t size)
  2444  {
  2445    mp_size_t  i;
  2446    if (GMP_NAIL_BITS == 0)
  2447      {
  2448        mpn_random (ptr, size);
  2449        return;
  2450      }
  2451  
  2452    for (i = 0; i < size; i++)
  2453      ptr[i] = refmpn_random_limb ();
  2454  }
  2455  
  2456  void
  2457  refmpn_random2 (mp_ptr ptr, mp_size_t size)
  2458  {
  2459    mp_size_t  i;
  2460    mp_limb_t  bit, mask, limb;
  2461    int        run;
  2462  
  2463    if (GMP_NAIL_BITS == 0)
  2464      {
  2465        mpn_random2 (ptr, size);
  2466        return;
  2467      }
  2468  
  2469  #define RUN_MODULUS  32
  2470  
  2471    /* start with ones at a random pos in the high limb */
  2472    bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  2473    mask = 0;
  2474    run = 0;
  2475  
  2476    for (i = size-1; i >= 0; i--)
  2477      {
  2478        limb = 0;
  2479        do
  2480  	{
  2481  	  if (run == 0)
  2482  	    {
  2483  	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
  2484  	      mask = ~mask;
  2485  	    }
  2486  
  2487  	  limb |= (bit & mask);
  2488  	  bit >>= 1;
  2489  	  run--;
  2490  	}
  2491        while (bit != 0);
  2492  
  2493        ptr[i] = limb;
  2494        bit = GMP_NUMB_HIGHBIT;
  2495      }
  2496  }
  2497  
  2498  /* This is a simple bitwise algorithm working high to low across "s" and
  2499     testing each time whether setting the bit would make s^2 exceed n.  */
  2500  mp_size_t
  2501  refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
  2502  {
  2503    mp_ptr     tp, dp;
  2504    mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
  2505    unsigned   ibit;
  2506    long       i;
  2507    mp_limb_t  c;
  2508  
  2509    ASSERT (nsize >= 0);
  2510  
  2511    /* If n==0, then s=0 and r=0.  */
  2512    if (nsize == 0)
  2513      return 0;
  2514  
  2515    ASSERT (np[nsize - 1] != 0);
  2516    ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
  2517    ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
  2518    ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
  2519  
  2520    /* root */
  2521    ssize = (nsize+1)/2;
  2522    refmpn_zero (sp, ssize);
  2523  
  2524    /* the remainder so far */
  2525    dp = refmpn_memdup_limbs (np, nsize);
  2526    dsize = nsize;
  2527  
  2528    /* temporary */
  2529    talloc = 2*ssize + 1;
  2530    tp = refmpn_malloc_limbs (talloc);
  2531  
  2532    for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
  2533      {
  2534        /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
  2535  	 is added to it */
  2536  
  2537        ilimbs = (i+1) / GMP_NUMB_BITS;
  2538        ibit = (i+1) % GMP_NUMB_BITS;
  2539        refmpn_zero (tp, ilimbs);
  2540        c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
  2541        tsize = ilimbs + ssize;
  2542        tp[tsize] = c;
  2543        tsize += (c != 0);
  2544  
  2545        ilimbs = (2*i) / GMP_NUMB_BITS;
  2546        ibit = (2*i) % GMP_NUMB_BITS;
  2547        if (ilimbs + 1 > tsize)
  2548  	{
  2549  	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
  2550  	  tsize = ilimbs + 1;
  2551  	}
  2552        c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
  2553  			CNST_LIMB(1) << ibit);
  2554        ASSERT (tsize < talloc);
  2555        tp[tsize] = c;
  2556        tsize += (c != 0);
  2557  
  2558        if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
  2559  	{
  2560  	  /* set this bit in s and subtract from the remainder */
  2561  	  refmpn_setbit (sp, i);
  2562  
  2563  	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
  2564  	  dsize = refmpn_normalize (dp, dsize);
  2565  	}
  2566      }
  2567  
  2568    if (rp == NULL)
  2569      {
  2570        ret = ! refmpn_zero_p (dp, dsize);
  2571      }
  2572    else
  2573      {
  2574        ASSERT (dsize == 0 || dp[dsize-1] != 0);
  2575        refmpn_copy (rp, dp, dsize);
  2576        ret = dsize;
  2577      }
  2578  
  2579    free (dp);
  2580    free (tp);
  2581    return ret;
  2582  }
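
        /* A perfect-square example (hypothetical, assuming GMP_NUMB_BITS >= 8):
           sqrt(144) = 12 with remainder 0, so the returned remainder size is
           0.  */
        #if 0
        static void
        example_sqrtrem (void)
        {
          mp_limb_t  n[1] = { 144 };
          mp_limb_t  s[1], r[1];
          ASSERT_ALWAYS (refmpn_sqrtrem (s, r, n, (mp_size_t) 1) == 0);
          ASSERT_ALWAYS (s[0] == 12);
        }
        #endif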