github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/rootrem.c (about)

     1  /* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
     2     store the truncated integer part at rootp and the remainder at remp.
     3  
     4     Contributed by Paul Zimmermann (algorithm) and
     5     Paul Zimmermann and Torbjorn Granlund (implementation).
     6     Marco Bodrato wrote logbased_root to seed the loop. 
     7  
     8     THE FUNCTIONS IN THIS FILE ARE INTERNAL, AND HAVE MUTABLE INTERFACES.  IT'S
     9     ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
    10     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    11  
    12  Copyright 2002, 2005, 2009-2012, 2015 Free Software Foundation, Inc.
    13  
    14  This file is part of the GNU MP Library.
    15  
    16  The GNU MP Library is free software; you can redistribute it and/or modify
    17  it under the terms of either:
    18  
    19    * the GNU Lesser General Public License as published by the Free
    20      Software Foundation; either version 3 of the License, or (at your
    21      option) any later version.
    22  
    23  or
    24  
    25    * the GNU General Public License as published by the Free Software
    26      Foundation; either version 2 of the License, or (at your option) any
    27      later version.
    28  
    29  or both in parallel, as here.
    30  
    31  The GNU MP Library is distributed in the hope that it will be useful, but
    32  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    33  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    34  for more details.
    35  
    36  You should have received copies of the GNU General Public License and the
    37  GNU Lesser General Public License along with the GNU MP Library.  If not,
    38  see https://www.gnu.org/licenses/.  */
    39  
    40  /* FIXME:
    41       This implementation is not optimal when remp == NULL, since the complexity
    42       is M(n), whereas it should be M(n/k) on average.
    43  */
    44  
    45  #include <stdio.h>		/* for NULL */
    46  
    47  #include "gmp.h"
    48  #include "gmp-impl.h"
    49  #include "longlong.h"
    50  
    51  static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
    52  				       mp_limb_t, int);
    53  
/* MPN_RSHIFT(rp,up,un,cnt): {rp,un} <- {up,un} >> cnt.
   mpn_rshift requires 1 <= cnt < GMP_NUMB_BITS, so the cnt == 0 case must
   be handled separately with a plain limb copy (low-to-high, INCR order,
   so rp may overlap up from below). */
    54  #define MPN_RSHIFT(rp,up,un,cnt) \
    55    do {									\
    56      if ((cnt) != 0)							\
    57        mpn_rshift (rp, up, un, cnt);					\
    58      else								\
    59        {									\
    60  	MPN_COPY_INCR (rp, up, un);					\
    61        }									\
    62    } while (0)
    63  
/* MPN_LSHIFT(cy,rp,up,un,cnt): {rp,un} <- {up,un} << cnt, carry-out in cy.
   mpn_lshift requires 1 <= cnt < GMP_NUMB_BITS, so the cnt == 0 case must
   be handled separately with a plain limb copy (high-to-low, DECR order,
   so rp may overlap up from above) and cy = 0. */
    64  #define MPN_LSHIFT(cy,rp,up,un,cnt) \
    65    do {									\
    66      if ((cnt) != 0)							\
    67        cy = mpn_lshift (rp, up, un, cnt);				\
    68      else								\
    69        {									\
    70  	MPN_COPY_DECR (rp, up, un);					\
    71  	cy = 0;								\
    72        }									\
    73    } while (0)
    74  
    75  
    76  /* Put in {rootp, ceil(un/k)} the kth root of {up, un}, rounded toward zero.
    77     If remp <> NULL, put in {remp, un} the remainder.
    78     Return the size (in limbs) of the remainder if remp <> NULL,
    79  	  or a non-zero value iff the remainder is non-zero when remp = NULL.
    80     Assumes:
    81     (a) up[un-1] is not zero
    82     (b) rootp has at least space for ceil(un/k) limbs
    83     (c) remp has at least space for un limbs (in case remp <> NULL)
    84     (d) the operands do not overlap.
    85  
    86     The auxiliary memory usage is 3*un+2 if remp = NULL,
    87     and 2*un+2 if remp <> NULL.  FIXME: This is an incorrect comment.
    88  */
    89  mp_size_t
    90  mpn_rootrem (mp_ptr rootp, mp_ptr remp,
    91  	     mp_srcptr up, mp_size_t un, mp_limb_t k)
    92  {
    93    ASSERT (un > 0);
    94    ASSERT (up[un - 1] != 0);
    95    ASSERT (k > 1);
    96  
    97    if (UNLIKELY (k == 2))
    98      return mpn_sqrtrem (rootp, remp, up, un);
    99    /* (un-1)/k > 2 <=> un > 3k <=> (un + 2)/3 > k */
   100    if (remp == NULL && (un + 2) / 3 > k)
   101      /* Pad {up,un} with k zero limbs.  This will produce an approximate root
   102         with one more limb, allowing us to compute the exact integral result. */
   103      {
   104        mp_ptr sp, wp;
   105        mp_size_t rn, sn, wn;
   106        TMP_DECL;
   107        TMP_MARK;
   108        wn = un + k;
   109        sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
   110        TMP_ALLOC_LIMBS_2 (wp, wn, /* will contain the padded input */
   111  			 sp, sn); /* approximate root of padded input */
   112        MPN_COPY (wp + k, up, un);
   113        MPN_FILL (wp, k, 0);
   114        rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
   115        /* The approximate root S = {sp,sn} is either the correct root of
   116  	 {wp,wn}, or 1 too large.  Thus unless the least significant limb of
   117  	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
   118  	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
   119  	 whether it is exact or not.) */
   120        MPN_COPY (rootp, sp + 1, sn - 1);
   121        TMP_FREE;
   122        return rn;
   123      }
   124    else
   125      {
   126        return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
   127      }
   128  }
   129  
   130  #define LOGROOT_USED_BITS 8
   131  #define LOGROOT_NEEDS_TWO_CORRECTIONS 1
   132  #define LOGROOT_RETURNED_BITS (LOGROOT_USED_BITS + LOGROOT_NEEDS_TWO_CORRECTIONS)
   133  /* Puts in *rootp some bits of the kth root of the number
   134     2^bitn * 1.op ; where op represents the "fractional" bits.
   135  
   136     The returned value is the number of bits of the root minus one;
   137     i.e. an approximation of the root will be
   138     (*rootp) * 2^(retval-LOGROOT_RETURNED_BITS+1).
   139  
   140     Currently, only LOGROOT_USED_BITS bits of op are used (the implicit
   141     one is not counted).
   142  
   143     The approximation is obtained via 8-bit base-2 log (vlog) and exp
   144     (vexp) lookup tables: take log2 of the input, divide by k, then
   145     exponentiate back.
   146   */
   147  static unsigned
   148  logbased_root (mp_ptr rootp, mp_limb_t op, mp_bitcnt_t bitn, mp_limb_t k)
   149  {
   150    /* vlog=vector(256,i,floor((log(256+i)/log(2)-8)*256)-(i>255)) */
   151    static const
   152    unsigned char vlog[] = {1,   2,   4,   5,   7,   8,   9,  11,  12,  14,  15,  16,  18,  19,  21,  22,
   153  			 23,  25,  26,  27,  29,  30,  31,  33,  34,  35,  37,  38,  39,  40,  42,  43,
   154  			 44,  46,  47,  48,  49,  51,  52,  53,  54,  56,  57,  58,  59,  61,  62,  63,
   155  			 64,  65,  67,  68,  69,  70,  71,  73,  74,  75,  76,  77,  78,  80,  81,  82,
   156  			 83,  84,  85,  87,  88,  89,  90,  91,  92,  93,  94,  96,  97,  98,  99, 100,
   157  			101, 102, 103, 104, 105, 106, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
   158  			118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 131, 132, 133, 134,
   159  			135, 136, 137, 138, 139, 140, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
   160  			150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 162, 163, 164,
   161  			165, 166, 167, 168, 169, 170, 171, 172, 173, 173, 174, 175, 176, 177, 178, 179,
   162  			180, 181, 181, 182, 183, 184, 185, 186, 187, 188, 188, 189, 190, 191, 192, 193,
   163  			194, 194, 195, 196, 197, 198, 199, 200, 200, 201, 202, 203, 204, 205, 205, 206,
   164  			207, 208, 209, 209, 210, 211, 212, 213, 214, 214, 215, 216, 217, 218, 218, 219,
   165  			220, 221, 222, 222, 223, 224, 225, 225, 226, 227, 228, 229, 229, 230, 231, 232,
   166  			232, 233, 234, 235, 235, 236, 237, 238, 239, 239, 240, 241, 242, 242, 243, 244,
   167  			245, 245, 246, 247, 247, 248, 249, 250, 250, 251, 252, 253, 253, 254, 255, 255};
   168  
   169    /* vexp=vector(256,i,floor(2^(8+i/256)-256)-(i>255)) */
   170    static const
   171    unsigned char vexp[] = {0,   1,   2,   2,   3,   4,   4,   5,   6,   7,   7,   8,   9,   9,  10,  11,
   172  			 12,  12,  13,  14,  14,  15,  16,  17,  17,  18,  19,  20,  20,  21,  22,  23,
   173  			 23,  24,  25,  26,  26,  27,  28,  29,  30,  30,  31,  32,  33,  33,  34,  35,
   174  			 36,  37,  37,  38,  39,  40,  41,  41,  42,  43,  44,  45,  45,  46,  47,  48,
   175  			 49,  50,  50,  51,  52,  53,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,
   176  			 62,  63,  64,  65,  66,  67,  67,  68,  69,  70,  71,  72,  73,  74,  75,  75,
   177  			 76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  86,  87,  88,  89,  90,
   178  			 91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
   179  			107, 108, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 119, 120, 121, 122,
   180  			123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138,
   181  			139, 140, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 154, 155, 156,
   182  			157, 158, 159, 160, 161, 163, 164, 165, 166, 167, 168, 169, 171, 172, 173, 174,
   183  			175, 176, 178, 179, 180, 181, 182, 183, 185, 186, 187, 188, 189, 191, 192, 193,
   184  			194, 196, 197, 198, 199, 200, 202, 203, 204, 205, 207, 208, 209, 210, 212, 213,
   185  			214, 216, 217, 218, 219, 221, 222, 223, 225, 226, 227, 229, 230, 231, 232, 234,
   186  			235, 236, 238, 239, 240, 242, 243, 245, 246, 247, 249, 250, 251, 253, 254, 255};
   187    mp_bitcnt_t retval;
   188  
   189    if (UNLIKELY (bitn > (~ (mp_bitcnt_t) 0) >> LOGROOT_USED_BITS))
   190      {
   191        /* In the unlikely case, we use two divisions and a modulo. */
   192        retval = bitn / k;
   193        bitn %= k;
   194        bitn = (bitn << LOGROOT_USED_BITS |
   195  	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
   196      }
   197    else
   198      {
   199        bitn = (bitn << LOGROOT_USED_BITS |
   200  	      vlog[op >> (GMP_NUMB_BITS - LOGROOT_USED_BITS)]) / k;
   201        retval = bitn >> LOGROOT_USED_BITS;
   202        bitn &= (CNST_LIMB (1) << LOGROOT_USED_BITS) - 1;
   203      }
   204    ASSERT(bitn < CNST_LIMB (1) << LOGROOT_USED_BITS);
   205    *rootp = CNST_LIMB(1) << (LOGROOT_USED_BITS - ! LOGROOT_NEEDS_TWO_CORRECTIONS)
   206      | vexp[bitn] >> ! LOGROOT_NEEDS_TWO_CORRECTIONS;
   207    return retval;
   208  }
   205  
/* Core of mpn_rootrem: computes the truncated kth root of {up,un} into
   rootp by a Newton-style iteration seeded with logbased_root, doubling
   (roughly) the number of correct root bits per step.  If remp != NULL
   the remainder is stored there and its limb count returned; otherwise a
   non-zero value is returned iff the remainder is non-zero.  If approx is
   non-zero the final remainder is not computed, and (per the caller's
   comment in mpn_rootrem) the stored root may be 1 too large unless its
   low limb is 0 or 1, in which case the final correction loop still runs. */
   206  /* if approx is non-zero, does not compute the final remainder */
   207  static mp_size_t
   208  mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
   209  		      mp_limb_t k, int approx)
   210  {
   211    mp_ptr qp, rp, sp, wp, scratch;
   212    mp_size_t qn, rn, sn, wn, nl, bn;
   213    mp_limb_t save, save2, cy, uh;
   214    mp_bitcnt_t unb; /* number of significant bits of {up,un} */
   215    mp_bitcnt_t xnb; /* number of significant bits of the result */
   216    mp_bitcnt_t b, kk;
   217    mp_bitcnt_t sizes[GMP_NUMB_BITS + 1];
   218    int ni;
   219    int perf_pow;
   220    unsigned ulz, snb, c, logk;
   221    TMP_DECL;
   222  
   223    /* MPN_SIZEINBASE_2EXP(unb, up, un, 1); --unb; */
   224    uh = up[un - 1];
   225    count_leading_zeros (ulz, uh);
   226    ulz = ulz - GMP_NAIL_BITS + 1; /* Ignore the first 1. */
   227    unb = (mp_bitcnt_t) un * GMP_NUMB_BITS - ulz;
   228    /* unb is the (truncated) logarithm of the input U in base 2 */
   229  
   230    if (unb < k) /* root is 1 */
   231      {
   232        rootp[0] = 1;
   233        if (remp == NULL)
   234  	un -= (*up == CNST_LIMB (1)); /* Non-zero iff {up,un} > 1 */
   235        else
   236  	{
   237  	  mpn_sub_1 (remp, up, un, CNST_LIMB (1));
   238  	  un -= (remp [un - 1] == 0);	/* There should be at most one zero limb,
   239  				   if we demand u to be normalized  */
   240  	}
   241        return un;
   242      }
   243    /* if (unb - k < k/2 + k/16) // root is 2 */
   244  
   245    if (ulz == GMP_NUMB_BITS)
   246      uh = up[un - 2];
   247    else
   248      uh = (uh << ulz & GMP_NUMB_MASK) | up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz);
   249    ASSERT (un != 1 || up[un - 1 - (un != 1)] >> (GMP_NUMB_BITS - ulz) == 1);
   250  
   251    xnb = logbased_root (rootp, uh, unb, k);
   252    snb = LOGROOT_RETURNED_BITS - 1;
   253    /* xnb+1 is the number of bits of the root R */
   254    /* snb+1 is the number of bits of the current approximation S */
   255  
   256    kk = k * xnb;		/* number of truncated bits in the input */
   257  
   258    /* FIXME: Should we skip the next two loops when xnb <= snb ? */
   259    for (uh = (k - 1) / 2, logk = 3; (uh >>= 1) != 0; ++logk )
   260      ;
   261    /* logk = ceil(log(k)/log(2)) + 1 */
   262  
   263    /* xnb is the number of remaining bits to determine in the kth root */
   264    for (ni = 0; (sizes[ni] = xnb) > snb; ++ni)
   265      {
   266        /* invariant: here we want xnb+1 total bits for the kth root */
   267  
   268        /* if c is the new value of xnb, this means that we'll go from a
   269  	 root of c+1 bits (say s') to a root of xnb+1 bits.
   270  	 It is proved in the book "Modern Computer Arithmetic" by Brent
   271  	 and Zimmermann, Chapter 1, that
   272  	 if s' >= k*beta, then at most one correction is necessary.
   273  	 Here beta = 2^(xnb-c), and s' >= 2^c, thus it suffices that
   274  	 c >= ceil((xnb + log2(k))/2). */
   275        if (xnb > logk)
   276  	xnb = (xnb + logk) / 2;
   277        else
   278  	--xnb;	/* add just one bit at a time */
   279      }
   280  
   281    *rootp >>= snb - xnb;
   282    kk -= xnb;
   283  
   284    ASSERT_ALWAYS (ni < GMP_NUMB_BITS + 1);
   285    /* We have sizes[0] = b > sizes[1] > ... > sizes[ni] = 0 with
   286       sizes[i] <= 2 * sizes[i+1].
   287       Newton iteration will first compute sizes[ni-1] extra bits,
   288       then sizes[ni-2], ..., then sizes[0] = b. */
   289  
   290    TMP_MARK;
   291    /* qp and wp need enough space to store S'^k where S' is an approximate
   292       root. Since S' can be as large as S+2, the worst case is when S=2 and
   293       S'=4. But then since we know the number of bits of S in advance, S'
   294       can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
   295       So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
   296       fits in un limbs, the number of extra limbs needed is bounded by
   297       ceil(k*log2(3/2)/GMP_NUMB_BITS). */
   298    /* THINK: with the use of logbased_root, maybe the constant is
   299       258/256 instead of 3/2 ? log2(258/256) < 1/89 < 1/64 */
   300  #define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
   301    TMP_ALLOC_LIMBS_3 (scratch, un + 1, /* used by mpn_div_q */
   302  		     qp, un + EXTRA,  /* will contain quotient and remainder
   303  					 of R/(k*S^(k-1)), and S^k */
   304  		     wp, un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
   305  					 and temporary for mpn_pow_1 */
   306  
   307    if (remp == NULL)
   308      rp = scratch;	/* will contain the remainder */
   309    else
   310      rp = remp;
   311    sp = rootp;
   312  
   313    sn = 1;		/* Initial approximation has one limb */
   314  
   315    for (b = xnb; ni != 0; --ni)
   316      {
   317        /* 1: loop invariant:
   318  	 {sp, sn} is the current approximation of the root, which has
   319  		  exactly 1 + sizes[ni] bits.
   320  	 {rp, rn} is the current remainder
   321  	 {wp, wn} = {sp, sn}^(k-1)
   322  	 kk = number of truncated bits of the input
   323        */
   324  
   325        /* Since each iteration treats b bits from the root and thus k*b bits
   326  	 from the input, and we already considered b bits from the input,
   327  	 we now have to take another (k-1)*b bits from the input. */
   328        kk -= (k - 1) * b; /* remaining input bits */
   329        /* {rp, rn} = floor({up, un} / 2^kk) */
   330        rn = un - kk / GMP_NUMB_BITS;
   331        MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, rn, kk % GMP_NUMB_BITS);
   332        rn -= rp[rn - 1] == 0;
   333  
   334        /* 9: current buffers: {sp,sn}, {rp,rn} */
   335  
   336        for (c = 0;; c++)
   337  	{
   338  	  /* Compute S^k in {qp,qn}. */
   339  	  /* W <- S^(k-1) for the next iteration,
   340  	     and S^k = W * S. */
   341  	  wn = mpn_pow_1 (wp, sp, sn, k - 1, qp);
   342  	  mpn_mul (qp, wp, wn, sp, sn);
   343  	  qn = wn + sn;
   344  	  qn -= qp[qn - 1] == 0;
   345  
   346  	  perf_pow = 1;
   347  	  /* if S^k > floor(U/2^kk), the root approximation was too large */
   348  	  if (qn > rn || (qn == rn && (perf_pow=mpn_cmp (qp, rp, rn)) > 0))
   349  	    MPN_DECR_U (sp, sn, 1);
   350  	  else
   351  	    break;
   352  	}
   353  
   354        /* 10: current buffers: {sp,sn}, {rp,rn}, {qp,qn}, {wp,wn} */
   355  
   356        /* sometimes two corrections are needed with logbased_root */
   357        ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
   358        ASSERT_ALWAYS (rn >= qn);
   359  
   360        b = sizes[ni - 1] - sizes[ni]; /* number of bits to compute in the
   361  				      next iteration */
   362        bn = b / GMP_NUMB_BITS; /* lowest limb from high part of rp[], after shift */
   363  
   364        kk = kk - b;
   365        /* nl is the number of limbs in U which contain bits [kk,kk+b-1] */
   366        nl = 1 + (kk + b - 1) / GMP_NUMB_BITS - (kk / GMP_NUMB_BITS);
   367        /* nl  = 1 + floor((kk + b - 1) / GMP_NUMB_BITS)
   368  		 - floor(kk / GMP_NUMB_BITS)
   369  	     <= 1 + (kk + b - 1) / GMP_NUMB_BITS
   370  		  - (kk - GMP_NUMB_BITS + 1) / GMP_NUMB_BITS
   371  	     = 2 + (b - 2) / GMP_NUMB_BITS
   372  	 thus since nl is an integer:
   373  	 nl <= 2 + floor(b/GMP_NUMB_BITS) <= 2 + bn. */
   374  
   375        /* 11: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
   376  
   377        /* R = R - Q = floor(U/2^kk) - S^k */
   378        if (perf_pow != 0)
   379  	{
   380  	  mpn_sub (rp, rp, rn, qp, qn);
   381  	  MPN_NORMALIZE_NOT_ZERO (rp, rn);
   382  
   383  	  /* first multiply the remainder by 2^b */
   384  	  MPN_LSHIFT (cy, rp + bn, rp, rn, b % GMP_NUMB_BITS);
   385  	  rn = rn + bn;
   386  	  if (cy != 0)
   387  	    {
   388  	      rp[rn] = cy;
   389  	      rn++;
   390  	    }
   391  
   392  	  save = rp[bn];
   393  	  /* we have to save rp[bn] up to rp[nl-1], i.e. 1 or 2 limbs */
   394  	  if (nl - 1 > bn)
   395  	    save2 = rp[bn + 1];
   396  	}
   397        else
   398  	{
   399  	  rn = bn;
   400  	  save2 = save = 0;
   401  	}
   402        /* 2: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
   403  
   404        /* Now insert bits [kk,kk+b-1] from the input U */
   405        MPN_RSHIFT (rp, up + kk / GMP_NUMB_BITS, nl, kk % GMP_NUMB_BITS);
   406        /* set to zero high bits of rp[bn] */
   407        rp[bn] &= (CNST_LIMB (1) << (b % GMP_NUMB_BITS)) - 1;
   408        /* restore corresponding bits */
   409        rp[bn] |= save;
   410        if (nl - 1 > bn)
   411  	rp[bn + 1] = save2; /* the low b bits go in rp[0..bn] only, since
   412  			       they start by bit 0 in rp[0], so they use
   413  			       at most ceil(b/GMP_NUMB_BITS) limbs */
   414        /* FIXME: Should we normalise {rp,rn} here ? */
   415  
   416        /* 3: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
   417  
   418        /* compute {wp, wn} = k * {sp, sn}^(k-1) */
   419        cy = mpn_mul_1 (wp, wp, wn, k);
   420        wp[wn] = cy;
   421        wn += cy != 0;
   422  
   423        /* 6: current buffers: {sp,sn}, {qp,qn} */
   424  
   425        /* multiply the root approximation by 2^b */
   426        MPN_LSHIFT (cy, sp + b / GMP_NUMB_BITS, sp, sn, b % GMP_NUMB_BITS);
   427        sn = sn + b / GMP_NUMB_BITS;
   428        if (cy != 0)
   429  	{
   430  	  sp[sn] = cy;
   431  	  sn++;
   432  	}
   433  
   434        save = sp[b / GMP_NUMB_BITS];
   435  
   436        /* Number of limbs used by b bits, when least significant bit is
   437  	 aligned to least limb */
   438        bn = (b - 1) / GMP_NUMB_BITS + 1;
   439  
   440        /* 4: current buffers: {sp,sn}, {rp,rn}, {wp,wn} */
   441  
   442        /* now divide {rp, rn} by {wp, wn} to get the low part of the root */
   443        if (UNLIKELY (rn < wn))
   444  	{
   445  	  MPN_FILL (sp, bn, 0);
   446  	}
   447        else
   448  	{
   449  	  qn = rn - wn; /* expected quotient size */
   450  	  if (qn <= bn) { /* Divide only if result is not too big. */
   451  	    mpn_div_q (qp, rp, rn, wp, wn, scratch);
   452  	    qn += qp[qn] != 0;
   453  	  }
   454  
   455        /* 5: current buffers: {sp,sn}, {qp,qn}.
   456  	 Note: {rp,rn} is not needed any more since we'll compute it from
   457  	 scratch at the end of the loop.
   458         */
   459  
   460        /* the quotient should be smaller than 2^b, since the previous
   461  	 approximation was correctly rounded toward zero */
   462  	  if (qn > bn || (qn == bn && (b % GMP_NUMB_BITS != 0) &&
   463  			  qp[qn - 1] >= (CNST_LIMB (1) << (b % GMP_NUMB_BITS))))
   464  	    {
   465  	      for (qn = 1; qn < bn; ++qn)
   466  		sp[qn - 1] = GMP_NUMB_MAX;
   467  	      sp[qn - 1] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - 1 - ((b - 1) % GMP_NUMB_BITS));
   468  	    }
   469  	  else
   470  	    {
   471        /* 7: current buffers: {sp,sn}, {qp,qn} */
   472  
   473        /* Combine sB and q to form sB + q.  */
   474  	      MPN_COPY (sp, qp, qn);
   475  	      MPN_ZERO (sp + qn, bn - qn);
   476  	    }
   477  	}
   478        sp[b / GMP_NUMB_BITS] |= save;
   479  
   480        /* 8: current buffer: {sp,sn} */
   481  
   482      };
   483  
   484    /* otherwise we have rn > 0, thus the return value is ok */
   485    if (!approx || sp[0] <= CNST_LIMB (1))
   486      {
   487        for (c = 0;; c++)
   488  	{
   489  	  /* Compute S^k in {qp,qn}. */
   490  	  /* Last iteration: we don't need W anymore. */
   491  	  /* mpn_pow_1 requires that both qp and wp have enough
   492  	     space to store the result {sp,sn}^k + 1 limb */
   493  	  qn = mpn_pow_1 (qp, sp, sn, k, wp);
   494  
   495  	  perf_pow = 1;
   496  	  if (qn > un || (qn == un && (perf_pow=mpn_cmp (qp, up, un)) > 0))
   497  	    MPN_DECR_U (sp, sn, 1);
   498  	  else
   499  	    break;
   500  	};
   501  
   502        /* sometimes two corrections are needed with logbased_root */
   503        ASSERT (c <= 1 + LOGROOT_NEEDS_TWO_CORRECTIONS);
   504  
   505        rn = perf_pow != 0;
   506        if (rn != 0 && remp != NULL)
   507  	{
   508  	  mpn_sub (remp, up, un, qp, qn);
   509  	  rn = un;
   510  	  MPN_NORMALIZE_NOT_ZERO (remp, rn);
   511  	}
   512      }
   513  
   514    TMP_FREE;
   515    return rn;
   516  }