github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/broot.c (about)

     1  /* mpn_broot -- Compute hensel sqrt
     2  
     3     Contributed to the GNU project by Niels Möller
     4  
     5     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     6     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
     8  
     9  Copyright 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  #include "gmp.h"
    38  #include "gmp-impl.h"
    39  
    40  /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
    41     typical use will have e small. */
    42  static mp_limb_t
    43  powlimb (mp_limb_t a, mp_limb_t e)
    44  {
    45    mp_limb_t r = 1;
    46    mp_limb_t s = a;
    47  
    48    for (r = 1, s = a; e > 0; e >>= 1, s *= s)
    49      if (e & 1)
    50        r *= s;
    51  
    52    return r;
    53  }
    54  
    55  /* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
    56  
    57     Iterates
    58  
    59       r' <-- r - r * (a^{k-1} r^k - 1) / n
    60  
    61     If
    62  
    63       a^{k-1} r^k = 1 (mod 2^m),
    64  
    65     then
    66  
    67       a^{k-1} r'^k = 1 (mod 2^{2m}),
    68  
    69     Compute the update term as
    70  
    71       r' = r - (a^{k-1} r^{k+1} - r) / k
    72  
    73     where we still have cancellation of low limbs.
    74  
    75   */
    76  void
    77  mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
    78  {
    79    mp_size_t sizes[GMP_LIMB_BITS * 2];
    80    mp_ptr akm1, tp, rnp, ep;
    81    mp_limb_t a0, r0, km1, kp1h, kinv;
    82    mp_size_t rn;
    83    unsigned i;
    84  
    85    TMP_DECL;
    86  
    87    ASSERT (n > 0);
    88    ASSERT (ap[0] & 1);
    89    ASSERT (k & 1);
    90    ASSERT (k >= 3);
    91  
    92    TMP_MARK;
    93  
    94    akm1 = TMP_ALLOC_LIMBS (4*n);
    95    tp = akm1 + n;
    96  
    97    km1 = k-1;
    98    /* FIXME: Could arrange the iteration so we don't need to compute
    99       this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
   100       that we can use wraparound also for a*r, since the low half is
   101       unchanged from the previous iteration. Or possibly mulmid. Also,
   102       a r = a^{1/k}, so we get that value too, for free? */
   103    mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
   104  
   105    a0 = ap[0];
   106    binvert_limb (kinv, k);
   107  
   108    /* 4 bits: a^{1/k - 1} (mod 16):
   109  
   110  	a % 8
   111  	1 3 5 7
   112     k%4 +-------
   113       1 |1 1 1 1
   114       3 |1 9 9 1
   115    */
   116    r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
   117    r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
   118    r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
   119    r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
   120  #if GMP_NUMB_BITS > 32
   121    {
   122      unsigned prec = 32;
   123      do
   124        {
   125  	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
   126  	prec *= 2;
   127        }
   128      while (prec < GMP_NUMB_BITS);
   129    }
   130  #endif
   131  
   132    rp[0] = r0;
   133    if (n == 1)
   134      {
   135        TMP_FREE;
   136        return;
   137      }
   138  
   139    /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
   140    kp1h = k/2 + 1;
   141  
   142    /* FIXME: Special case for two limb iteration. */
   143    rnp = TMP_ALLOC_LIMBS (2*n + 1);
   144    ep = rnp + n;
   145  
   146    /* FIXME: Possible to this on the fly with some bit fiddling. */
   147    for (i = 0; n > 1; n = (n + 1)/2)
   148      sizes[i++] = n;
   149  
   150    rn = 1;
   151  
   152    while (i-- > 0)
   153      {
   154        /* Compute x^{k+1}. */
   155        mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
   156  			       final iteration. */
   157        mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
   158  
   159        /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
   160  
   161        mpn_mullo_n (ep, rnp, akm1, sizes[i]);
   162        ASSERT (mpn_cmp (ep, rp, rn) == 0);
   163  
   164        ASSERT (sizes[i] <= 2*rn);
   165        mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
   166        mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
   167        rn = sizes[i];
   168      }
   169    TMP_FREE;
   170  }
   171  
   172  /* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
   173  void
   174  mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
   175  {
   176    mp_ptr tp;
   177    TMP_DECL;
   178  
   179    ASSERT (n > 0);
   180    ASSERT (ap[0] & 1);
   181    ASSERT (k & 1);
   182  
   183    if (k == 1)
   184      {
   185        MPN_COPY (rp, ap, n);
   186        return;
   187      }
   188  
   189    TMP_MARK;
   190    tp = TMP_ALLOC_LIMBS (n);
   191  
   192    mpn_broot_invm1 (tp, ap, n, k);
   193    mpn_mullo_n (rp, tp, ap, n);
   194  
   195    TMP_FREE;
   196  }