github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/sqrmod_bnm1.c

/* sqrmod_bnm1.c -- squaring mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Input is {ap,rn}; output is {rp,rn}; the computation is done
   mod B^rn - 1, and values are semi-normalised: zero is represented
   as either 0 or B^rn - 1.  Needs a scratch of 2rn limbs at tp.
   tp==rp is allowed. */
static void
mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_sqr (tp, ap, rn);
  cy = mpn_add_n (rp, tp, tp + rn, rn);
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
   * be no overflow when adding in the carry. */
  MPN_INCR_U (rp, rn, cy);
}
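/* For intuition, a base-10 analogue of the folding above (B = 10, rn = 2,
   modulus 99): 73^2 = 5329 folds as 29 + 53 = 82 = 5329 mod 99, while
   95^2 = 9025 folds as 25 + 90 = 115, i.e. 15 with a carry, and the
   MPN_INCR_U brings it to 16 = 9025 mod 99.  The folding works because
   B^rn == 1 (mod B^rn - 1), so the high half of the 2rn-limb square
   simply adds onto the low half. */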


/* Input is {ap,rn+1}, in semi-normalised representation; output is
   {rp,rn+1}, computed mod B^rn + 1.  Needs a scratch area of 2rn + 2
   limbs at tp; tp == rp is allowed.  Output is normalised. */
static void
mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_sqr (tp, ap, rn + 1);
  ASSERT (tp[2*rn+1] == 0);
  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
  rp[rn] = 0;
  MPN_INCR_U (rp, rn+1, cy );
}
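/* A base-10 analogue for intuition (B = 10, rn = 2, modulus 101):
   73^2 = 5329 splits as 29 - 53, i.e. 76 with a borrow; adding the borrow
   (plus tp[2*rn], here 0) back with MPN_INCR_U gives 77 = 5329 mod 101.
   The subtraction is valid because B^rn == -1 (mod B^rn + 1). */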


/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if the operand
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1.
 * It should not be a problem if sqrmod_bnm1 is used to
 * compute the full square with 2*an <= rn, because this condition
 * implies (B^an-1)^2 < (B^rn-1).
 *
 * Requires rn/4 < an <= rn
 * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
 *
 * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
 */
void
mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
{
  ASSERT (0 < an);
  ASSERT (an <= rn);

  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (an < rn))
        {
          if (UNLIKELY (2*an <= rn))
            {
              mpn_sqr (rp, ap, an);
            }
          else
            {
              mp_limb_t cy;
              mpn_sqr (tp, ap, an);
              cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
              MPN_INCR_U (rp, rn, cy);
            }
        }
      else
        mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      ASSERT (2*an > n);

      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
         and CRT together as

         x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */
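      /* A small sanity check of the identity (B = 10, n = 1, so
         B^n - 1 = 9, B^n + 1 = 11, and rn = 2n gives B^rn - 1 = 99):
         for a = 73, a^2 = 5329, xm = 5329 mod 9 = 1, xp = 5329 mod 11 = 5,
         (xp + xm)/2 mod 9 = 3, and -5*10 + 11*3 = -17 == 82 (mod 99),
         which is indeed 5329 mod 99. */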

#define a0 ap
#define a1 (ap + n)

#define xp  tp          /* 2n + 2 */
      /* am1  maybe in {xp, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */

      {
        mp_srcptr am1;
        mp_size_t anm;
        mp_ptr so;

        if (LIKELY (an > n))
          {
            so = xp + n;
            am1 = xp;
            cy = mpn_add (xp, a0, n, a1, an - n);
            MPN_INCR_U (xp, n, cy);
            anm = n;
          }
        else
          {
            so = xp;
            am1 = a0;
            anm = an;
          }

        mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
      }
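      /* Here {rp,n} holds xm = a^2 mod (B^n - 1), in the semi-normalised
         representation described above. */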

      {
        int       k;
        mp_srcptr ap1;
        mp_size_t anp;

        if (LIKELY (an > n)) {
          ap1 = sp1;
          cy = mpn_sub (sp1, a0, n, a1, an - n);
          sp1[n] = 0;
          MPN_INCR_U (sp1, n + 1, cy);
          anp = n + ap1[n];
        } else {
          ap1 = a0;
          anp = an;
        }

        if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
          k=0;
        else
          {
            int mask;
            k = mpn_fft_best_k (n, 1);
            mask = (1<<k) -1;
            while (n & mask) {k--; mask >>=1;};
          }
        if (k >= FFT_FIRST_K)
          xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
        else if (UNLIKELY (ap1 == a0))
          {
            ASSERT (anp <= n);
            ASSERT (2*anp > n);
            mpn_sqr (xp, a0, an);
            anp = 2*an - n;
            cy = mpn_sub (xp, xp, n, xp + n, anp);
            xp[n] = 0;
            MPN_INCR_U (xp, n+1, cy);
          }
        else
          mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
      }
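      /* Here {xp,n+1} holds a^2 mod (B^n + 1); the recomposition below
         relies on it being normalised. */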

      /* Here the CRT recomposition begins.

         xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
         Division by 2 is a bitwise rotation.

         Assumes xp normalised mod (B^n+1).

         The residue class [0] is represented by [B^n-1], except when
         the operand itself is ZERO.
      */
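      /* Division by 2 is well-defined mod B^n - 1 because the modulus is
         odd; since 2 * (B^n/2) = B^n == 1 (mod B^n - 1), dividing by 2 is
         multiplication by B^n/2, i.e. a right rotation by one bit (the
         bit shifted out at the bottom re-enters at the top). */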

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* The next update of rp[n-1] will set cy = 1 only if rp[n-1] += hi
         overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1, i.e. {xp,n} = ZERO; this implies that
         the rsh1add was a simple rshift: the top bit is 0.  cy = 1 => hi = 0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
#else
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      cy += (rp[0]&1);
      mpn_rshift(rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... so rp[n-1] + cy cannot overflow, and the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* The next increment cannot overflow; see the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U(rp, n, cy);

      /* Compute the highest half:
         ([(xp + xm)/2 mod (B^n-1)] - xp) * B^n
       */
      if (UNLIKELY (2*an < rn))
        {
          /* Note that in this case, the only way the result can equal
             zero mod B^{rn} - 1 is if the input is zero, and
             then the output of both the recursive calls and this CRT
             reconstruction is zero, not B^{rn} - 1. */
          cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);

          /* FIXME: This subtraction of the high parts is not really
             necessary; we do it to get the carry out and for sanity
             checking. */
          cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
                                   xp + 2*an - n, rn - 2*an, cy);
          ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
          cy = mpn_sub_1 (rp, rp, 2*an, cy);
          ASSERT (cy == (xp + 2*an - n)[0]);
        }
      else
        {
          cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
          /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
             DECR will affect _at most_ the lowest n limbs. */
          MPN_DECR_U (rp, 2*n, cy);
        }
#undef a0
#undef a1
#undef xp
#undef sp1
    }
}

mp_size_t
mpn_sqrmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t nh;

  if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
    return n;
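  /* The returns below round n up to a multiple of 2, 4 or 8, using the
     idiom (n + (k-1)) & -k, so that each level of recursion allowed by the
     thresholds in mpn_sqrmod_bnm1 can split the size evenly in half. */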
  if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (2-1)) & (-2);
  if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (4-1)) & (-4);

  nh = (n + 1) >> 1;

  if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
    return (n + (8-1)) & (-8);

  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
}
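
/* Illustrative use, sketched after GMP's internal callers (such as
   mpn_nussbaumer_mul when squaring); the allocation details here are an
   assumption, inside a TMP_MARK/TMP_FREE region:

     mp_size_t rn = mpn_sqrmod_bnm1_next_size (2 * an);
     mp_ptr tp = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (rn, an));
     mpn_sqrmod_bnm1 (pp, rn, ap, an, tp);   // {pp,2*an} <- {ap,an}^2

   Since rn >= 2*an, the square is smaller than B^rn - 1, so the reduction
   does not wrap and the full square is recovered. */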