github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mulmod_bnm1.c (about)

     1  /* mulmod_bnm1.c -- multiplication mod B^n-1.
     2  
     3     Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
     4     Marco Bodrato.
     5  
     6     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     7     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     8     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     9  
    10  Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
    11  
    12  This file is part of the GNU MP Library.
    13  
    14  The GNU MP Library is free software; you can redistribute it and/or modify
    15  it under the terms of either:
    16  
    17    * the GNU Lesser General Public License as published by the Free
    18      Software Foundation; either version 3 of the License, or (at your
    19      option) any later version.
    20  
    21  or
    22  
    23    * the GNU General Public License as published by the Free Software
    24      Foundation; either version 2 of the License, or (at your option) any
    25      later version.
    26  
    27  or both in parallel, as here.
    28  
    29  The GNU MP Library is distributed in the hope that it will be useful, but
    30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    32  for more details.
    33  
    34  You should have received copies of the GNU General Public License and the
    35  GNU Lesser General Public License along with the GNU MP Library.  If not,
    36  see https://www.gnu.org/licenses/.  */
    37  
    38  
    39  #include "gmp.h"
    40  #include "gmp-impl.h"
    41  #include "longlong.h"
    42  
    43  /* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
    44     mod B^rn - 1, and values are semi-normalised; zero is represented
    45     as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
    46     tp==rp is allowed. */
    47  void
    48  mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
    49  		    mp_ptr tp)
    50  {
    51    mp_limb_t cy;
    52  
    53    ASSERT (0 < rn);
    54  
    55    mpn_mul_n (tp, ap, bp, rn);
    56    cy = mpn_add_n (rp, tp, tp + rn, rn);
    57    /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
    58     * be no overflow when adding in the carry. */
    59    MPN_INCR_U (rp, rn, cy);
    60  }
    61  
    62  
    63  /* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
    64     semi-normalised representation, computation is mod B^rn + 1. Needs
    65     a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
    66     Output is normalised. */
    67  static void
    68  mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
    69  		    mp_ptr tp)
    70  {
    71    mp_limb_t cy;
    72  
    73    ASSERT (0 < rn);
    74  
    75    mpn_mul_n (tp, ap, bp, rn + 1);
    76    ASSERT (tp[2*rn+1] == 0);
    77    ASSERT (tp[2*rn] < GMP_NUMB_MAX);
    78    cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
    79    rp[rn] = 0;
    80    MPN_INCR_U (rp, rn+1, cy );
    81  }
    82  
    83  
    84  /* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
    85   *
    86   * The result is expected to be ZERO if and only if one of the operand
    87   * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
    88   * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
    89   * combine results and obtain a natural number when one knows in
    90   * advance that the final value is less than (B^rn-1).
    91   * Moreover it should not be a problem if mulmod_bnm1 is used to
    92   * compute the full product with an+bn <= rn, because this condition
    93   * implies (B^an-1)(B^bn-1) < (B^rn-1) .
    94   *
    95   * Requires 0 < bn <= an <= rn and an + bn > rn/2
    96   * Scratch need: rn + (need for recursive call OR rn + 4). This gives
    97   *
    98   * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
    99   */
   100  void
   101  mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
   102  {
   103    ASSERT (0 < bn);
   104    ASSERT (bn <= an);
   105    ASSERT (an <= rn);
   106  
   107    if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
   108      {
   109        if (UNLIKELY (bn < rn))
   110  	{
   111  	  if (UNLIKELY (an + bn <= rn))
   112  	    {
   113  	      mpn_mul (rp, ap, an, bp, bn);
   114  	    }
   115  	  else
   116  	    {
   117  	      mp_limb_t cy;
   118  	      mpn_mul (tp, ap, an, bp, bn);
   119  	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
   120  	      MPN_INCR_U (rp, rn, cy);
   121  	    }
   122  	}
   123        else
   124  	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
   125      }
   126    else
   127      {
   128        mp_size_t n;
   129        mp_limb_t cy;
   130        mp_limb_t hi;
   131  
   132        n = rn >> 1;
   133  
   134        /* We need at least an + bn >= n, to be able to fit one of the
   135  	 recursive products at rp. Requiring strict inequality makes
   136  	 the code slightly simpler. If desired, we could avoid this
   137  	 restriction by initially halving rn as long as rn is even and
   138  	 an + bn <= rn/2. */
   139  
   140        ASSERT (an + bn > n);
   141  
   142        /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
   143  	 and crt together as
   144  
   145  	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
   146        */
   147  
   148  #define a0 ap
   149  #define a1 (ap + n)
   150  #define b0 bp
   151  #define b1 (bp + n)
   152  
   153  #define xp  tp	/* 2n + 2 */
   154        /* am1  maybe in {xp, n} */
   155        /* bm1  maybe in {xp + n, n} */
   156  #define sp1 (tp + 2*n + 2)
   157        /* ap1  maybe in {sp1, n + 1} */
   158        /* bp1  maybe in {sp1 + n + 1, n + 1} */
   159  
   160        {
   161  	mp_srcptr am1, bm1;
   162  	mp_size_t anm, bnm;
   163  	mp_ptr so;
   164  
   165  	bm1 = b0;
   166  	bnm = bn;
   167  	if (LIKELY (an > n))
   168  	  {
   169  	    am1 = xp;
   170  	    cy = mpn_add (xp, a0, n, a1, an - n);
   171  	    MPN_INCR_U (xp, n, cy);
   172  	    anm = n;
   173  	    so = xp + n;
   174  	    if (LIKELY (bn > n))
   175  	      {
   176  		bm1 = so;
   177  		cy = mpn_add (so, b0, n, b1, bn - n);
   178  		MPN_INCR_U (so, n, cy);
   179  		bnm = n;
   180  		so += n;
   181  	      }
   182  	  }
   183  	else
   184  	  {
   185  	    so = xp;
   186  	    am1 = a0;
   187  	    anm = an;
   188  	  }
   189  
   190  	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
   191        }
   192  
   193        {
   194  	int       k;
   195  	mp_srcptr ap1, bp1;
   196  	mp_size_t anp, bnp;
   197  
   198  	bp1 = b0;
   199  	bnp = bn;
   200  	if (LIKELY (an > n)) {
   201  	  ap1 = sp1;
   202  	  cy = mpn_sub (sp1, a0, n, a1, an - n);
   203  	  sp1[n] = 0;
   204  	  MPN_INCR_U (sp1, n + 1, cy);
   205  	  anp = n + ap1[n];
   206  	  if (LIKELY (bn > n)) {
   207  	    bp1 = sp1 + n + 1;
   208  	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
   209  	    sp1[2*n+1] = 0;
   210  	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
   211  	    bnp = n + bp1[n];
   212  	  }
   213  	} else {
   214  	  ap1 = a0;
   215  	  anp = an;
   216  	}
   217  
   218  	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
   219  	  k=0;
   220  	else
   221  	  {
   222  	    int mask;
   223  	    k = mpn_fft_best_k (n, 0);
   224  	    mask = (1<<k) - 1;
   225  	    while (n & mask) {k--; mask >>=1;};
   226  	  }
   227  	if (k >= FFT_FIRST_K)
   228  	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
   229  	else if (UNLIKELY (bp1 == b0))
   230  	  {
   231  	    ASSERT (anp + bnp <= 2*n+1);
   232  	    ASSERT (anp + bnp > n);
   233  	    ASSERT (anp >= bnp);
   234  	    mpn_mul (xp, ap1, anp, bp1, bnp);
   235  	    anp = anp + bnp - n;
   236  	    ASSERT (anp <= n || xp[2*n]==0);
   237  	    anp-= anp > n;
   238  	    cy = mpn_sub (xp, xp, n, xp + n, anp);
   239  	    xp[n] = 0;
   240  	    MPN_INCR_U (xp, n+1, cy);
   241  	  }
   242  	else
   243  	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
   244        }
   245  
   246        /* Here the CRT recomposition begins.
   247  
   248  	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
   249  	 Division by 2 is a bitwise rotation.
   250  
   251  	 Assumes xp normalised mod (B^n+1).
   252  
   253  	 The residue class [0] is represented by [B^n-1]; except when
   254  	 both input are ZERO.
   255        */
   256  
   257  #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
   258  #if HAVE_NATIVE_mpn_rsh1add_nc
   259        cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
   260        hi = cy << (GMP_NUMB_BITS - 1);
   261        cy = 0;
   262        /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
   263  	 overflows, i.e. a further increment will not overflow again. */
   264  #else /* ! _nc */
   265        cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
   266        hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
   267        cy >>= 1;
   268        /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
   269  	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
   270  #endif
   271  #if GMP_NAIL_BITS == 0
   272        add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
   273  #else
   274        cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
   275        rp[n-1] ^= hi;
   276  #endif
   277  #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
   278  #if HAVE_NATIVE_mpn_add_nc
   279        cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
   280  #else /* ! _nc */
   281        cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
   282  #endif
   283        cy += (rp[0]&1);
   284        mpn_rshift(rp, rp, n, 1);
   285        ASSERT (cy <= 2);
   286        hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
   287        cy >>= 1;
   288        /* We can have cy != 0 only if hi = 0... */
   289        ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
   290        rp[n-1] |= hi;
   291        /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
   292  #endif
   293        ASSERT (cy <= 1);
   294        /* Next increment can not overflow, read the previous comments about cy. */
   295        ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
   296        MPN_INCR_U(rp, n, cy);
   297  
   298        /* Compute the highest half:
   299  	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
   300         */
   301        if (UNLIKELY (an + bn < rn))
   302  	{
   303  	  /* Note that in this case, the only way the result can equal
   304  	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
   305  	     then the output of both the recursive calls and this CRT
   306  	     reconstruction is zero, not B^{rn} - 1. Which is good,
   307  	     since the latter representation doesn't fit in the output
   308  	     area.*/
   309  	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
   310  
   311  	  /* FIXME: This subtraction of the high parts is not really
   312  	     necessary, we do it to get the carry out, and for sanity
   313  	     checking. */
   314  	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
   315  				   xp + an + bn - n, rn - (an + bn), cy);
   316  	  ASSERT (an + bn == rn - 1 ||
   317  		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
   318  	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
   319  	  ASSERT (cy == (xp + an + bn - n)[0]);
   320  	}
   321        else
   322  	{
   323  	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
   324  	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
   325  	     DECR will affect _at most_ the lowest n limbs. */
   326  	  MPN_DECR_U (rp, 2*n, cy);
   327  	}
   328  #undef a0
   329  #undef a1
   330  #undef b0
   331  #undef b1
   332  #undef xp
   333  #undef sp1
   334      }
   335  }
   336  
   337  mp_size_t
   338  mpn_mulmod_bnm1_next_size (mp_size_t n)
   339  {
   340    mp_size_t nh;
   341  
   342    if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
   343      return n;
   344    if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
   345      return (n + (2-1)) & (-2);
   346    if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
   347      return (n + (4-1)) & (-4);
   348  
   349    nh = (n + 1) >> 1;
   350  
   351    if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
   352      return (n + (8-1)) & (-8);
   353  
   354    return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
   355  }