/* mulmod_bnm1.c -- multiplication mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
   mod B^rn - 1, and values are semi-normalised; zero is represented
   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
   tp==rp is allowed.
*/ 47 void 48 mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn, 49 mp_ptr tp) 50 { 51 mp_limb_t cy; 52 53 ASSERT (0 < rn); 54 55 mpn_mul_n (tp, ap, bp, rn); 56 cy = mpn_add_n (rp, tp, tp + rn, rn); 57 /* If cy == 1, then the value of rp is at most B^rn - 2, so there can 58 * be no overflow when adding in the carry. */ 59 MPN_INCR_U (rp, rn, cy); 60 } 61 62 63 /* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in 64 semi-normalised representation, computation is mod B^rn + 1. Needs 65 a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed. 66 Output is normalised. */ 67 static void 68 mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn, 69 mp_ptr tp) 70 { 71 mp_limb_t cy; 72 73 ASSERT (0 < rn); 74 75 mpn_mul_n (tp, ap, bp, rn + 1); 76 ASSERT (tp[2*rn+1] == 0); 77 ASSERT (tp[2*rn] < GMP_NUMB_MAX); 78 cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn); 79 rp[rn] = 0; 80 MPN_INCR_U (rp, rn+1, cy ); 81 } 82 83 84 /* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1) 85 * 86 * The result is expected to be ZERO if and only if one of the operand 87 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by 88 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to 89 * combine results and obtain a natural number when one knows in 90 * advance that the final value is less than (B^rn-1). 91 * Moreover it should not be a problem if mulmod_bnm1 is used to 92 * compute the full product with an+bn <= rn, because this condition 93 * implies (B^an-1)(B^bn-1) < (B^rn-1) . 94 * 95 * Requires 0 < bn <= an <= rn and an + bn > rn/2 96 * Scratch need: rn + (need for recursive call OR rn + 4). 
This gives 97 * 98 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4 99 */ 100 void 101 mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp) 102 { 103 ASSERT (0 < bn); 104 ASSERT (bn <= an); 105 ASSERT (an <= rn); 106 107 if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD)) 108 { 109 if (UNLIKELY (bn < rn)) 110 { 111 if (UNLIKELY (an + bn <= rn)) 112 { 113 mpn_mul (rp, ap, an, bp, bn); 114 } 115 else 116 { 117 mp_limb_t cy; 118 mpn_mul (tp, ap, an, bp, bn); 119 cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn); 120 MPN_INCR_U (rp, rn, cy); 121 } 122 } 123 else 124 mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp); 125 } 126 else 127 { 128 mp_size_t n; 129 mp_limb_t cy; 130 mp_limb_t hi; 131 132 n = rn >> 1; 133 134 /* We need at least an + bn >= n, to be able to fit one of the 135 recursive products at rp. Requiring strict inequality makes 136 the code slightly simpler. If desired, we could avoid this 137 restriction by initially halving rn as long as rn is even and 138 an + bn <= rn/2. 
*/ 139 140 ASSERT (an + bn > n); 141 142 /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1) 143 and crt together as 144 145 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)] 146 */ 147 148 #define a0 ap 149 #define a1 (ap + n) 150 #define b0 bp 151 #define b1 (bp + n) 152 153 #define xp tp /* 2n + 2 */ 154 /* am1 maybe in {xp, n} */ 155 /* bm1 maybe in {xp + n, n} */ 156 #define sp1 (tp + 2*n + 2) 157 /* ap1 maybe in {sp1, n + 1} */ 158 /* bp1 maybe in {sp1 + n + 1, n + 1} */ 159 160 { 161 mp_srcptr am1, bm1; 162 mp_size_t anm, bnm; 163 mp_ptr so; 164 165 bm1 = b0; 166 bnm = bn; 167 if (LIKELY (an > n)) 168 { 169 am1 = xp; 170 cy = mpn_add (xp, a0, n, a1, an - n); 171 MPN_INCR_U (xp, n, cy); 172 anm = n; 173 so = xp + n; 174 if (LIKELY (bn > n)) 175 { 176 bm1 = so; 177 cy = mpn_add (so, b0, n, b1, bn - n); 178 MPN_INCR_U (so, n, cy); 179 bnm = n; 180 so += n; 181 } 182 } 183 else 184 { 185 so = xp; 186 am1 = a0; 187 anm = an; 188 } 189 190 mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so); 191 } 192 193 { 194 int k; 195 mp_srcptr ap1, bp1; 196 mp_size_t anp, bnp; 197 198 bp1 = b0; 199 bnp = bn; 200 if (LIKELY (an > n)) { 201 ap1 = sp1; 202 cy = mpn_sub (sp1, a0, n, a1, an - n); 203 sp1[n] = 0; 204 MPN_INCR_U (sp1, n + 1, cy); 205 anp = n + ap1[n]; 206 if (LIKELY (bn > n)) { 207 bp1 = sp1 + n + 1; 208 cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n); 209 sp1[2*n+1] = 0; 210 MPN_INCR_U (sp1 + n + 1, n + 1, cy); 211 bnp = n + bp1[n]; 212 } 213 } else { 214 ap1 = a0; 215 anp = an; 216 } 217 218 if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD)) 219 k=0; 220 else 221 { 222 int mask; 223 k = mpn_fft_best_k (n, 0); 224 mask = (1<<k) - 1; 225 while (n & mask) {k--; mask >>=1;}; 226 } 227 if (k >= FFT_FIRST_K) 228 xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k); 229 else if (UNLIKELY (bp1 == b0)) 230 { 231 ASSERT (anp + bnp <= 2*n+1); 232 ASSERT (anp + bnp > n); 233 ASSERT (anp >= bnp); 234 mpn_mul (xp, ap1, anp, bp1, bnp); 235 anp = anp + bnp - n; 236 ASSERT (anp 
<= n || xp[2*n]==0); 237 anp-= anp > n; 238 cy = mpn_sub (xp, xp, n, xp + n, anp); 239 xp[n] = 0; 240 MPN_INCR_U (xp, n+1, cy); 241 } 242 else 243 mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp); 244 } 245 246 /* Here the CRT recomposition begins. 247 248 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1) 249 Division by 2 is a bitwise rotation. 250 251 Assumes xp normalised mod (B^n+1). 252 253 The residue class [0] is represented by [B^n-1]; except when 254 both input are ZERO. 255 */ 256 257 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc 258 #if HAVE_NATIVE_mpn_rsh1add_nc 259 cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */ 260 hi = cy << (GMP_NUMB_BITS - 1); 261 cy = 0; 262 /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi 263 overflows, i.e. a further increment will not overflow again. */ 264 #else /* ! _nc */ 265 cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */ 266 hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */ 267 cy >>= 1; 268 /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that 269 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */ 270 #endif 271 #if GMP_NAIL_BITS == 0 272 add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi); 273 #else 274 cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1); 275 rp[n-1] ^= hi; 276 #endif 277 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */ 278 #if HAVE_NATIVE_mpn_add_nc 279 cy = mpn_add_nc(rp, rp, xp, n, xp[n]); 280 #else /* ! _nc */ 281 cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */ 282 #endif 283 cy += (rp[0]&1); 284 mpn_rshift(rp, rp, n, 1); 285 ASSERT (cy <= 2); 286 hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */ 287 cy >>= 1; 288 /* We can have cy != 0 only if hi = 0... */ 289 ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0); 290 rp[n-1] |= hi; 291 /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */ 292 #endif 293 ASSERT (cy <= 1); 294 /* Next increment can not overflow, read the previous comments about cy. 
*/ 295 ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0)); 296 MPN_INCR_U(rp, n, cy); 297 298 /* Compute the highest half: 299 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n 300 */ 301 if (UNLIKELY (an + bn < rn)) 302 { 303 /* Note that in this case, the only way the result can equal 304 zero mod B^{rn} - 1 is if one of the inputs is zero, and 305 then the output of both the recursive calls and this CRT 306 reconstruction is zero, not B^{rn} - 1. Which is good, 307 since the latter representation doesn't fit in the output 308 area.*/ 309 cy = mpn_sub_n (rp + n, rp, xp, an + bn - n); 310 311 /* FIXME: This subtraction of the high parts is not really 312 necessary, we do it to get the carry out, and for sanity 313 checking. */ 314 cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n, 315 xp + an + bn - n, rn - (an + bn), cy); 316 ASSERT (an + bn == rn - 1 || 317 mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn))); 318 cy = mpn_sub_1 (rp, rp, an + bn, cy); 319 ASSERT (cy == (xp + an + bn - n)[0]); 320 } 321 else 322 { 323 cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n); 324 /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO. 325 DECR will affect _at most_ the lowest n limbs. */ 326 MPN_DECR_U (rp, 2*n, cy); 327 } 328 #undef a0 329 #undef a1 330 #undef b0 331 #undef b1 332 #undef xp 333 #undef sp1 334 } 335 } 336 337 mp_size_t 338 mpn_mulmod_bnm1_next_size (mp_size_t n) 339 { 340 mp_size_t nh; 341 342 if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD)) 343 return n; 344 if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1)) 345 return (n + (2-1)) & (-2); 346 if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1)) 347 return (n + (4-1)) & (-4); 348 349 nh = (n + 1) >> 1; 350 351 if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD)) 352 return (n + (8-1)) & (-8); 353 354 return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0)); 355 }