github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_7pts.c (about) 1 /* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62. 2 3 Contributed to the GNU project by Niels Möller. 4 Improvements by Marco Bodrato. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2006, 2007, 2009, 2014 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21 or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27 or both in parallel, as here. 28 29 The GNU MP Library is distributed in the hope that it will be useful, but 30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 for more details. 33 34 You should have received copies of the GNU General Public License and the 35 GNU Lesser General Public License along with the GNU MP Library. If not, 36 see https://www.gnu.org/licenses/. */ 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 41 #define BINVERT_3 MODLIMB_INVERSE_3 42 43 #define BINVERT_9 \ 44 ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39) 45 46 #define BINVERT_15 \ 47 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15) 48 49 /* For the various mpn_divexact_byN here, fall back to using either 50 mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is 51 many faster if it is native. For now, since mpn_divexact_1 is native on 52 several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use 53 mpn_pi1_bdiv_q_1 unconditionally. FIXME. */ 54 55 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */ 56 #ifndef mpn_divexact_by3 57 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 58 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0) 59 #else 60 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 61 #endif 62 #endif 63 64 #ifndef mpn_divexact_by9 65 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 66 #define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0) 67 #else 68 #define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9) 69 #endif 70 #endif 71 72 #ifndef mpn_divexact_by15 73 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 74 #define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0) 75 #else 76 #define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15) 77 #endif 78 #endif 79 80 /* Interpolation for toom4, using the evaluation points 0, infinity, 81 1, -1, 2, -2, 1/2. More precisely, we want to compute 82 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the 83 seven values 84 85 w0 = f(0), 86 w1 = f(-2), 87 w2 = f(1), 88 w3 = f(-1), 89 w4 = f(2) 90 w5 = 64 * f(1/2) 91 w6 = limit at infinity of f(x) / x^6, 92 93 The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n }, 94 w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n, 95 w6n }. The other values are 2n + 1 limbs each (with most 96 significant limbs small). f(-1) and f(-1/2) may be negative, signs 97 determined by the flag bits. Inputs are destroyed. 98 99 Needs (2*n + 1) limbs of temporary storage. 100 */ 101 102 void 103 mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags, 104 mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5, 105 mp_size_t w6n, mp_ptr tp) 106 { 107 mp_size_t m; 108 mp_limb_t cy; 109 110 m = 2*n + 1; 111 #define w0 rp 112 #define w2 (rp + 2*n) 113 #define w6 (rp + 6*n) 114 115 ASSERT (w6n > 0); 116 ASSERT (w6n <= 2*n); 117 118 /* Using formulas similar to Marco Bodrato's 119 120 W5 = W5 + W4 121 W1 =(W4 - W1)/2 122 W4 = W4 - W0 123 W4 =(W4 - W1)/4 - W6*16 124 W3 =(W2 - W3)/2 125 W2 = W2 - W3 126 127 W5 = W5 - W2*65 May be negative. 128 W2 = W2 - W6 - W0 129 W5 =(W5 + W2*45)/2 Now >= 0 again. 130 W4 =(W4 - W2)/3 131 W2 = W2 - W4 132 133 W1 = W5 - W1 May be negative. 134 W5 =(W5 - W3*8)/9 135 W3 = W3 - W5 136 W1 =(W1/15 + W5)/2 Now >= 0 again. 137 W5 = W5 - W1 138 139 where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1), 140 W4 = f(2), W5 = f(1/2), W6 = f(oo), 141 142 Note that most intermediate results are positive; the ones that 143 may be negative are represented in two's complement. We must 144 never shift right a value that may be negative, since that would 145 invalidate the sign bit. On the other hand, divexact by odd 146 numbers work fine with two's complement. 147 */ 148 149 mpn_add_n (w5, w5, w4, m); 150 if (flags & toom7_w1_neg) 151 { 152 #ifdef HAVE_NATIVE_mpn_rsh1add_n 153 mpn_rsh1add_n (w1, w1, w4, m); 154 #else 155 mpn_add_n (w1, w1, w4, m); ASSERT (!(w1[0] & 1)); 156 mpn_rshift (w1, w1, m, 1); 157 #endif 158 } 159 else 160 { 161 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 162 mpn_rsh1sub_n (w1, w4, w1, m); 163 #else 164 mpn_sub_n (w1, w4, w1, m); ASSERT (!(w1[0] & 1)); 165 mpn_rshift (w1, w1, m, 1); 166 #endif 167 } 168 mpn_sub (w4, w4, m, w0, 2*n); 169 mpn_sub_n (w4, w4, w1, m); ASSERT (!(w4[0] & 3)); 170 mpn_rshift (w4, w4, m, 2); /* w4>=0 */ 171 172 tp[w6n] = mpn_lshift (tp, w6, w6n, 4); 173 mpn_sub (w4, w4, m, tp, w6n+1); 174 175 if (flags & toom7_w3_neg) 176 { 177 #ifdef HAVE_NATIVE_mpn_rsh1add_n 178 mpn_rsh1add_n (w3, w3, w2, m); 179 #else 180 mpn_add_n (w3, w3, w2, m); ASSERT (!(w3[0] & 1)); 181 mpn_rshift (w3, w3, m, 1); 182 #endif 183 } 184 else 185 { 186 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 187 mpn_rsh1sub_n (w3, w2, w3, m); 188 #else 189 mpn_sub_n (w3, w2, w3, m); ASSERT (!(w3[0] & 1)); 190 mpn_rshift (w3, w3, m, 1); 191 #endif 192 } 193 194 mpn_sub_n (w2, w2, w3, m); 195 196 mpn_submul_1 (w5, w2, m, 65); 197 mpn_sub (w2, w2, m, w6, w6n); 198 mpn_sub (w2, w2, m, w0, 2*n); 199 200 mpn_addmul_1 (w5, w2, m, 45); ASSERT (!(w5[0] & 1)); 201 mpn_rshift (w5, w5, m, 1); 202 mpn_sub_n (w4, w4, w2, m); 203 204 mpn_divexact_by3 (w4, w4, m); 205 mpn_sub_n (w2, w2, w4, m); 206 207 mpn_sub_n (w1, w5, w1, m); 208 mpn_lshift (tp, w3, m, 3); 209 mpn_sub_n (w5, w5, tp, m); 210 mpn_divexact_by9 (w5, w5, m); 211 mpn_sub_n (w3, w3, w5, m); 212 213 mpn_divexact_by15 (w1, w1, m); 214 mpn_add_n (w1, w1, w5, m); ASSERT (!(w1[0] & 1)); 215 mpn_rshift (w1, w1, m, 1); /* w1>=0 now */ 216 mpn_sub_n (w5, w5, w1, m); 217 218 /* These bounds are valid for the 4x4 polynomial product of toom44, 219 * and they are conservative for toom53 and toom62. */ 220 ASSERT (w1[2*n] < 2); 221 ASSERT (w2[2*n] < 3); 222 ASSERT (w3[2*n] < 4); 223 ASSERT (w4[2*n] < 3); 224 ASSERT (w5[2*n] < 2); 225 226 /* Addition chain. Note carries and the 2n'th limbs that need to be 227 * added in. 228 * 229 * Special care is needed for w2[2n] and the corresponding carry, 230 * since the "simple" way of adding it all together would overwrite 231 * the limb at wp[2*n] and rp[4*n] (same location) with the sum of 232 * the high half of w3 and the low half of w4. 233 * 234 * 7 6 5 4 3 2 1 0 235 * | | | | | | | | | 236 * ||w3 (2n+1)| 237 * ||w4 (2n+1)| 238 * ||w5 (2n+1)| ||w1 (2n+1)| 239 * + | w6 (w6n)| ||w2 (2n+1)| w0 (2n) | (share storage with r) 240 * ----------------------------------------------- 241 * r | | | | | | | | | 242 * c7 c6 c5 c4 c3 Carries to propagate 243 */ 244 245 cy = mpn_add_n (rp + n, rp + n, w1, m); 246 MPN_INCR_U (w2 + n + 1, n , cy); 247 cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n); 248 MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy); 249 cy = mpn_add_n (rp + 4*n, w3 + n, w4, n); 250 MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy); 251 cy = mpn_add_n (rp + 5*n, w4 + n, w5, n); 252 MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy); 253 if (w6n > n + 1) 254 { 255 cy = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1); 256 MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, cy); 257 } 258 else 259 { 260 ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n)); 261 #if WANT_ASSERT 262 { 263 mp_size_t i; 264 for (i = w6n; i <= n; i++) 265 ASSERT (w5[n + i] == 0); 266 } 267 #endif 268 } 269 }