github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom33_mul.c

/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
   size.  Or more accurately, bn <= an < (3/2)bn.

   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2008, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

/* Evaluate in: -1, 0, +1, +2, +inf

  <-s--><--n--><--n-->
   ____ ______ ______
  |_a2_|___a1_|___a0_|
   |b2_|___b1_|___b0_|
   <-t-><--n--><--n-->

  v0   =  a0          * b0           #    A(0)*B(0)
  v1   = (a0+ a1+ a2) * (b0+ b1+ b2) #    A(1)*B(1)    ah  <= 2   bh <= 2
  vm1  = (a0- a1+ a2) * (b0- b1+ b2) #   A(-1)*B(-1)  |ah| <= 1   bh <= 1
  v2   = (a0+2a1+4a2) * (b0+2b1+4b2) #    A(2)*B(2)    ah  <= 6   bh <= 6
  vinf =           a2 * b2           # A(inf)*B(inf)
*/
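/* Worked example (added for illustration; not in the upstream GMP source):
   view the three limb blocks as polynomial coefficients, A(x) = a2*x^2 +
   a1*x + a0 and B(x) likewise.  The product C(x) = A(x)*B(x) has degree 4,
   so the five evaluations above determine it.  E.g. with a0,a1,a2 = 1,2,3
   and b0,b1,b2 = 4,5,6:

     v0   = 1*4                =   4
     v1   = (1+2+3)*(4+5+6)    =  90
     vm1  = (1-2+3)*(4-5+6)    =  10
     v2   = (1+4+12)*(4+10+24) = 646
     vinf = 3*6                =  18

   mpn_toom_interpolate_5pts recovers the five coefficients of C
   (c0..c4 = 4, 13, 28, 27, 18) from exactly these values.  */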
#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom33 1
#else
#define MAYBE_mul_basecase						\
  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
#define MAYBE_mul_toom33						\
  (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
#endif

/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
   multiplication at the infinity point.  We may have
   MAYBE_mul_basecase == 0, and still get s just below
   MUL_TOOM22_THRESHOLD.  If MUL_TOOM33_THRESHOLD == 7, we can even get
   s == 1 and mpn_toom22_mul will crash.
*/

#define TOOM33_MUL_N_REC(p, a, b, n, ws)				\
  do {									\
    if (MAYBE_mul_basecase						\
	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
      mpn_mul_basecase (p, a, n, b, n);					\
    else if (! MAYBE_mul_toom33						\
	     || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
      mpn_toom22_mul (p, a, n, b, n, ws);				\
    else								\
      mpn_toom33_mul (p, a, n, b, n, ws);				\
  } while (0)

void
mpn_toom33_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s, t;
  int vm1_neg;
  mp_limb_t cy, vinf0;
  mp_ptr gp;
  mp_ptr as1, asm1, as2;
  mp_ptr bs1, bsm1, bs2;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = (an + 2) / (size_t) 3;

  s = an - 2 * n;
  t = bn - 2 * n;

  ASSERT (an >= bn);

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  as1  = scratch + 4 * n + 4;
  asm1 = scratch + 2 * n + 2;
  as2  = pp + n + 1;

  bs1  = pp;
  bsm1 = scratch + 3 * n + 3;	/* we need 4n+4 <= 4n+s+t */
  bs2  = pp + 2 * n + 2;

  gp = scratch;

  vm1_neg = 0;

  /* Compute as1 and asm1.  */
  cy = mpn_add (gp, a0, n, a2, s);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
      as1[n] = cy >> 1;
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
      as1[n] = cy + (cy2 >> 1);
      asm1[n] = cy - (cy2 & 1);
    }
#else
  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      mpn_sub_n (asm1, a1, gp, n);
      asm1[n] = 0;
      vm1_neg = 1;
    }
  else
    {
      cy -= mpn_sub_n (asm1, gp, a1, n);
      asm1[n] = cy;
    }
#endif

  /* Compute as2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (as2, a1, a2, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  cy -= mpn_sub_n (as2, as2, a0, n);
#endif
#endif
  as2[n] = cy;
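  /* Note (added; not in the upstream source): all three branches above
     evaluate as2 = A(2) = a0 + 2*a1 + 4*a2 with shifts and adds only.
     The addlsh1 branch uses the Horner form 2*(2*a2 + a1) + a0, while the
     rsblsh1 and generic branches reuse as1 = A(1) via
     A(2) = 2*(A(1) + a2) - a0.  The same applies to bs2 = B(2) below.  */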
  /* Compute bs1 and bsm1.  */
  cy = mpn_add (gp, b0, n, b2, t);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
      bs1[n] = cy >> 1;
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
      bs1[n] = cy + (cy2 >> 1);
      bsm1[n] = cy - (cy2 & 1);
    }
#else
  bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
  if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, gp, n);
      bsm1[n] = 0;
      vm1_neg ^= 1;
    }
  else
    {
      cy -= mpn_sub_n (bsm1, gp, b1, n);
      bsm1[n] = cy;
    }
#endif

  /* Compute bs2.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (bs2, b2, bs1, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy += bs1[n];
  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (bs2, b1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
#else
  cy = mpn_add_n (bs2, bs1, b2, t);
  if (t != n)
    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
  cy += bs1[n];
  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
  cy -= mpn_sub_n (bs2, bs2, b0, n);
#endif
#endif
  bs2[n] = cy;

  ASSERT (as1[n] <= 2);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 1);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 6);
  ASSERT (bs2[n] <= 6);

#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 4 * n)			/* s+t */
#define vm1   scratch				/* 2n+1 */
#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
#define scratch_out  (scratch + 5 * n + 5)

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
  cy = 0;
  if (asm1[n] != 0)
    cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
  if (bsm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else
  TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
#endif

  TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */

  /* vinf, s+t limbs */
  if (s > t)  mpn_mul (vinf, a2, s, b2, t);
  else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);

  vinf0 = vinf[0];				/* v1 overlaps with this */

#ifdef SMALLER_RECURSION
  /* v1, 2n+1 limbs */
  TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (bs1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (bs1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else
  cy = vinf[1];
  TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
  vinf[1] = cy;
#endif

  TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */

  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
}
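/* Usage sketch (added for illustration; not part of the upstream file).
   mpn_toom33_mul is internal, so portable code should reach it through the
   documented mpn_mul, which selects the algorithm from the operand sizes.
   A direct call would look roughly like the following, where the scratch
   sizing via mpn_toom33_mul_itch assumes the macro defined in gmp-impl.h:

     #include "gmp.h"
     #include "gmp-impl.h"

     mp_limb_t ap[6] = { 1, 2, 3, 4, 5, 6 };  // least significant limb first
     mp_limb_t bp[6] = { 6, 5, 4, 3, 2, 1 };
     mp_limb_t pp[12];                        // an + bn product limbs
     mp_limb_t scratch[mpn_toom33_mul_itch (6, 6)];

     mpn_toom33_mul (pp, ap, 6, bp, 6, scratch);  // {pp,12} = {ap,6} * {bp,6}

   The operand sizes must satisfy bn <= an < (3/2)*bn from the header
   comment; here an = bn = 6 gives n = 2 and s = t = 2, which passes the
   ASSERTs above.  */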