github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom53_mul.c

/* mpn_toom53_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 5/3
   times as large as bn.  Or more accurately, (4/3)bn < an < (5/2)bn.

   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

   The idea of applying toom to unbalanced multiplication is due to Marco
   Bodrato and Alberto Zanoni.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf

  <-s-><--n--><--n--><--n--><--n-->
   ___ ______ ______ ______ ______
  |a4_|___a3_|___a2_|___a1_|___a0_|
	       |__b2|___b1_|___b0_|
	       <-t--><--n--><--n-->

  v0  =    a0                  *  b0           #    A(0)*B(0)
  v1  = (  a0+ a1+ a2+ a3+ a4)*( b0+ b1+ b2)   #    A(1)*B(1)    ah  <= 4   bh <= 2
  vm1 = (  a0- a1+ a2- a3+ a4)*( b0- b1+ b2)   #   A(-1)*B(-1)  |ah| <= 2   bh <= 1
  v2  = ( a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2)   #    A(2)*B(2)    ah  <= 30  bh <= 6
  vm2 = ( a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2)   #   A(-2)*B(-2)  -9<=ah<=20 -1<=bh<=4
  vh  = (16a0+8a1+4a2+2a3+ a4)*(4b0+2b1+ b2)   #  A(1/2)*B(1/2)  ah  <= 30  bh <= 6
  vinf=                     a4 *          b2   #  A(inf)*B(inf)
*/

void
mpn_toom53_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  mp_size_t n, s, t;
  mp_limb_t cy;
  mp_ptr gp;
  mp_ptr as1, asm1, as2, asm2, ash;
  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
  mp_ptr tmp;
  enum toom7_flags flags;
  TMP_DECL;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)
#define a3  (ap + 3*n)
#define a4  (ap + 4*n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2*n)

  n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);

  s = an - 4 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);

  TMP_MARK;

  tmp = TMP_ALLOC_LIMBS (10 * (n + 1));
  as1  = tmp; tmp += n + 1;
  asm1 = tmp; tmp += n + 1;
  as2  = tmp; tmp += n + 1;
  asm2 = tmp; tmp += n + 1;
  ash  = tmp; tmp += n + 1;
  bs1  = tmp; tmp += n + 1;
  bsm1 = tmp; tmp += n + 1;
  bs2  = tmp; tmp += n + 1;
  bsm2 = tmp; tmp += n + 1;
  bsh  = tmp; tmp += n + 1;

  gp = pp;
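  /* Point evaluations for A.  mpn_toom_eval_pm1 and mpn_toom_eval_pm2
     store A(1),|A(-1)| resp. A(2),|A(-2)| and return an all-ones mask
     when the negative-point value is negative; ANDing that mask with
     toom7_w3_neg resp. toom7_w1_neg records the sign in FLAGS so that
     mpn_toom_interpolate_7pts can correct for it.  */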
  /* Compute as1 and asm1.  */
  flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));

  /* Compute as2 and asm2.  */
  flags = (enum toom7_flags) (flags | (toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp)));

  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (ash, a1, a0, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
  if (s < n)
    {
      mp_limb_t cy2;
      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
      MPN_INCR_U (ash + s, n+1-s, cy2);
    }
  else
    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
#else
  cy = mpn_lshift (ash, a0, n, 1);
  cy += mpn_add_n (ash, ash, a1, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a2, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  cy += mpn_add_n (ash, ash, a3, n);
  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
#endif

  /* Compute bs1 and bsm1.  */
  bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
      bsm1[n] = 0;
      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
    }
  else
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
      bsm1[n] = bs1[n] - (cy & 1);
      bs1[n] += (cy >> 1);
    }
#else
  if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, bs1, n);
      bsm1[n] = 0;
      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
    }
  else
    {
      bsm1[n] = bs1[n] - mpn_sub_n (bsm1, bs1, b1, n);
    }
  bs1[n] += mpn_add_n (bs1, bs1, b1, n);	/* b0+b1+b2 */
#endif

  /* Compute bs2 and bsm2.  */
#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
#if HAVE_NATIVE_mpn_addlsh2_n
  cy = mpn_addlsh2_n (bs2, b0, b2, t);
#else /* HAVE_NATIVE_mpn_addlsh_n */
  cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
#endif
  if (t < n)
    cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
  bs2[n] = cy;
#else
  cy = mpn_lshift (gp, b2, t, 2);
  bs2[n] = mpn_add (bs2, b0, n, gp, t);
  MPN_INCR_U (bs2 + t, n+1-t, cy);
#endif

  gp[n] = mpn_lshift (gp, b1, n, 1);

#if HAVE_NATIVE_mpn_add_n_sub_n
  if (mpn_cmp (bs2, gp, n+1) < 0)
    {
      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
    }
  else
    {
      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
    }
#else
  if (mpn_cmp (bs2, gp, n+1) < 0)
    {
      ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
    }
  else
    {
      ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
    }
  mpn_add_n (bs2, bs2, gp, n+1);
#endif
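  /* Here bs2 = B(2) = b0 + 2*b1 + 4*b2 and bsm2 = |B(-2)|: gp held 2*b1,
     so the comparison against b0 + 4*b2 decides the sign of B(-2).  The
     XOR with toom7_w1_neg combines that sign with the one already
     recorded for A(-2), giving the sign of vm2 = A(-2)*B(-2).  */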
  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1) + b2.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (bsh, b1, b0, n);
  if (t < n)
    {
      mp_limb_t cy2;
      cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
      bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
      MPN_INCR_U (bsh + t, n+1-t, cy2);
    }
  else
    bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
#else
  cy = mpn_lshift (bsh, b0, n, 1);
  cy += mpn_add_n (bsh, bsh, b1, n);
  cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
  bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
#endif

  ASSERT (as1[n] <= 4);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 2);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 30);
  ASSERT (bs2[n] <= 6);
  ASSERT (asm2[n] <= 20);
  ASSERT (bsm2[n] <= 4);
  ASSERT (ash[n] <= 30);
  ASSERT (bsh[n] <= 6);

#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 6 * n)			/* s+t */
#define v2    scratch				/* 2n+1 */
#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
#define scratch_out (scratch + 8 * n + 4)	/* 2n+1 */
	 /* Total scratch need: 10*n+5 */

  /* Must be in allocation order, as they overwrite one limb beyond
   * 2n+1. */
  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  mpn_mul_n (vm1, asm1, bsm1, n);
  if (asm1[n] == 1)
    {
      cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
    }
  else if (asm1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bsm1[n] + mpn_addlsh1_n (vm1 + n, vm1 + n, bsm1, n);
#else
      cy = 2 * bsm1[n] + mpn_addmul_1 (vm1 + n, bsm1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (bsm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else /* SMALLER_RECURSION */
  vm1[2 * n] = 0;
  mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
#endif /* SMALLER_RECURSION */

  /* v1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  mpn_mul_n (v1, as1, bs1, n);
  if (as1[n] == 1)
    {
      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
    }
  else if (as1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
#else
      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
    }
  else if (as1[n] != 0)
    {
      cy = as1[n] * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, as1[n]);
    }
  else
    cy = 0;
  if (bs1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (bs1[n] == 2)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else /* SMALLER_RECURSION */
  v1[2 * n] = 0;
  mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
#endif /* SMALLER_RECURSION */

  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */

  /* vinf, s+t limbs */
  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
  else        mpn_mul (vinf, b2, t, a4, s);

  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
			     scratch_out);

  TMP_FREE;
}
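
/* A minimal usage sketch (not part of the original file).  This function
   is internal and is normally reached via mpn_mul's Toom selection; a
   direct call could look like the hypothetical helper below.  It assumes
   the mpn_toom53_mul_itch inline from gmp-impl.h for the scratch bound
   and operands already in the (4/3)bn < an < (5/2)bn window.  */
#if 0
static void
toom53_mul_example (mp_ptr pp,
		    mp_srcptr ap, mp_size_t an,
		    mp_srcptr bp, mp_size_t bn)
{
  mp_ptr scratch;
  TMP_DECL;

  TMP_MARK;
  /* Scratch bound per gmp-impl.h; at least the 10*n+5 limbs used above.  */
  scratch = TMP_ALLOC_LIMBS (mpn_toom53_mul_itch (an, bn));
  mpn_toom53_mul (pp, ap, an, bp, bn, scratch);	/* pp needs an+bn limbs */
  TMP_FREE;
}
#endif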