github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom6h_mul.c (about) 1 /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 41 42 #if GMP_NUMB_BITS < 21 43 #error Not implemented. 44 #endif 45 46 #if TUNE_PROGRAM_BUILD 47 #define MAYBE_mul_basecase 1 48 #define MAYBE_mul_toom22 1 49 #define MAYBE_mul_toom33 1 50 #define MAYBE_mul_toom6h 1 51 #else 52 #define MAYBE_mul_basecase \ 53 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD) 54 #define MAYBE_mul_toom22 \ 55 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD) 56 #define MAYBE_mul_toom33 \ 57 (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD) 58 #define MAYBE_mul_toom6h \ 59 (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD) 60 #endif 61 62 #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \ 63 do { \ 64 if (MAYBE_mul_basecase \ 65 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \ 66 mpn_mul_basecase (p, a, n, b, n); \ 67 if (f) \ 68 mpn_mul_basecase (p2, a2, n, b2, n); \ 69 } else if (MAYBE_mul_toom22 \ 70 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \ 71 mpn_toom22_mul (p, a, n, b, n, ws); \ 72 if (f) \ 73 mpn_toom22_mul (p2, a2, n, b2, n, ws); \ 74 } else if (MAYBE_mul_toom33 \ 75 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \ 76 mpn_toom33_mul (p, a, n, b, n, ws); \ 77 if (f) \ 78 mpn_toom33_mul (p2, a2, n, b2, n, ws); \ 79 } else if (! MAYBE_mul_toom6h \ 80 || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \ 81 mpn_toom44_mul (p, a, n, b, n, ws); \ 82 if (f) \ 83 mpn_toom44_mul (p2, a2, n, b2, n, ws); \ 84 } else { \ 85 mpn_toom6h_mul (p, a, n, b, n, ws); \ 86 if (f) \ 87 mpn_toom6h_mul (p2, a2, n, b2, n, ws); \ 88 } \ 89 } while (0) 90 91 #define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \ 92 do { mpn_mul (p, a, na, b, nb); \ 93 } while (0) 94 95 /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 96 With: an >= bn >= 46, an*6 < bn * 17. 97 It _may_ work with bn<=46 and bn*17 < an*6 < bn*18 98 99 Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0. 100 */ 101 /* Estimate on needed scratch: 102 S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6), 103 since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6 104 */ 105 106 void 107 mpn_toom6h_mul (mp_ptr pp, 108 mp_srcptr ap, mp_size_t an, 109 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 110 { 111 mp_size_t n, s, t; 112 int p, q, half; 113 int sign; 114 115 /***************************** decomposition *******************************/ 116 117 ASSERT (an >= bn); 118 /* Can not handle too much unbalancement */ 119 ASSERT (bn >= 42); 120 /* Can not handle too much unbalancement */ 121 ASSERT ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17)); 122 123 /* Limit num/den is a rational number between 124 (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */ 125 #define LIMIT_numerator (18) 126 #define LIMIT_denominat (17) 127 128 if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */ 129 { 130 n = 1 + (an - 1) / (size_t) 6; 131 p = q = 5; 132 half = 0; 133 134 s = an - 5 * n; 135 t = bn - 5 * n; 136 } 137 else { 138 if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn) 139 { p = 7; q = 6; } 140 else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn) 141 { p = 7; q = 5; } 142 else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn) /* is 4*... < 8*... */ 143 { p = 8; q = 5; } 144 else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn) /* is 4*... < 8*... */ 145 { p = 8; q = 4; } 146 else 147 { p = 9; q = 4; } 148 149 half = (p ^ q) & 1; 150 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 151 p--; q--; 152 153 s = an - p * n; 154 t = bn - q * n; 155 156 /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/ 157 if (half) { /* Recover from badly chosen splitting */ 158 if (UNLIKELY (s<1)) {p--; s+=n; half=0;} 159 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;} 160 } 161 } 162 #undef LIMIT_numerator 163 #undef LIMIT_denominat 164 165 ASSERT (0 < s && s <= n); 166 ASSERT (0 < t && t <= n); 167 ASSERT (half || s + t > 3); 168 ASSERT (n > 2); 169 170 #define r4 (pp + 3 * n) /* 3n+1 */ 171 #define r2 (pp + 7 * n) /* 3n+1 */ 172 #define r0 (pp +11 * n) /* s+t <= 2*n */ 173 #define r5 (scratch) /* 3n+1 */ 174 #define r3 (scratch + 3 * n + 1) /* 3n+1 */ 175 #define r1 (scratch + 6 * n + 2) /* 3n+1 */ 176 #define v0 (pp + 7 * n) /* n+1 */ 177 #define v1 (pp + 8 * n+1) /* n+1 */ 178 #define v2 (pp + 9 * n+2) /* n+1 */ 179 #define v3 (scratch + 9 * n + 3) /* n+1 */ 180 #define wsi (scratch + 9 * n + 3) /* 3n+1 */ 181 #define wse (scratch +10 * n + 4) /* 2n+1 */ 182 183 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may 184 need all of them */ 185 /* if (scratch == NULL) */ 186 /* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */ 187 ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn)); 188 ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6)); 189 190 /********************** evaluation and recursive calls *********************/ 191 /* $\pm1/2$ */ 192 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 193 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 194 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */ 195 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse); 196 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half); 197 198 /* $\pm1$ */ 199 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 200 if (UNLIKELY (q == 3)) 201 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 202 else 203 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 204 /* A(-1)*B(-1) */ /* A(1)*B(1) */ 205 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse); 206 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0); 207 208 /* $\pm4$ */ 209 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 210 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 211 /* A(-4)*B(-4) */ 212 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */ 213 mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4); 214 215 /* $\pm1/4$ */ 216 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 217 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 218 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */ 219 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse); 220 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 221 222 /* $\pm2$ */ 223 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 224 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 225 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */ 226 TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse); 227 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2); 228 229 #undef v0 230 #undef v1 231 #undef v2 232 #undef v3 233 #undef wse 234 235 /* A(0)*B(0) */ 236 TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi); 237 238 /* Infinity */ 239 if (UNLIKELY (half != 0)) { 240 if (s > t) { 241 TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 242 } else { 243 TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 244 }; 245 }; 246 247 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi); 248 249 #undef r0 250 #undef r1 251 #undef r2 252 #undef r3 253 #undef r4 254 #undef r5 255 #undef wsi 256 } 257 258 #undef TOOM6H_MUL_N_REC 259 #undef TOOM6H_MUL_REC 260 #undef MAYBE_mul_basecase 261 #undef MAYBE_mul_toom22 262 #undef MAYBE_mul_toom33 263 #undef MAYBE_mul_toom6h