github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom8h_mul.c (about) 1 /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 41 42 #if GMP_NUMB_BITS < 29 43 #error Not implemented. 44 #endif 45 46 #if GMP_NUMB_BITS < 43 47 #define BIT_CORRECTION 1 48 #define CORRECTION_BITS GMP_NUMB_BITS 49 #else 50 #define BIT_CORRECTION 0 51 #define CORRECTION_BITS 0 52 #endif 53 54 55 #if TUNE_PROGRAM_BUILD 56 #define MAYBE_mul_basecase 1 57 #define MAYBE_mul_toom22 1 58 #define MAYBE_mul_toom33 1 59 #define MAYBE_mul_toom44 1 60 #define MAYBE_mul_toom8h 1 61 #else 62 #define MAYBE_mul_basecase \ 63 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD) 64 #define MAYBE_mul_toom22 \ 65 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD) 66 #define MAYBE_mul_toom33 \ 67 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD) 68 #define MAYBE_mul_toom44 \ 69 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD) 70 #define MAYBE_mul_toom8h \ 71 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD) 72 #endif 73 74 #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \ 75 do { \ 76 if (MAYBE_mul_basecase \ 77 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \ 78 mpn_mul_basecase (p, a, n, b, n); \ 79 if (f) mpn_mul_basecase (p2, a2, n, b2, n); \ 80 } else if (MAYBE_mul_toom22 \ 81 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \ 82 mpn_toom22_mul (p, a, n, b, n, ws); \ 83 if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \ 84 } else if (MAYBE_mul_toom33 \ 85 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \ 86 mpn_toom33_mul (p, a, n, b, n, ws); \ 87 if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \ 88 } else if (MAYBE_mul_toom44 \ 89 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \ 90 mpn_toom44_mul (p, a, n, b, n, ws); \ 91 if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \ 92 } else if (! MAYBE_mul_toom8h \ 93 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \ 94 mpn_toom6h_mul (p, a, n, b, n, ws); \ 95 if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \ 96 } else { \ 97 mpn_toom8h_mul (p, a, n, b, n, ws); \ 98 if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \ 99 } \ 100 } while (0) 101 102 #define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \ 103 do { mpn_mul (p, a, na, b, nb); } while (0) 104 105 /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 106 With: an >= bn >= 86, an*5 < bn * 11. 107 It _may_ work with bn<=?? and bn*?? < an*? < bn*?? 108 109 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0. 110 */ 111 /* Estimate on needed scratch: 112 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8), 113 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6 114 */ 115 116 void 117 mpn_toom8h_mul (mp_ptr pp, 118 mp_srcptr ap, mp_size_t an, 119 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 120 { 121 mp_size_t n, s, t; 122 int p, q, half; 123 int sign; 124 125 /***************************** decomposition *******************************/ 126 127 ASSERT (an >= bn); 128 /* Can not handle too small operands */ 129 ASSERT (bn >= 86); 130 /* Can not handle too much unbalancement */ 131 ASSERT (an <= bn*4); 132 ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11); 133 ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2); 134 ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3); 135 136 /* Limit num/den is a rational number between 137 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */ 138 #define LIMIT_numerator (21) 139 #define LIMIT_denominat (20) 140 141 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */ 142 { 143 half = 0; 144 n = 1 + ((an - 1)>>3); 145 p = q = 7; 146 s = an - 7 * n; 147 t = bn - 7 * n; 148 } 149 else 150 { 151 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */ 152 { p = 9; q = 8; } 153 else if (GMP_NUMB_BITS <= 9*3 || 154 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1)) 155 { p = 9; q = 7; } 156 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */ 157 { p =10; q = 7; } 158 else if (GMP_NUMB_BITS <= 10*3 || 159 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn) 160 { p =10; q = 6; } 161 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/ 162 { p =11; q = 6; } 163 else if (GMP_NUMB_BITS <= 11*3 || 164 an * 4 < 9 * bn) 165 { p =11; q = 5; } 166 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */ 167 { p =12; q = 5; } 168 else if (GMP_NUMB_BITS <= 12*3 || 169 an * 9 < 28 * bn ) /* is 4*... <12*... */ 170 { p =12; q = 4; } 171 else 172 { p =13; q = 4; } 173 174 half = (p+q)&1; 175 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 176 p--; q--; 177 178 s = an - p * n; 179 t = bn - q * n; 180 181 if(half) { /* Recover from badly chosen splitting */ 182 if (UNLIKELY (s<1)) {p--; s+=n; half=0;} 183 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;} 184 } 185 } 186 #undef LIMIT_numerator 187 #undef LIMIT_denominat 188 189 ASSERT (0 < s && s <= n); 190 ASSERT (0 < t && t <= n); 191 ASSERT (half || s + t > 3); 192 ASSERT (n > 2); 193 194 #define r6 (pp + 3 * n) /* 3n+1 */ 195 #define r4 (pp + 7 * n) /* 3n+1 */ 196 #define r2 (pp +11 * n) /* 3n+1 */ 197 #define r0 (pp +15 * n) /* s+t <= 2*n */ 198 #define r7 (scratch) /* 3n+1 */ 199 #define r5 (scratch + 3 * n + 1) /* 3n+1 */ 200 #define r3 (scratch + 6 * n + 2) /* 3n+1 */ 201 #define r1 (scratch + 9 * n + 3) /* 3n+1 */ 202 #define v0 (pp +11 * n) /* n+1 */ 203 #define v1 (pp +12 * n+1) /* n+1 */ 204 #define v2 (pp +13 * n+2) /* n+1 */ 205 #define v3 (scratch +12 * n + 4) /* n+1 */ 206 #define wsi (scratch +12 * n + 4) /* 3n+1 */ 207 #define wse (scratch +13 * n + 5) /* 2n+1 */ 208 209 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may 210 need all of them */ 211 /* if (scratch == NULL) */ 212 /* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */ 213 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn)); 214 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8)); 215 216 /********************** evaluation and recursive calls *********************/ 217 218 /* $\pm1/8$ */ 219 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^ 220 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp); 221 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */ 222 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse); 223 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half)); 224 225 /* $\pm1/4$ */ 226 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 227 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 228 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */ 229 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse); 230 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 231 232 /* $\pm2$ */ 233 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 234 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 235 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */ 236 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse); 237 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2); 238 239 /* $\pm8$ */ 240 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^ 241 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp); 242 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */ 243 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); 244 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6); 245 246 /* $\pm1/2$ */ 247 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 248 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 249 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */ 250 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse); 251 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half); 252 253 /* $\pm1$ */ 254 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 255 if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3)) 256 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 257 else 258 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 259 /* A(-1)*B(-1) */ /* A(1)*B(1) */ 260 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse); 261 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0); 262 263 /* $\pm4$ */ 264 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 265 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 266 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */ 267 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse); 268 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4); 269 270 #undef v0 271 #undef v1 272 #undef v2 273 #undef v3 274 #undef wse 275 276 /* A(0)*B(0) */ 277 TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi); 278 279 /* Infinity */ 280 if (UNLIKELY (half != 0)) { 281 if (s > t) { 282 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 283 } else { 284 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 285 }; 286 }; 287 288 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi); 289 290 #undef r0 291 #undef r1 292 #undef r2 293 #undef r3 294 #undef r4 295 #undef r5 296 #undef r6 297 #undef wsi 298 } 299 300 #undef TOOM8H_MUL_N_REC 301 #undef TOOM8H_MUL_REC 302 #undef MAYBE_mul_basecase 303 #undef MAYBE_mul_toom22 304 #undef MAYBE_mul_toom33 305 #undef MAYBE_mul_toom44 306 #undef MAYBE_mul_toom8h