github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom8_sqr.c (about) 1 /* Implementation of the squaring algorithm with Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 41 #if GMP_NUMB_BITS < 29 42 #error Not implemented. 43 #endif 44 45 #if GMP_NUMB_BITS < 43 46 #define BIT_CORRECTION 1 47 #define CORRECTION_BITS GMP_NUMB_BITS 48 #else 49 #define BIT_CORRECTION 0 50 #define CORRECTION_BITS 0 51 #endif 52 53 #ifndef SQR_TOOM8_THRESHOLD 54 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD 55 #endif 56 57 #ifndef SQR_TOOM6_THRESHOLD 58 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD 59 #endif 60 61 #if TUNE_PROGRAM_BUILD 62 #define MAYBE_sqr_basecase 1 63 #define MAYBE_sqr_above_basecase 1 64 #define MAYBE_sqr_toom2 1 65 #define MAYBE_sqr_above_toom2 1 66 #define MAYBE_sqr_toom3 1 67 #define MAYBE_sqr_above_toom3 1 68 #define MAYBE_sqr_toom4 1 69 #define MAYBE_sqr_above_toom4 1 70 #define MAYBE_sqr_above_toom6 1 71 #else 72 #define SQR_TOOM8_MAX \ 73 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ? \ 74 ((SQR_FFT_THRESHOLD+8*2-1+7)/8) \ 75 : MP_SIZE_T_MAX ) 76 #define MAYBE_sqr_basecase \ 77 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD) 78 #define MAYBE_sqr_above_basecase \ 79 (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD) 80 #define MAYBE_sqr_toom2 \ 81 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD) 82 #define MAYBE_sqr_above_toom2 \ 83 (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD) 84 #define MAYBE_sqr_toom3 \ 85 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD) 86 #define MAYBE_sqr_above_toom3 \ 87 (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD) 88 #define MAYBE_sqr_toom4 \ 89 (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD) 90 #define MAYBE_sqr_above_toom4 \ 91 (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD) 92 #define MAYBE_sqr_above_toom6 \ 93 (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD) 94 #endif 95 96 #define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws) \ 97 do { \ 98 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \ 99 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) { \ 100 mpn_sqr_basecase (p, a, n); \ 101 if (f) mpn_sqr_basecase (p2, a2, n); \ 102 } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \ 103 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) { \ 104 mpn_toom2_sqr (p, a, n, ws); \ 105 if (f) mpn_toom2_sqr (p2, a2, n, ws); \ 106 } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \ 107 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) { \ 108 mpn_toom3_sqr (p, a, n, ws); \ 109 if (f) mpn_toom3_sqr (p2, a2, n, ws); \ 110 } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \ 111 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) { \ 112 mpn_toom4_sqr (p, a, n, ws); \ 113 if (f) mpn_toom4_sqr (p2, a2, n, ws); \ 114 } else if (! MAYBE_sqr_above_toom6 \ 115 || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) { \ 116 mpn_toom6_sqr (p, a, n, ws); \ 117 if (f) mpn_toom6_sqr (p2, a2, n, ws); \ 118 } else { \ 119 mpn_toom8_sqr (p, a, n, ws); \ 120 if (f) mpn_toom8_sqr (p2, a2, n, ws); \ 121 } \ 122 } while (0) 123 124 void 125 mpn_toom8_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch) 126 { 127 mp_size_t n, s; 128 129 /***************************** decomposition *******************************/ 130 131 ASSERT ( an >= 40 ); 132 133 n = 1 + ((an - 1)>>3); 134 135 s = an - 7 * n; 136 137 ASSERT (0 < s && s <= n); 138 ASSERT ( s + s > 3 ); 139 140 #define r6 (pp + 3 * n) /* 3n+1 */ 141 #define r4 (pp + 7 * n) /* 3n+1 */ 142 #define r2 (pp +11 * n) /* 3n+1 */ 143 #define r0 (pp +15 * n) /* s+t <= 2*n */ 144 #define r7 (scratch) /* 3n+1 */ 145 #define r5 (scratch + 3 * n + 1) /* 3n+1 */ 146 #define r3 (scratch + 6 * n + 2) /* 3n+1 */ 147 #define r1 (scratch + 9 * n + 3) /* 3n+1 */ 148 #define v0 (pp +11 * n) /* n+1 */ 149 #define v2 (pp +13 * n+2) /* n+1 */ 150 #define wse (scratch +12 * n + 4) /* 3n+1 */ 151 152 /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may 153 need all of them, when DO_mpn_sublsh_n usea a scratch */ 154 /* if (scratch == NULL) */ 155 /* scratch = TMP_SALLOC_LIMBS (30 * n + 6); */ 156 157 /********************** evaluation and recursive calls *********************/ 158 /* $\pm1/8$ */ 159 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp); 160 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */ 161 TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse); 162 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0); 163 164 /* $\pm1/4$ */ 165 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp); 166 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */ 167 TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse); 168 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0); 169 170 /* $\pm2$ */ 171 mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp); 172 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */ 173 TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse); 174 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2); 175 176 /* $\pm8$ */ 177 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp); 178 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */ 179 TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse); 180 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6); 181 182 /* $\pm1/2$ */ 183 mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp); 184 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */ 185 TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse); 186 mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0); 187 188 /* $\pm1$ */ 189 mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp); 190 /* A(-1)*B(-1) */ /* A(1)*B(1) */ 191 TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse); 192 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0); 193 194 /* $\pm4$ */ 195 mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp); 196 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */ 197 TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse); 198 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4); 199 200 #undef v0 201 #undef v2 202 203 /* A(0)*B(0) */ 204 TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse); 205 206 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse); 207 208 #undef r0 209 #undef r1 210 #undef r2 211 #undef r3 212 #undef r4 213 #undef r5 214 #undef r6 215 #undef wse 216 217 } 218 219 #undef TOOM8_SQR_REC 220 #undef MAYBE_sqr_basecase 221 #undef MAYBE_sqr_above_basecase 222 #undef MAYBE_sqr_toom2 223 #undef MAYBE_sqr_above_toom2 224 #undef MAYBE_sqr_toom3 225 #undef MAYBE_sqr_above_toom3 226 #undef MAYBE_sqr_above_toom4