/* mpn_toom3_sqr -- Square {ap,an}.

   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.
*/


#include "gmp.h"
#include "gmp-impl.h"

/* Evaluate in: -1, 0, +1, +2, +inf

  <-s--><--n--><--n-->
   ____ ______ ______
  |_a2_|___a1_|___a0_|

  v0  =  a0         ^2 #   A(0)^2
  v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
  vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
  v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
  vinf=          a2 ^2 # A(inf)^2
*/

/* Under TUNE_PROGRAM_BUILD or WANT_FAT_BINARY the thresholds are runtime
   variables, so every recursion branch must be compiled in.  Otherwise the
   compile-time threshold relations let dead branches be discarded.  */
#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_sqr_basecase 1
#define MAYBE_sqr_toom3 1
#else
#define MAYBE_sqr_basecase						\
  (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
#define MAYBE_sqr_toom3							\
  (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
#endif

/* Recursive squaring of an n-limb piece into p, using ws as scratch:
   dispatch to basecase, Toom-2 or Toom-3 depending on n.  */
#define TOOM3_SQR_REC(p, a, n, ws)					\
  do {									\
    if (MAYBE_sqr_basecase						\
	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
      mpn_sqr_basecase (p, a, n);					\
    else if (! MAYBE_sqr_toom3						\
	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
      mpn_toom2_sqr (p, a, n, ws);					\
    else								\
      mpn_toom3_sqr (p, a, n, ws);					\
  } while (0)

/* Square {ap,an} into {pp,2*an} with Toom-3: split ap into three pieces,
   square the five point evaluations A(0), A(1), A(-1), A(2), A(inf)
   (recursively), then interpolate.  scratch is caller-provided workspace;
   this function lays it out as vm1 (2n+1 limbs), v2 (2n+2 limbs), staging
   for asm1/as1, then recursion workspace from scratch + 5n + 5 on.  */
void
mpn_toom3_sqr (mp_ptr pp,
	       mp_srcptr ap, mp_size_t an,
	       mp_ptr scratch)
{
  /* NOTE(review): not in upstream GMP 6.1.2 -- this local shadows the
     fat-binary global of the same name, presumably so the vendored build
     skips the cpuvec initialization check; confirm against the aergo
     vendoring intent before touching it.  */
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s;
  mp_limb_t cy, vinf0;
  mp_ptr gp;
  mp_ptr as1, asm1, as2;

/* The three pieces of the operand: a0 and a1 are n limbs, a2 is s limbs.  */
#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2*n)

  n = (an + 2) / (size_t) 3;

  s = an - 2 * n;		/* size of the (possibly short) top piece */

  ASSERT (0 < s && s <= n);

  /* Evaluation staging areas: as1 and asm1 live in scratch, as2 borrows
     space in the product area pp (not yet written at this point).  */
  as1 = scratch + 4 * n + 4;
  asm1 = scratch + 2 * n + 2;
  as2 = pp + n + 1;

  gp = scratch;

  /* Compute as1 = a0 + a1 + a2 = A(1) and asm1 = |a0 - a1 + a2| = |A(-1)|.
     Since the evaluations are only squared, the sign of A(-1) is
     irrelevant and the absolute value suffices.  */
  cy = mpn_add (gp, a0, n, a2, s);	/* gp = a0 + a2 */
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      /* a0 + a2 < a1: compute a1 - gp so asm1 stays non-negative.  */
      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
      as1[n] = cy >> 1;		/* add carry in bit 1, sub borrow in bit 0 */
      asm1[n] = 0;
    }
  else
    {
      mp_limb_t cy2;
      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
      as1[n] = cy + (cy2 >> 1);
      asm1[n] = cy - (cy2 & 1);
    }
#else
  as1[n] = cy + mpn_add_n (as1, gp, a1, n);
  if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
    {
      mpn_sub_n (asm1, a1, gp, n);
      asm1[n] = 0;
    }
  else
    {
      cy -= mpn_sub_n (asm1, gp, a1, n);
      asm1[n] = cy;
    }
#endif

  /* Compute as2 = a0 + 2*a1 + 4*a2 = A(2), reusing as1 = a0 + a1 + a2
     where possible: as2 = 2*(as1 + a2) - a0 = 2*(a1 + 2*a2) + a0.  */
#if HAVE_NATIVE_mpn_rsblsh1_n
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);	/* 2*as2 - a0 */
#else
#if HAVE_NATIVE_mpn_addlsh1_n
  cy  = mpn_addlsh1_n (as2, a1, a2, s);			/* a1 + 2*a2 */
  if (s != n)
    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);	/* a0 + 2*as2 */
#else
  cy = mpn_add_n (as2, a2, as1, s);
  if (s != n)
    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
  cy += as1[n];
  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
  cy -= mpn_sub_n (as2, as2, a0, n);
#endif
#endif
  as2[n] = cy;

  ASSERT (as1[n] <= 2);
  ASSERT (asm1[n] <= 1);

/* Layout of the five squared evaluations: v0, v1 and vinf go straight
   into the product area pp; vm1 and v2 live in scratch.  */
#define v0    pp				/* 2n */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vinf  (pp + 4 * n)			/* s+s */
#define vm1   scratch				/* 2n+1 */
#define v2    (scratch + 2 * n + 1)		/* 2n+2 */
#define scratch_out  (scratch + 5 * n + 5)

  /* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
  /* Square only the low n limbs, then fix up for the high limb w = asm1[n]
     (0 or 1): (X + w*B^n)^2 = X^2 + 2*w*X*B^n + w^2*B^2n, hence the two
     conditional additions of asm1 into vm1 + n below.  */
  TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
  cy = 0;
  if (asm1[n] != 0)
    cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  if (asm1[n] != 0)
    cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
  vm1[2 * n] = cy;
#else
  TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
#endif

  TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */

  TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */

  vinf0 = vinf[0];		/* v1 overlaps with this */

#ifdef SMALLER_RECURSION
  /* v1, 2n+1 limbs */
  /* Same high-limb fixup as for vm1, but here w = as1[n] may be 2, so the
     w == 1 case uses plain adds and w == 2 adds 2*as1 via addlsh1/addmul.  */
  TOOM3_SQR_REC (v1, as1, n, scratch_out);
  if (as1[n] == 1)
    {
      cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy = 2 * as1[n] + mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  else
    cy = 0;
  if (as1[n] == 1)
    {
      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
    }
  else if (as1[n] != 0)
    {
#if HAVE_NATIVE_mpn_addlsh1_n
      cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
#else
      cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
#endif
    }
  v1[2 * n] = cy;
#else
  /* v1 (at pp + 2n) overlaps vinf (at pp + 4n) by one limb when squaring
     n + 1 limbs; save and restore vinf[1] around the recursion.  */
  cy = vinf[1];
  TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
  vinf[1] = cy;
#endif

  TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */

  /* Recover the coefficients from the five point values and assemble the
     final product in pp.  kk1 (the fourth argument pair's vm1 weight) is 0:
     for squaring, vm1 = A(-1)^2 is always non-negative.  */
  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
}