github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/sqr_basecase.c (about) 1 /* mpn_sqr_basecase -- Internal routine to square a natural number 2 of length n. 3 4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY 5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. 6 7 8 Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software 9 Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 #include "gmp.h" 38 #include "gmp-impl.h" 39 #include "longlong.h" 40 41 42 #if HAVE_NATIVE_mpn_sqr_diagonal 43 #define MPN_SQR_DIAGONAL(rp, up, n) \ 44 mpn_sqr_diagonal (rp, up, n) 45 #else 46 #define MPN_SQR_DIAGONAL(rp, up, n) \ 47 do { \ 48 mp_size_t _i; \ 49 for (_i = 0; _i < (n); _i++) \ 50 { \ 51 mp_limb_t ul, lpl; \ 52 ul = (up)[_i]; \ 53 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \ 54 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \ 55 } \ 56 } while (0) 57 #endif 58 59 #if HAVE_NATIVE_mpn_sqr_diag_addlsh1 60 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 61 mpn_sqr_diag_addlsh1 (rp, tp, up, n) 62 #else 63 #if HAVE_NATIVE_mpn_addlsh1_n 64 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 65 do { \ 66 mp_limb_t cy; \ 67 MPN_SQR_DIAGONAL (rp, up, n); \ 68 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \ 69 rp[2 * n - 1] += cy; \ 70 } while (0) 71 #else 72 #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 73 do { \ 74 mp_limb_t cy; \ 75 MPN_SQR_DIAGONAL (rp, up, n); \ 76 cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \ 77 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \ 78 rp[2 * n - 1] += cy; \ 79 } while (0) 80 #endif 81 #endif 82 83 84 #undef READY_WITH_mpn_sqr_basecase 85 86 87 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s 88 void 89 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 90 { 91 mp_size_t i; 92 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 93 mp_ptr tp = tarr; 94 mp_limb_t cy; 95 96 /* must fit 2*n limbs in tarr */ 97 ASSERT (n <= SQR_TOOM2_THRESHOLD); 98 99 if ((n & 1) != 0) 100 { 101 if (n == 1) 102 { 103 mp_limb_t ul, lpl; 104 ul = up[0]; 105 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 106 rp[0] = lpl >> GMP_NAIL_BITS; 107 return; 108 } 109 110 MPN_ZERO (tp, n); 111 112 for (i = 0; i <= n - 2; i += 2) 113 { 114 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 115 tp[n + i] = cy; 116 } 117 } 118 else 119 { 120 if (n == 2) 121 { 122 #if HAVE_NATIVE_mpn_mul_2 123 rp[3] = mpn_mul_2 (rp, up, 2, up); 124 #else 125 rp[0] = 0; 126 rp[1] = 0; 127 rp[3] = mpn_addmul_2 (rp, up, 2, up); 128 #endif 129 return; 130 } 131 132 MPN_ZERO (tp, n); 133 134 for (i = 0; i <= n - 4; i += 2) 135 { 136 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 137 tp[n + i] = cy; 138 } 139 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); 140 tp[2 * n - 3] = cy; 141 } 142 143 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); 144 } 145 #define READY_WITH_mpn_sqr_basecase 146 #endif 147 148 149 #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2 150 151 /* mpn_sqr_basecase using plain mpn_addmul_2. 152 153 This is tricky, since we have to let mpn_addmul_2 make some undesirable 154 multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle. 155 This forces us to conditionally add or subtract the mpn_sqr_diagonal 156 results. Examples of the product we form: 157 158 n = 4 n = 5 n = 6 159 u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1 160 u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3 161 u4 * u5 162 add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5 163 sub: u1 sub: u1 u3 sub: u1 u3 164 */ 165 166 void 167 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 168 { 169 mp_size_t i; 170 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 171 mp_ptr tp = tarr; 172 mp_limb_t cy; 173 174 /* must fit 2*n limbs in tarr */ 175 ASSERT (n <= SQR_TOOM2_THRESHOLD); 176 177 if ((n & 1) != 0) 178 { 179 mp_limb_t x0, x1; 180 181 if (n == 1) 182 { 183 mp_limb_t ul, lpl; 184 ul = up[0]; 185 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 186 rp[0] = lpl >> GMP_NAIL_BITS; 187 return; 188 } 189 190 /* The code below doesn't like unnormalized operands. Since such 191 operands are unusual, handle them with a dumb recursion. */ 192 if (up[n - 1] == 0) 193 { 194 rp[2 * n - 2] = 0; 195 rp[2 * n - 1] = 0; 196 mpn_sqr_basecase (rp, up, n - 1); 197 return; 198 } 199 200 MPN_ZERO (tp, n); 201 202 for (i = 0; i <= n - 2; i += 2) 203 { 204 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 205 tp[n + i] = cy; 206 } 207 208 MPN_SQR_DIAGONAL (rp, up, n); 209 210 for (i = 2;; i += 4) 211 { 212 x0 = rp[i + 0]; 213 rp[i + 0] = (-x0) & GMP_NUMB_MASK; 214 x1 = rp[i + 1]; 215 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; 216 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); 217 if (i + 4 >= 2 * n) 218 break; 219 mpn_incr_u (rp + i + 4, cy); 220 } 221 } 222 else 223 { 224 mp_limb_t x0, x1; 225 226 if (n == 2) 227 { 228 #if HAVE_NATIVE_mpn_mul_2 229 rp[3] = mpn_mul_2 (rp, up, 2, up); 230 #else 231 rp[0] = 0; 232 rp[1] = 0; 233 rp[3] = mpn_addmul_2 (rp, up, 2, up); 234 #endif 235 return; 236 } 237 238 /* The code below doesn't like unnormalized operands. Since such 239 operands are unusual, handle them with a dumb recursion. */ 240 if (up[n - 1] == 0) 241 { 242 rp[2 * n - 2] = 0; 243 rp[2 * n - 1] = 0; 244 mpn_sqr_basecase (rp, up, n - 1); 245 return; 246 } 247 248 MPN_ZERO (tp, n); 249 250 for (i = 0; i <= n - 4; i += 2) 251 { 252 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 253 tp[n + i] = cy; 254 } 255 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); 256 tp[2 * n - 3] = cy; 257 258 MPN_SQR_DIAGONAL (rp, up, n); 259 260 for (i = 2;; i += 4) 261 { 262 x0 = rp[i + 0]; 263 rp[i + 0] = (-x0) & GMP_NUMB_MASK; 264 x1 = rp[i + 1]; 265 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; 266 if (i + 6 >= 2 * n) 267 break; 268 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); 269 mpn_incr_u (rp + i + 4, cy); 270 } 271 mpn_decr_u (rp + i + 2, (x1 | x0) != 0); 272 } 273 274 #if HAVE_NATIVE_mpn_addlsh1_n 275 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); 276 #else 277 cy = mpn_lshift (tp, tp, 2 * n - 2, 1); 278 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); 279 #endif 280 rp[2 * n - 1] += cy; 281 } 282 #define READY_WITH_mpn_sqr_basecase 283 #endif 284 285 286 #if ! defined (READY_WITH_mpn_sqr_basecase) 287 288 /* Default mpn_sqr_basecase using mpn_addmul_1. */ 289 290 void 291 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 292 { 293 mp_size_t i; 294 295 ASSERT (n >= 1); 296 ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n)); 297 298 { 299 mp_limb_t ul, lpl; 300 ul = up[0]; 301 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 302 rp[0] = lpl >> GMP_NAIL_BITS; 303 } 304 if (n > 1) 305 { 306 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 307 mp_ptr tp = tarr; 308 mp_limb_t cy; 309 310 /* must fit 2*n limbs in tarr */ 311 ASSERT (n <= SQR_TOOM2_THRESHOLD); 312 313 cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]); 314 tp[n - 1] = cy; 315 for (i = 2; i < n; i++) 316 { 317 mp_limb_t cy; 318 cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]); 319 tp[n + i - 2] = cy; 320 } 321 322 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); 323 } 324 } 325 #endif