github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/gcd.c (about) 1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers. 2 3 Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software 4 Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of either: 10 11 * the GNU Lesser General Public License as published by the Free 12 Software Foundation; either version 3 of the License, or (at your 13 option) any later version. 14 15 or 16 17 * the GNU General Public License as published by the Free Software 18 Foundation; either version 2 of the License, or (at your option) any 19 later version. 20 21 or both in parallel, as here. 22 23 The GNU MP Library is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 26 for more details. 27 28 You should have received copies of the GNU General Public License and the 29 GNU Lesser General Public License along with the GNU MP Library. If not, 30 see https://www.gnu.org/licenses/. */ 31 32 #include "gmp.h" 33 #include "gmp-impl.h" 34 #include "longlong.h" 35 36 /* Uses the HGCD operation described in 37 38 N. Möller, On Schönhage's algorithm and subquadratic integer gcd 39 computation, Math. Comp. 77 (2008), 589-607. 40 41 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and 42 then uses Lehmer's algorithm. 43 */ 44 45 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + 46 * 2)/3, which gives a balanced multiplication in 47 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better 48 * performance. The matrix-vector multiplication is then 49 * 4:1-unbalanced, with matrix elements of size n/6, and vector 50 * elements of size p = 2n/3. */ 51 52 /* From analysis of the theoretical running time, it appears that when 53 * multiplication takes time O(n^alpha), p should be chosen so that 54 * the ratio of the time for the mpn_hgcd call, and the time for the 55 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - 56 * 1). */ 57 #ifdef TUNE_GCD_P 58 #define P_TABLE_SIZE 10000 59 mp_size_t p_table[P_TABLE_SIZE]; 60 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) 61 #else 62 #define CHOOSE_P(n) (2*(n) / 3) 63 #endif 64 65 struct gcd_ctx 66 { 67 mp_ptr gp; 68 mp_size_t gn; 69 }; 70 71 static void 72 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn, 73 mp_srcptr qp, mp_size_t qn, int d) 74 { 75 struct gcd_ctx *ctx = (struct gcd_ctx *) p; 76 MPN_COPY (ctx->gp, gp, gn); 77 ctx->gn = gn; 78 } 79 80 #if GMP_NAIL_BITS > 0 81 /* Nail supports should be easy, replacing the sub_ddmmss with nails 82 * logic. */ 83 #error Nails not supported. 84 #endif 85 86 /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2. 87 Both U and V must be odd. */ 88 static inline mp_size_t 89 gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp) 90 { 91 mp_limb_t u0, u1, v0, v1; 92 mp_size_t gn; 93 94 u0 = up[0]; 95 u1 = up[1]; 96 v0 = vp[0]; 97 v1 = vp[1]; 98 99 ASSERT (u0 & 1); 100 ASSERT (v0 & 1); 101 102 /* Check for u0 != v0 needed to ensure that argument to 103 * count_trailing_zeros is non-zero. */ 104 while (u1 != v1 && u0 != v0) 105 { 106 unsigned long int r; 107 if (u1 > v1) 108 { 109 sub_ddmmss (u1, u0, u1, u0, v1, v0); 110 count_trailing_zeros (r, u0); 111 u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r); 112 u1 >>= r; 113 } 114 else /* u1 < v1. */ 115 { 116 sub_ddmmss (v1, v0, v1, v0, u1, u0); 117 count_trailing_zeros (r, v0); 118 v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r); 119 v1 >>= r; 120 } 121 } 122 123 gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0); 124 125 /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */ 126 if (u1 == v1 && u0 == v0) 127 return gn; 128 129 v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0); 130 gp[0] = mpn_gcd_1 (gp, gn, v0); 131 132 return 1; 133 } 134 135 mp_size_t 136 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) 137 { 138 mp_size_t talloc; 139 mp_size_t scratch; 140 mp_size_t matrix_scratch; 141 142 struct gcd_ctx ctx; 143 mp_ptr tp; 144 TMP_DECL; 145 146 ASSERT (usize >= n); 147 ASSERT (n > 0); 148 ASSERT (vp[n-1] > 0); 149 150 /* FIXME: Check for small sizes first, before setting up temporary 151 storage etc. */ 152 talloc = MPN_GCD_SUBDIV_STEP_ITCH(n); 153 154 /* For initial division */ 155 scratch = usize - n + 1; 156 if (scratch > talloc) 157 talloc = scratch; 158 159 #if TUNE_GCD_P 160 if (CHOOSE_P (n) > 0) 161 #else 162 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 163 #endif 164 { 165 mp_size_t hgcd_scratch; 166 mp_size_t update_scratch; 167 mp_size_t p = CHOOSE_P (n); 168 mp_size_t scratch; 169 #if TUNE_GCD_P 170 /* Worst case, since we don't guarantee that n - CHOOSE_P(n) 171 is increasing */ 172 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); 173 hgcd_scratch = mpn_hgcd_itch (n); 174 update_scratch = 2*(n - 1); 175 #else 176 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 177 hgcd_scratch = mpn_hgcd_itch (n - p); 178 update_scratch = p + n - 1; 179 #endif 180 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); 181 if (scratch > talloc) 182 talloc = scratch; 183 } 184 185 TMP_MARK; 186 tp = TMP_ALLOC_LIMBS(talloc); 187 188 if (usize > n) 189 { 190 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); 191 192 if (mpn_zero_p (up, n)) 193 { 194 MPN_COPY (gp, vp, n); 195 ctx.gn = n; 196 goto done; 197 } 198 } 199 200 ctx.gp = gp; 201 202 #if TUNE_GCD_P 203 while (CHOOSE_P (n) > 0) 204 #else 205 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 206 #endif 207 { 208 struct hgcd_matrix M; 209 mp_size_t p = CHOOSE_P (n); 210 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 211 mp_size_t nn; 212 mpn_hgcd_matrix_init (&M, n - p, tp); 213 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); 214 if (nn > 0) 215 { 216 ASSERT (M.n <= (n - p - 1)/2); 217 ASSERT (M.n + p <= (p + n - 1) / 2); 218 /* Temporary storage 2 (p + M->n) <= p + n - 1. */ 219 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); 220 } 221 else 222 { 223 /* Temporary storage n */ 224 n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp); 225 if (n == 0) 226 goto done; 227 } 228 } 229 230 while (n > 2) 231 { 232 struct hgcd_matrix1 M; 233 mp_limb_t uh, ul, vh, vl; 234 mp_limb_t mask; 235 236 mask = up[n-1] | vp[n-1]; 237 ASSERT (mask > 0); 238 239 if (mask & GMP_NUMB_HIGHBIT) 240 { 241 uh = up[n-1]; ul = up[n-2]; 242 vh = vp[n-1]; vl = vp[n-2]; 243 } 244 else 245 { 246 int shift; 247 248 count_leading_zeros (shift, mask); 249 uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]); 250 ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]); 251 vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]); 252 vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]); 253 } 254 255 /* Try an mpn_hgcd2 step */ 256 if (mpn_hgcd2 (uh, ul, vh, vl, &M)) 257 { 258 n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n); 259 MP_PTR_SWAP (up, tp); 260 } 261 else 262 { 263 /* mpn_hgcd2 has failed. Then either one of a or b is very 264 small, or the difference is very small. Perform one 265 subtraction followed by one division. */ 266 267 /* Temporary storage n */ 268 n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp); 269 if (n == 0) 270 goto done; 271 } 272 } 273 274 ASSERT(up[n-1] | vp[n-1]); 275 276 if (n == 1) 277 { 278 *gp = mpn_gcd_1(up, 1, vp[0]); 279 ctx.gn = 1; 280 goto done; 281 } 282 283 /* Due to the calling convention for mpn_gcd, at most one can be 284 even. */ 285 286 if (! (up[0] & 1)) 287 MP_PTR_SWAP (up, vp); 288 289 ASSERT (up[0] & 1); 290 291 if (vp[0] == 0) 292 { 293 *gp = mpn_gcd_1 (up, 2, vp[1]); 294 ctx.gn = 1; 295 goto done; 296 } 297 else if (! (vp[0] & 1)) 298 { 299 int r; 300 count_trailing_zeros (r, vp[0]); 301 vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r); 302 vp[1] >>= r; 303 } 304 305 ctx.gn = gcd_2(gp, up, vp); 306 307 done: 308 TMP_FREE; 309 return ctx.gn; 310 }