github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/hgcd2.c

/* hgcd2.c

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 1996, 1998, 2000-2004, 2008, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#if GMP_NAIL_BITS == 0

/* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
   the remainder. */

/* Single-limb division optimized for small quotients. */
static inline mp_limb_t
div1 (mp_ptr rp,
      mp_limb_t n0,
      mp_limb_t d0)
{
  mp_limb_t q = 0;

  if ((mp_limb_signed_t) n0 < 0)
    {
      int cnt;
      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
        {
          d0 = d0 << 1;
        }

      q = 0;
      while (cnt)
        {
          q <<= 1;
          if (n0 >= d0)
            {
              n0 = n0 - d0;
              q |= 1;
            }
          d0 = d0 >> 1;
          cnt--;
        }
    }
  else
    {
      int cnt;
      for (cnt = 0; n0 >= d0; cnt++)
        {
          d0 = d0 << 1;
        }

      q = 0;
      while (cnt)
        {
          d0 = d0 >> 1;
          q <<= 1;
          if (n0 >= d0)
            {
              n0 = n0 - d0;
              q |= 1;
            }
          cnt--;
        }
    }
  *rp = n0;
  return q;
}
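/* Editorial illustration, not part of GMP: the shift-and-subtract scheme
   used by div1 above (and, on two-limb operands, by div2 below), restated
   for a plain uint64_t numerator below 2^63 so it can be lifted out and
   tested on its own.  The work is proportional to the bit length of the
   quotient, which is why this approach pays off when quotients are
   expected to be small; div1's two branches exist only so that a
   numerator with its top bit set can be handled without overflowing the
   alignment shift.  The helper name shift_sub_div1 is made up for this
   sketch, and the block is kept inside #if 0, like the alternative div2
   further down, so this translation unit is unaffected. */
#if 0
#include <stdint.h>

static uint64_t
shift_sub_div1 (uint64_t *rp, uint64_t n, uint64_t d)
{
  uint64_t q = 0;
  int cnt;

  /* Align: double d until it exceeds n; cnt counts the quotient bits to
     be produced.  (n < 2^63 keeps this shift from overflowing.) */
  for (cnt = 0; n >= d; cnt++)
    d <<= 1;

  /* Unwind: halve d again, subtracting where possible and recording one
     quotient bit per position. */
  while (cnt--)
    {
      d >>= 1;
      q <<= 1;
      if (n >= d)
        {
          n -= d;
          q |= 1;
        }
    }

  *rp = n;   /* remainder */
  return q;  /* quotient */
}
#endif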
/* Two-limb division optimized for small quotients. */
static inline mp_limb_t
div2 (mp_ptr rp,
      mp_limb_t nh, mp_limb_t nl,
      mp_limb_t dh, mp_limb_t dl)
{
  mp_limb_t q = 0;

  if ((mp_limb_signed_t) nh < 0)
    {
      int cnt;
      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
        {
          dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
          dl = dl << 1;
        }

      while (cnt)
        {
          q <<= 1;
          if (nh > dh || (nh == dh && nl >= dl))
            {
              sub_ddmmss (nh, nl, nh, nl, dh, dl);
              q |= 1;
            }
          dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
          dh = dh >> 1;
          cnt--;
        }
    }
  else
    {
      int cnt;
      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
        {
          dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
          dl = dl << 1;
        }

      while (cnt)
        {
          dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
          dh = dh >> 1;
          q <<= 1;
          if (nh > dh || (nh == dh && nl >= dl))
            {
              sub_ddmmss (nh, nl, nh, nl, dh, dl);
              q |= 1;
            }
          cnt--;
        }
    }

  rp[0] = nl;
  rp[1] = nh;

  return q;
}

#if 0
/* This div2 uses fewer branches, but it nevertheless seems to be
   slightly slower than the code above. */
static inline mp_limb_t
div2 (mp_ptr rp,
      mp_limb_t nh, mp_limb_t nl,
      mp_limb_t dh, mp_limb_t dl)
{
  mp_limb_t q = 0;
  int ncnt;
  int dcnt;

  count_leading_zeros (ncnt, nh);
  count_leading_zeros (dcnt, dh);
  dcnt -= ncnt;

  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
  dl <<= dcnt;

  do
    {
      mp_limb_t bit;
      q <<= 1;
      if (UNLIKELY (nh == dh))
        bit = (nl >= dl);
      else
        bit = (nh > dh);

      q |= bit;

      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);

      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
      dh = dh >> 1;
    }
  while (dcnt--);

  rp[0] = nl;
  rp[1] = nh;

  return q;
}
#endif

#else /* GMP_NAIL_BITS != 0 */
/* Check all functions for nail support. */
/* hgcd2 should be defined to take inputs including nail bits, and
   produce a matrix with elements also including nail bits.  This is
   necessary for the matrix elements to be useful with mpn_mul_1,
   mpn_addmul_1 and friends. */
#error Not implemented
#endif /* GMP_NAIL_BITS != 0 */

/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit.  Constructs
   matrix M.  Returns 1 if we make progress, i.e. can perform at least
   one subtraction.  Otherwise returns zero. */

/* FIXME: Possible optimizations:

   The div2 function starts with checking the most significant bit of
   the numerator.  We could maintain normalized operands here and call
   hgcd with normalized operands only, which should make the code
   simpler and possibly faster.

   Experiment with table lookups on the most significant bits.

   This function is also a candidate for assembler implementation.
*/
int
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
           struct hgcd_matrix1 *M)
{
  mp_limb_t u00, u01, u10, u11;

  if (ah < 2 || bh < 2)
    return 0;

  if (ah > bh || (ah == bh && al > bl))
    {
      sub_ddmmss (ah, al, ah, al, bh, bl);
      if (ah < 2)
        return 0;

      u00 = u01 = u11 = 1;
      u10 = 0;
    }
  else
    {
      sub_ddmmss (bh, bl, bh, bl, ah, al);
      if (bh < 2)
        return 0;

      u00 = u10 = u11 = 1;
      u01 = 0;
    }

  if (ah < bh)
    goto subtract_a;

  for (;;)
    {
      ASSERT (ah >= bh);
      if (ah == bh)
        goto done;

      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
        {
          ah = (ah << (GMP_LIMB_BITS / 2)) + (al >> (GMP_LIMB_BITS / 2));
          bh = (bh << (GMP_LIMB_BITS / 2)) + (bl >> (GMP_LIMB_BITS / 2));

          break;
        }

      /* Subtract a -= q b, and multiply M from the right by (1 q; 0 1),
         affecting the second column of M. */
      ASSERT (ah > bh);
      sub_ddmmss (ah, al, ah, al, bh, bl);

      if (ah < 2)
        goto done;

      if (ah <= bh)
        {
          /* Use q = 1 */
          u01 += u00;
          u11 += u10;
        }
      else
        {
          mp_limb_t r[2];
          mp_limb_t q = div2 (r, ah, al, bh, bl);
          al = r[0]; ah = r[1];
          if (ah < 2)
            {
              /* A is too small, but q is correct. */
              u01 += q * u00;
              u11 += q * u10;
              goto done;
            }
          q++;
          u01 += q * u00;
          u11 += q * u10;
        }
    subtract_a:
      ASSERT (bh >= ah);
      if (ah == bh)
        goto done;

      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
        {
          ah = (ah << (GMP_LIMB_BITS / 2)) + (al >> (GMP_LIMB_BITS / 2));
          bh = (bh << (GMP_LIMB_BITS / 2)) + (bl >> (GMP_LIMB_BITS / 2));

          goto subtract_a1;
        }

      /* Subtract b -= q a, and multiply M from the right by (1 0; q 1),
         affecting the first column of M. */
      sub_ddmmss (bh, bl, bh, bl, ah, al);

      if (bh < 2)
        goto done;

      if (bh <= ah)
        {
          /* Use q = 1 */
          u00 += u01;
          u10 += u11;
        }
      else
        {
          mp_limb_t r[2];
          mp_limb_t q = div2 (r, bh, bl, ah, al);
          bl = r[0]; bh = r[1];
          if (bh < 2)
            {
              /* B is too small, but q is correct. */
              u00 += q * u01;
              u10 += q * u11;
              goto done;
            }
          q++;
          u00 += q * u01;
          u10 += q * u11;
        }
    }

  /* NOTE: Since we discard the least significant half limb, we don't
     get a truly maximal M (corresponding to |a - b| <
     2^{GMP_LIMB_BITS + 1}). */
  /* Single precision loop */
  for (;;)
    {
      ASSERT (ah >= bh);

      ah -= bh;
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
        break;

      if (ah <= bh)
        {
          /* Use q = 1 */
          u01 += u00;
          u11 += u10;
        }
      else
        {
          mp_limb_t r;
          mp_limb_t q = div1 (&r, ah, bh);
          ah = r;
          if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
            {
              /* A is too small, but q is correct. */
              u01 += q * u00;
              u11 += q * u10;
              break;
            }
          q++;
          u01 += q * u00;
          u11 += q * u10;
        }
    subtract_a1:
      ASSERT (bh >= ah);

      bh -= ah;
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
        break;

      if (bh <= ah)
        {
          /* Use q = 1 */
          u00 += u01;
          u10 += u11;
        }
      else
        {
          mp_limb_t r;
          mp_limb_t q = div1 (&r, bh, ah);
          bh = r;
          if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
            {
              /* B is too small, but q is correct. */
              u00 += q * u01;
              u10 += q * u11;
              break;
            }
          q++;
          u00 += q * u01;
          u10 += q * u11;
        }
    }

 done:
  M->u[0][0] = u00; M->u[0][1] = u01;
  M->u[1][0] = u10; M->u[1][1] = u11;

  return 1;
}
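/* Editorial illustration, not part of GMP: the bookkeeping behind the
   u00..u11 updates above.  Each time a is reduced by q*b the second
   column of M absorbs q (M is multiplied on the right by (1 q; 0 1)),
   and each time b is reduced by q*a the first column absorbs q, so the
   identity (a0; b0) = M * (a; b) between the original and the reduced
   pair is preserved.  The toy below runs the same column updates at
   full precision on single words and checks that identity; mpn_hgcd2
   itself applies them to truncated high parts and adjusts q in a few
   corner cases, so no such exact identity holds for its local a and b.
   The function name is made up for this sketch, and the block is kept
   inside #if 0 so this translation unit is unaffected. */
#if 0
#include <stdint.h>
#include <assert.h>

static void
hgcd_bookkeeping_demo (uint64_t a, uint64_t b)
{
  uint64_t a0 = a, b0 = b;
  uint64_t u00 = 1, u01 = 0, u10 = 0, u11 = 1;  /* M starts as the identity */

  while (a != 0 && b != 0)
    {
      if (a >= b)
        {
          uint64_t q = a / b;
          a -= q * b;      /* a -= q b ... */
          u01 += q * u00;  /* ... and M absorbs (1 q; 0 1), second column */
          u11 += q * u10;
        }
      else
        {
          uint64_t q = b / a;
          b -= q * a;      /* b -= q a ... */
          u00 += q * u01;  /* ... and M absorbs (1 0; q 1), first column */
          u10 += q * u11;
        }

      /* The invariant: the original pair is M times the reduced pair.
         All products here are bounded by a0 or b0, so they cannot
         overflow. */
      assert (a0 == u00 * a + u01 * b);
      assert (b0 == u10 * a + u11 * b);
    }
}
#endif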
/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11).  The r and b
 * vectors must have space for n + 1 limbs.  Uses three buffers to avoid
 * a copy. */
mp_size_t
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
                             mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
{
  mp_limb_t ah, bh;

  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as

     r  = u00 * a
     r += u10 * b
     b *= u11
     b += u01 * a
  */

#if HAVE_NATIVE_mpn_addaddmul_1msb0
  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
#else
  ah = mpn_mul_1 (rp, ap, n, M->u[0][0]);
  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);

  bh = mpn_mul_1 (bp, bp, n, M->u[1][1]);
  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
#endif
  rp[n] = ah;
  bp[n] = bh;

  n += (ah | bh) > 0;
  return n;
}
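/* Editorial illustration, not part of GMP: a hypothetical caller of
   mpn_hgcd_mul_matrix1_vector, shown only to spell out the calling
   convention, assuming gmp-impl.h is available as it is inside the GMP
   tree (this is an internal routine, see the warning at the top of this
   file, so real calls come from GMP's own hgcd/gcd code).  rp and bp
   must both have room for n + 1 limbs, bp is read and then overwritten,
   and the return value is the possibly grown common size of r and b.
   The demo name and the numbers are made up; the block is kept inside
   #if 0 so this translation unit is unaffected. */
#if 0
static void
matrix1_vector_demo (void)
{
  struct hgcd_matrix1 M;
  mp_limb_t a[2] = { 5, 0 };    /* a = 5, least significant limb first */
  mp_limb_t b[3] = { 3, 0, 0 }; /* b = 3, with room for n + 1 = 3 limbs */
  mp_limb_t r[3];               /* result also needs room for n + 1 limbs */
  mp_size_t n;

  M.u[0][0] = 2; M.u[0][1] = 1; /* M = (2 1; 1 1), determinant 1 */
  M.u[1][0] = 1; M.u[1][1] = 1;

  /* (r;b) <-- (u00 a + u10 b, u01 a + u11 b) = (2*5 + 1*3, 1*5 + 1*3). */
  n = mpn_hgcd_mul_matrix1_vector (&M, r, a, b, 2);

  ASSERT (n == 2 && r[0] == 13 && b[0] == 8);
}
#endif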