github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/hgcd_jacobi.c (about) 1 /* hgcd_jacobi.c. 2 3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 6 7 Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc. 8 9 This file is part of the GNU MP Library. 10 11 The GNU MP Library is free software; you can redistribute it and/or modify 12 it under the terms of either: 13 14 * the GNU Lesser General Public License as published by the Free 15 Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 or 19 20 * the GNU General Public License as published by the Free Software 21 Foundation; either version 2 of the License, or (at your option) any 22 later version. 23 24 or both in parallel, as here. 25 26 The GNU MP Library is distributed in the hope that it will be useful, but 27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 28 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 29 for more details. 30 31 You should have received copies of the GNU General Public License and the 32 GNU Lesser General Public License along with the GNU MP Library. If not, 33 see https://www.gnu.org/licenses/. */ 34 35 #include "gmp.h" 36 #include "gmp-impl.h" 37 #include "longlong.h" 38 39 /* This file is almost a copy of hgcd.c, with some added calls to 40 mpn_jacobi_update */ 41 42 struct hgcd_jacobi_ctx 43 { 44 struct hgcd_matrix *M; 45 unsigned *bitsp; 46 }; 47 48 static void 49 hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn, 50 mp_srcptr qp, mp_size_t qn, int d) 51 { 52 ASSERT (!gp); 53 ASSERT (d >= 0); 54 55 MPN_NORMALIZE (qp, qn); 56 if (qn > 0) 57 { 58 struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p; 59 /* NOTES: This is a bit ugly. A tp area is passed to 60 gcd_subdiv_step, which stores q at the start of that area. We 61 now use the rest. */ 62 mp_ptr tp = (mp_ptr) qp + qn; 63 64 mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp); 65 *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3); 66 } 67 } 68 69 /* Perform a few steps, using some of mpn_hgcd2, subtraction and 70 division. Reduces the size by almost one limb or more, but never 71 below the given size s. Return new size for a and b, or 0 if no 72 more steps are possible. 73 74 If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n 75 limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2 76 fails, needs space for the quotient, qn <= n - s + 1 limbs, for and 77 hgcd_matrix_update_q, qn + (size of the appropriate column of M) <= 78 resulting size of M. 79 80 If N is the input size to the calling hgcd, then s = floor(N/2) + 81 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1 82 < N, so N is sufficient. 83 */ 84 85 static mp_size_t 86 hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s, 87 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp) 88 { 89 struct hgcd_matrix1 M1; 90 mp_limb_t mask; 91 mp_limb_t ah, al, bh, bl; 92 93 ASSERT (n > s); 94 95 mask = ap[n-1] | bp[n-1]; 96 ASSERT (mask > 0); 97 98 if (n == s + 1) 99 { 100 if (mask < 4) 101 goto subtract; 102 103 ah = ap[n-1]; al = ap[n-2]; 104 bh = bp[n-1]; bl = bp[n-2]; 105 } 106 else if (mask & GMP_NUMB_HIGHBIT) 107 { 108 ah = ap[n-1]; al = ap[n-2]; 109 bh = bp[n-1]; bl = bp[n-2]; 110 } 111 else 112 { 113 int shift; 114 115 count_leading_zeros (shift, mask); 116 ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); 117 al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); 118 bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); 119 bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); 120 } 121 122 /* Try an mpn_hgcd2 step */ 123 if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp)) 124 { 125 /* Multiply M <- M * M1 */ 126 mpn_hgcd_matrix_mul_1 (M, &M1, tp); 127 128 /* Can't swap inputs, so we need to copy. */ 129 MPN_COPY (tp, ap, n); 130 /* Multiply M1^{-1} (a;b) */ 131 return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n); 132 } 133 134 subtract: 135 { 136 struct hgcd_jacobi_ctx ctx; 137 ctx.M = M; 138 ctx.bitsp = bitsp; 139 140 return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp); 141 } 142 } 143 144 /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M 145 with elements of size at most (n+1)/2 - 1. Returns new size of a, 146 b, or zero if no reduction is possible. */ 147 148 /* Same scratch requirements as for mpn_hgcd. */ 149 mp_size_t 150 mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n, 151 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp) 152 { 153 mp_size_t s = n/2 + 1; 154 155 mp_size_t nn; 156 int success = 0; 157 158 if (n <= s) 159 /* Happens when n <= 2, a fairly uninteresting case but exercised 160 by the random inputs of the testsuite. */ 161 return 0; 162 163 ASSERT ((ap[n-1] | bp[n-1]) > 0); 164 165 ASSERT ((n+1)/2 - 1 < M->alloc); 166 167 if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD)) 168 { 169 mp_size_t n2 = (3*n)/4 + 1; 170 mp_size_t p = n/2; 171 172 nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp); 173 if (nn > 0) 174 { 175 /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1) 176 = 2 (n - 1) */ 177 n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp); 178 success = 1; 179 } 180 while (n > n2) 181 { 182 /* Needs n + 1 storage */ 183 nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); 184 if (!nn) 185 return success ? n : 0; 186 n = nn; 187 success = 1; 188 } 189 190 if (n > s + 2) 191 { 192 struct hgcd_matrix M1; 193 mp_size_t scratch; 194 195 p = 2*s - n + 1; 196 scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p); 197 198 mpn_hgcd_matrix_init(&M1, n - p, tp); 199 nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch); 200 if (nn > 0) 201 { 202 /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */ 203 ASSERT (M->n + 2 >= M1.n); 204 205 /* Furthermore, assume M ends with a quotient (1, q; 0, 1), 206 then either q or q + 1 is a correct quotient, and M1 will 207 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This 208 rules out the case that the size of M * M1 is much 209 smaller than the expected M->n + M1->n. */ 210 211 ASSERT (M->n + M1.n < M->alloc); 212 213 /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1) 214 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */ 215 n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch); 216 217 /* We need a bound for of M->n + M1.n. Let n be the original 218 input size. Then 219 220 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2 221 222 and it follows that 223 224 M.n + M1.n <= ceil(n/2) + 1 225 226 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the 227 amount of needed scratch space. */ 228 mpn_hgcd_matrix_mul (M, &M1, tp + scratch); 229 success = 1; 230 } 231 } 232 } 233 234 for (;;) 235 { 236 /* Needs s+3 < n */ 237 nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp); 238 if (!nn) 239 return success ? n : 0; 240 241 n = nn; 242 success = 1; 243 } 244 }