github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/div_q.c

/* mpn_div_q -- division for arbitrary size operands.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* Compute Q = N/D with truncation.
     N = {np,nn}
     D = {dp,dn}
     Q = {qp,nn-dn+1}
     T = {scratch,nn+1} is scratch space
   N and D are both untouched by the computation.
   N and T may overlap; pass the same space if N is irrelevant after the call,
   but note that tp needs an extra limb.

   Operand requirements:
     N >= D > 0
     dp[dn-1] != 0
     No overlap between the N, D, and Q areas.

   This division function does not clobber its input operands, since it is
   intended to support average-O(qn) division, and for that to be effective, it
   cannot put requirements on callers to copy an O(nn) operand.

   If a caller does not care about the value of {np,nn+1} after calling this
   function, it should pass np also for the scratch argument.  This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us from
   reasonably asking for all scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It is
   tricky for the first code block, since the mpn_*_div_q calls will typically
   generate all nn-dn+1 limbs and return 0 or 1.  I don't see how to fix that
   unless we generate the most significant quotient limb here, before calling
   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
   critical division case (the SB sub-case in particular) copying is not a good
   idea.

   It might make sense to split the if-else parts of the (qn + FUDGE >= dn)
   blocks into separate functions, since we could promise quite different
   things to callers in these two cases.  The 'then' case benefits from
   np=scratch, and it could perhaps even tolerate qp=np, saving some headache
   for many callers.
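   For instance (purely illustrative), a caller that no longer needs {np,nn}
   and has nn+1 writable limbs there can pass the same pointer for both the
   dividend and the scratch argument (assuming the caller's np is an mp_ptr):

     mpn_div_q (qp, np, nn, dp, dn, np);

   after which the contents at np are unspecified.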
   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
   operands, we do not reuse the huge scratch for adjustments.  This can be a
   serious waste of memory for the largest operands.
*/

/* FUDGE determines when to try getting an approximate quotient from the upper
   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
   for the code to be correct.  */
#define FUDGE 5  /* FIXME: tune this */

#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
#define MUPI_DIV_Q_THRESHOLD    MUPI_DIVAPPR_Q_THRESHOLD
#ifndef MUPI_DIVAPPR_Q_THRESHOLD
#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
#endif

void
mpn_div_q (mp_ptr qp,
           mp_srcptr np, mp_size_t nn,
           mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  mp_ptr new_dp, new_np, tp, rp;
  mp_limb_t cy, dh, qh;
  mp_size_t new_nn, qn;
  gmp_pi1_t dinv;
  int cnt;
  TMP_DECL;
  TMP_MARK;

  ASSERT (nn >= dn);
  ASSERT (dn > 0);
  ASSERT (dp[dn - 1] != 0);
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));

  ASSERT_ALWAYS (FUDGE >= 2);

  if (dn == 1)
    {
      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
      return;
    }

  qn = nn - dn + 1;  /* Quotient size, high limb might be zero */

  if (qn + FUDGE >= dn)
    {
      /* |________________________|
                          |_______|  */
      new_np = scratch;

      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np, nn, cnt);
          new_np[nn] = cy;
          new_nn = nn + (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (dn);
          mpn_lshift (new_dp, dp, dn, cnt);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
            {
              invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
            }
          if (cy == 0)
            qp[qn - 1] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - dn;
              for (i = 0; i < n; i++)
                qp[i] = GMP_NUMB_MAX;
              qh = 0;  /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          if (new_np != np)
            MPN_COPY (new_np, np, nn);

          if (dn == 2)
            {
              qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
            }
          else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
                   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
                   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
                   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
                   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
            {
              invert_pi1 (dinv, dh, dp[dn - 2]);
              qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
            }
          qp[nn - dn] = qh;
        }
    }
  else
    {
      /* |________________________|
               |_________________|  */
      tp = TMP_ALLOC_LIMBS (qn + 1);

      new_np = scratch;
      new_nn = 2 * qn + 1;
      if (new_np == np)
        /* We need {np,nn} to remain untouched until the final adjustment, so
           we need to allocate separate space for new_np.  */
        new_np = TMP_ALLOC_LIMBS (new_nn + 1);


      dh = dp[dn - 1];
      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
        {
          count_leading_zeros (cnt, dh);

          cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
          new_np[new_nn] = cy;

          new_nn += (cy != 0);

          new_dp = TMP_ALLOC_LIMBS (qn + 1);
          mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
          new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);

          if (qn + 1 == 2)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          if (cy == 0)
            tp[qn] = qh;
          else if (UNLIKELY (qh != 0))
            {
              /* This happens only when the quotient is close to B^n and
                 mpn_*_divappr_q returned B^n.  */
              mp_size_t i, n;
              n = new_nn - (qn + 1);
              for (i = 0; i < n; i++)
                tp[i] = GMP_NUMB_MAX;
              qh = 0;  /* currently ignored */
            }
        }
      else  /* divisor is already normalised */
        {
          MPN_COPY (new_np, np + nn - new_nn, new_nn);  /* pointless if MU will be used */

          new_dp = (mp_ptr) dp + dn - (qn + 1);

          if (qn == 2 - 1)
            {
              qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
            }
          else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
            }
          else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
            {
              invert_pi1 (dinv, dh, new_dp[qn - 1]);
              qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
            }
          else
            {
              mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
              mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
              qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
            }
          tp[qn] = qh;
        }

      MPN_COPY (qp, tp + 1, qn);
      if (tp[0] <= 4)
        {
          mp_size_t rn;

          rp = TMP_ALLOC_LIMBS (dn + qn);
          mpn_mul (rp, dp, dn, tp + 1, qn);
          rn = dn + qn;
          rn -= rp[rn - 1] == 0;

          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
            mpn_decr_u (qp, 1);
        }
    }

  TMP_FREE;
}
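
/* Illustrative only, not part of GMP: a minimal sketch of how an internal
   caller might set up the scratch area for mpn_div_q when it cannot reuse np.
   The wrapper name is hypothetical; the nn+1-limb scratch requirement comes
   from the interface comment above.  */
#if 0
static void
example_call_div_q (mp_ptr qp, mp_srcptr np, mp_size_t nn,
                    mp_srcptr dp, mp_size_t dn)
{
  mp_ptr scratch;
  TMP_DECL;
  TMP_MARK;
  scratch = TMP_ALLOC_LIMBS (nn + 1);  /* T = {scratch,nn+1} */
  mpn_div_q (qp, np, nn, dp, dn, scratch);
  TMP_FREE;
}
#endif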