github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mu_bdiv_qr.c

/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
   and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/

#include "gmp.h"
#include "gmp-impl.h"


/* N = {np,nn}
   D = {dp,dn}

   Requirements: N >= D
                 D >= 1
                 D odd
                 dn >= 2
                 nn >= 2
                 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).

   Write quotient to Q = {qp,nn-dn}.

   FIXME: When iterating, perhaps do the small step before the loop, not after.
   FIXME: Try to avoid the scalar divisions when computing the inverse size.
   FIXME: Trim allocation for the (qn > dn) case; 3*dn might be possible.  In
          particular, when dn == in, tp and rp could use the same space.
*/
mp_limb_t
mpn_mu_bdiv_qr (mp_ptr qp,
                mp_ptr rp,
                mp_srcptr np, mp_size_t nn,
                mp_srcptr dp, mp_size_t dn,
                mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t in;
  mp_limb_t cy, c0;
  mp_size_t tn, wn;

  qn = nn - dn;

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;

      /* |_______________________|   dividend
                        |________|   divisor  */

#define ip           scratch             /* in */
#define tp           (scratch + in)      /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn) /* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;    /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;    /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
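      /* For example (numbers ours, purely illustrative): qn = 100 and
         dn = 30 give b = ceil(100/30) = 4 and in = ceil(100/4) = 25, so
         the quotient is developed in four balanced 25-limb blocks rather
         than three 30-limb blocks plus a 10-limb tail.  Note that in <= dn
         always holds, so the in-limb inverse serves every block.  */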
      /* Some notes on allocation:

         When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
         limbs of R die at that point.  We could save memory by letting T
         live just under R, and letting the upper part of T expand into R.
         These changes should reduce itch to perhaps 3dn.
      */

      mpn_binvert (ip, dp, in, tp);

      MPN_COPY (rp, np, dn);
      np += dn;
      cy = 0;

      while (qn > in)
        {
          mpn_mullo_n (qp, rp, ip, in);

          if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
            mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
          else
            {
              /* tp = D*Q mod B^tn-1.  Unwrap the high limbs: the low wn
                 limbs of the true product equal those of R, since
                 D*Q == R mod B^in.  */
              tn = mpn_mulmod_bnm1_next_size (dn);
              mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
              wn = dn + in - tn;                /* number of wrapped limbs */
              if (wn > 0)
                {
                  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
                  mpn_decr_u (tp + wn, c0);
                }
            }

          qp += in;
          qn -= in;

          if (dn != in)
            {
              /* Subtract tp[dn-1...in] from partial remainder.  */
              cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
              if (cy == 2)
                {
                  mpn_incr_u (tp + dn, 1);
                  cy = 1;
                }
            }
          /* Subtract tp[dn+in-1...dn] from dividend.  */
          cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
          np += in;
        }

      /* Generate last qn limbs.  */
      mpn_mullo_n (qp, rp, ip, qn);

      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, qn);   /* mulhi, need tp[qn+in-1...in] */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
          wn = dn + qn - tn;            /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, rp, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      if (dn != qn)
        {
          cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
          if (cy == 2)
            {
              mpn_incr_u (tp + dn, 1);
              cy = 1;
            }
        }
      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);

#undef ip
#undef tp
#undef scratch_out
    }
  else
    {
      /* |_______________________|   dividend
                 |________________|   divisor  */

#define ip           scratch             /* in */
#define tp           (scratch + in)      /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn) /* mulmod_bnm1_itch(next_size(dn)) */
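      /* Here the quotient is developed in just two steps: the low
         in = ceil(qn/2) limbs, then the remaining qn - in = floor(qn/2)
         limbs.  A single in-limb inverse serves both steps, since
         qn - in <= in and an inverse mod B^in is also one mod B^k for
         any k <= in.  */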
      /* Compute half-sized inverse.  */
      in = qn - (qn >> 1);

      mpn_binvert (ip, dp, in, tp);

      mpn_mullo_n (qp, np, ip, in);             /* low `in' quotient limbs */

      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, in);           /* mulhigh */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
          wn = dn + in - tn;                    /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, np, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      qp += in;
      qn -= in;

      cy = mpn_sub_n (rp, np + in, tp + in, dn);
      mpn_mullo_n (qp, rp, ip, qn);             /* high qn quotient limbs */

      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, qn);           /* mulhigh */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
          wn = dn + qn - tn;                    /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, rp, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
      if (cy == 2)
        {
          mpn_incr_u (tp + dn, 1);
          cy = 1;
        }
      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);

#undef ip
#undef tp
#undef scratch_out
    }
}

mp_size_t
mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
  mp_size_t b;

  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);

  qn = nn - dn;

  if (qn > dn)
    {
      b = (qn - 1) / dn + 1;    /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;    /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
    }
  else
    {
      in = qn - (qn >> 1);
    }

  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
    {
      tn = dn + in;
      itch_out = 0;
    }
  else
    {
      tn = mpn_mulmod_bnm1_next_size (dn);
      itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
    }

  itch_binvert = mpn_binvert_itch (in);
  itches = tn + itch_out;
  return in + MAX (itches, itch_binvert);
}
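
/* The block below is a usage sketch, not part of GMP: it shows how a caller
   is expected to size the scratch area with mpn_mu_bdiv_qr_itch and checks
   the defining property Q*D == N (mod B^qn).  The guard macro
   MU_BDIV_QR_DEMO is invented here so the sketch stays out of any real
   build; the limb sizes are arbitrary choices satisfying the requirements
   above (dn >= 2, qn >= 2, D odd, N >= D).  */
#ifdef MU_BDIV_QR_DEMO
#include <stdio.h>

int
main (void)
{
  mp_size_t nn = 12, dn = 5, qn = nn - dn;
  mp_limb_t np[12], dp[5], qp[7], rp[5], pp[12];
  mp_ptr scratch;
  TMP_DECL;

  TMP_MARK;
  mpn_random (np, nn);          /* random N; most significant limb nonzero */
  mpn_random (dp, dn);
  dp[0] |= 1;                   /* the divisor must be odd */

  scratch = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (nn, dn));
  mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);

  /* Q*D and N must agree on their low qn limbs.  */
  mpn_mul (pp, qp, qn, dp, dn); /* qn + dn = nn product limbs */
  printf (mpn_cmp (pp, np, qn) == 0 ? "ok\n" : "FAILED\n");

  TMP_FREE;
  return 0;
}
#endif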