github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/divrem_1.c

/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif


/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */
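/* Illustrative sketch, not part of the original file: the multiply-by-
   inverse division described above, specialized to 64-bit limbs with no
   nail bits, using the compiler's unsigned __int128 (GCC/Clang) in place
   of the umul_ppmm/add_ssaaaa primitives from longlong.h.  The demo_*
   names are hypothetical; the real code uses the invert_limb and
   udiv_qrnnd_preinv macros from gmp-impl.h.  */

#include <stdint.h>

/* dinv = floor((B^2-1)/d) - B for a normalized d (high bit set), where
   B = 2^64.  That quotient lies in [B+1, 2B-1], so the result fits in
   one limb.  */
static uint64_t
demo_invert_limb (uint64_t d)
{
  return (uint64_t) (~(unsigned __int128) 0 / d
		     - ((unsigned __int128) 1 << 64));
}

/* Divide the two-limb number (nh,nl), with nh < d, by the normalized d,
   storing the quotient limb in *qp and returning the remainder: one
   64x64->128 multiply by dinv, then at most two conditional
   adjustments.  */
static uint64_t
demo_udiv_preinv (uint64_t *qp, uint64_t nh, uint64_t nl,
		  uint64_t d, uint64_t dinv)
{
  unsigned __int128 s;
  uint64_t qh, ql, r;

  /* (qh,ql) = dinv*nh + (nh+1,nl); the +1 biases the candidate quotient
     upward so the checks below can settle it.  */
  s = (unsigned __int128) dinv * nh
      + ((((unsigned __int128) nh + 1) << 64) | nl);
  qh = (uint64_t) (s >> 64);
  ql = (uint64_t) s;

  r = nl - qh * d;	/* candidate remainder, computed mod B */
  if (r > ql)		/* qh was one too large */
    {
      qh--;
      r += d;
    }
  if (r >= d)		/* rare: qh still one too small */
    {
      qh++;
      r -= d;
    }
  *qp = qh;
  return r;
}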
mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
	      mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  n;
  mp_size_t  i;
  mp_limb_t  n1, n0;
  mp_limb_t  r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  n = un + qxn;
  if (n == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      if (un != 0)
	{
	  /* High quotient limb is 0 or 1, skip a divide step. */
	  mp_limb_t q;
	  r = up[un - 1] << GMP_NAIL_BITS;
	  q = (r >= d);
	  *qp-- = q;
	  r -= (d & -q);
	  r >>= GMP_NAIL_BITS;
	  n--;
	  un--;
	}

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
	{
	plain:
	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd (*qp, r, r, n0, d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r;
	}
      else
	{
	  /* Multiply-by-inverse, divisor already normalized. */
	  mp_limb_t dinv;
	  invert_limb (dinv, d);

	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r;
	}
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int cnt;

      /* Skip a division if high < divisor (high quotient 0).  Testing here
	 before normalizing will still skip as often as possible.  */
      if (un != 0)
	{
	  n1 = up[un - 1] << GMP_NAIL_BITS;
	  if (n1 < d)
	    {
	      r = n1 >> GMP_NAIL_BITS;
	      *qp-- = 0;
	      n--;
	      if (n == 0)
		return r;
	      un--;
	    }
	}
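      /* If plain division copes with an unnormalized divisor and the size
	 is below the threshold, the non-shifting loop above does the job
	 (the goto just below).  Otherwise the divisor and running
	 remainder are normalized by a left shift of cnt, and each shifted
	 dividend limb is reconstituted on the fly from two adjacent source
	 limbs (the EXTRACT step discussed in the comment at the top of the
	 file) rather than by a separate mpn_lshift pass.  */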
      if (! UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
	goto plain;

      count_leading_zeros (cnt, d);
      d <<= cnt;
      r <<= cnt;

      if (UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
	{
	  mp_limb_t  nshift;
	  if (un != 0)
	    {
	      n1 = up[un - 1] << GMP_NAIL_BITS;
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
	      for (i = un - 2; i >= 0; i--)
		{
		  n0 = up[i] << GMP_NAIL_BITS;
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
		  udiv_qrnnd (*qp, r, r, nshift, d);
		  r >>= GMP_NAIL_BITS;
		  qp--;
		  n1 = n0;
		}
	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r >> cnt;
	}
      else
	{
	  mp_limb_t  dinv, nshift;
	  invert_limb (dinv, d);
	  if (un != 0)
	    {
	      n1 = up[un - 1] << GMP_NAIL_BITS;
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
	      for (i = un - 2; i >= 0; i--)
		{
		  n0 = up[i] << GMP_NAIL_BITS;
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
		  r >>= GMP_NAIL_BITS;
		  qp--;
		  n1 = n0;
		}
	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r >> cnt;
	}
    }
}
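/* Usage sketch, not part of the original file: calling mpn_divrem_1
   through GMP's public mpn interface.  Intended as a separate test
   program built against an installed GMP (cc demo.c -lgmp); the input
   values are arbitrary.  qxn = 0 requests no extra "fraction" quotient
   limbs, so the quotient occupies un = 2 limbs.  */

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mp_limb_t up[2] = { 123456789, 42 };	/* u = 42*B + 123456789 */
  mp_limb_t qp[2];
  mp_limb_t r = mpn_divrem_1 (qp, 0, up, 2, 7);
  printf ("quotient limbs {%lu, %lu}, remainder %lu\n",
	  (unsigned long) qp[0], (unsigned long) qp[1],
	  (unsigned long) r);
  return 0;
}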