/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"


/*                  64-bit divisor   32-bit divisor
                     cycles/limb      cycles/limb
                      (approx)         (approx)
                   integer fraction  integer fraction
   Ultrasparc 2i:     160     160      122      96
*/


/* 32-bit divisors are treated in special case code.  This requires 4 mulx
   per limb instead of 8 in the general case.

   For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
   addressing, to get the two halves of each limb read in the correct order.
   This is kept in an adj variable.  Doing that measures about 4 c/l faster
   than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
   shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
   (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
   still use a variable since that ought to work out best.  */

/* Divide {ap_limbptr, size_limbs} by d_limb, storing the size_limbs
   integer-part quotient limbs followed (below them) by xsize_limbs
   "fraction" quotient limbs at qp_limbptr, and return the remainder.
   The fraction limbs come from continuing the division with zero
   numerator limbs appended below the low end of the source.  */

mp_limb_t
mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
              mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
{
  mp_size_t  total_size_limbs;
  mp_size_t  i;

  ASSERT (xsize_limbs >= 0);
  ASSERT (size_limbs >= 0);
  ASSERT (d_limb != 0);
  /* FIXME: What's the correct overlap rule when xsize!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
                                  ap_limbptr, size_limbs));

  total_size_limbs = size_limbs + xsize_limbs;
  if (UNLIKELY (total_size_limbs == 0))
    return 0;

  /* udivx is good for total_size==1, and no need to bother checking
     limb<divisor, since if that's likely the caller should check */
  if (UNLIKELY (total_size_limbs == 1))
    {
      mp_limb_t  a, q;
      /* When size_limbs==0 the single quotient limb is a fraction limb,
         developed from a zero numerator limb.  */
      a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
      q = a / d_limb;
      qp_limbptr[0] = q;
      return a - q*d_limb;
    }

  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
    {
      /* Small-divisor path: process the operand as 2*size 32-bit halves,
         with HALF_ENDIAN_ADJ compensating for half ordering on big endian
         systems (see the comment above the function).  */
      mp_size_t  size, xsize, total_size, adj;
      unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;
      mp_limb_t  dinv_limb;
      const unsigned *ap;
      int  norm, norm_rshift;

      size = 2 * size_limbs;      /* halves in the integer part */
      xsize = 2 * xsize_limbs;    /* halves in the fraction part */
      total_size = size + xsize;

      ap = (unsigned *) ap_limbptr;
      qp = (unsigned *) qp_limbptr;

      qp += xsize;                /* integer quotient halves go above the
                                     fraction halves */
      r = 0;        /* initial remainder */

      if (LIKELY (size != 0))
        {
          n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];

          /* If the length of the source is uniformly distributed, then
             there's a 50% chance of the high 32-bits being zero, which we
             can skip.  */
          if (n1 == 0)
            {
              n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
              total_size--;
              size--;
              ASSERT (size > 0);  /* because always even */
              qp[size + HALF_ENDIAN_ADJ(1)] = 0;
            }

          /* Skip a division if high < divisor (high quotient 0).  Testing
             here before normalizing will still skip as often as
             possible.  */
          if (n1 < d_limb)
            {
              r = n1;
              size--;
              qp[size + HALF_ENDIAN_ADJ(size)] = 0;
              total_size--;
              if (total_size == 0)
                return r;
            }
        }

      /* Normalize so the divisor's high bit (of 32) is set.  d_limb fits
         in 32 bits here, so the 64-bit leading-zero count is at least 32;
         subtracting 32 gives the shift relative to a 32-bit half.  */
      count_leading_zeros_32 (norm, d_limb);
      norm -= 32;
      d_limb <<= norm;
      r <<= norm;

      /* norm_rmask zeroes the ">> norm_rshift" term when norm==0, since
         shifting a 32-bit value by 32 would be undefined.  */
      norm_rshift = 32 - norm;
      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);

      invert_half_limb (dinv_limb, d_limb);

      /* Integer part: divide the halves from most to least significant,
         feeding each quotient half out through qp.  adj alternates sign
         each step so ap[i-1 + adj] / qp[i + adj] pick the correct half on
         big endian (see the comment above the function).  */
      if (LIKELY (size != 0))
        {
          i = size - 1;
          adj = HALF_ENDIAN_ADJ (i);
          n1 = ap[i + adj];
          adj = -adj;
          r |= ((n1 >> norm_rshift) & norm_rmask);
          for ( ; i > 0; i--)
            {
              n0 = ap[i-1 + adj];
              adj = -adj;
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
              udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
              qp[i + adj] = q;
              n1 = n0;
            }
          nshift = n1 << norm;
          udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
          qp[0 + HALF_ENDIAN_ADJ(0)] = q;
        }

      /* Fraction part: continue the division with zero numerator halves.  */
      qp -= xsize;
      adj = HALF_ENDIAN_ADJ (0);
      for (i = xsize-1; i >= 0; i--)
        {
          udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
          adj = -adj;
          qp[i + adj] = q;
        }

      /* Undo the normalization shift to recover the true remainder.  */
      return r >> norm;
    }
  else
    {
      /* General path: full 64-bit divisor, using a pre-inverted divisor
         with udiv_qrnnd_preinv.  */
      mp_srcptr  ap;
      mp_ptr     qp;
      mp_size_t  size, xsize, total_size;
      mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;
      int  norm, norm_rshift;

      ap = ap_limbptr;
      qp = qp_limbptr;
      size = size_limbs;
      xsize = xsize_limbs;
      total_size = total_size_limbs;
      d = d_limb;

      qp += total_size;   /* above high limb */
      r = 0;              /* initial remainder */

      if (LIKELY (size != 0))
        {
          /* Skip a division if high < divisor (high quotient 0).  Testing
             here before normalizing will still skip as often as
             possible.  */
          n1 = ap[size-1];
          if (n1 < d)
            {
              r = n1;
              *--qp = 0;
              total_size--;
              if (total_size == 0)
                return r;
              size--;
            }
        }

      /* Normalize so the divisor's high bit is set, as required by the
         pre-inverted division step below.  */
      count_leading_zeros (norm, d);
      d <<= norm;
      r <<= norm;

      /* norm_rmask zeroes the ">> norm_rshift" term when norm==0, since
         shifting a limb by GMP_LIMB_BITS would be undefined.  */
      norm_rshift = GMP_LIMB_BITS - norm;
      norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));

      invert_limb (dinv, d);

      /* Integer part: divide limbs from most to least significant,
         storing quotient limbs downwards from the top of qp.  */
      if (LIKELY (size != 0))
        {
          n1 = ap[size-1];
          r |= ((n1 >> norm_rshift) & norm_rmask);
          for (i = size-2; i >= 0; i--)
            {
              n0 = ap[i];
              nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
              udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
              *--qp = q;
              n1 = n0;
            }
          nshift = n1 << norm;
          udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
          *--qp = q;
        }

      /* Fraction part: continue the division with zero numerator limbs.  */
      for (i = 0; i < xsize; i++)
        {
          udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
          *--qp = q;
        }

      /* Undo the normalization shift to recover the true remainder.  */
      return r >> norm;
    }
}