github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/mod_1.c (about) 1 /* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder. 2 3 Copyright 1991, 1993, 1994, 1999-2001, 2003, 2010 Free Software Foundation, 4 Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of either: 10 11 * the GNU Lesser General Public License as published by the Free 12 Software Foundation; either version 3 of the License, or (at your 13 option) any later version. 14 15 or 16 17 * the GNU General Public License as published by the Free Software 18 Foundation; either version 2 of the License, or (at your option) any 19 later version. 20 21 or both in parallel, as here. 22 23 The GNU MP Library is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 26 for more details. 27 28 You should have received copies of the GNU General Public License and the 29 GNU Lesser General Public License along with the GNU MP Library. If not, 30 see https://www.gnu.org/licenses/. */ 31 32 #include "gmp.h" 33 #include "gmp-impl.h" 34 #include "longlong.h" 35 36 #include "mpn/sparc64/sparc64.h" 37 38 39 /* 64-bit divisor 32-bit divisor 40 cycles/limb cycles/limb 41 (approx) (approx) 42 Ultrasparc 2i: 160 120 43 */ 44 45 46 /* 32-bit divisors are treated in special case code. This requires 4 mulx 47 per limb instead of 8 in the general case. 48 49 For big endian systems we need HALF_ENDIAN_ADJ included in the src[i] 50 addressing, to get the two halves of each limb read in the correct order. 51 This is kept in an adj variable. Doing that measures about 6 c/l faster 52 than just writing HALF_ENDIAN_ADJ(i) in the loop. The latter shouldn't 53 be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc 54 3.2.1 at least). 
   A simple udivx/umulx loop for the 32-bit case was attempted for small
   sizes, but at size==2 it was only about the same speed and at size==3 was
   slower. */


/* Return the remainder of {src_limbptr, size_limbs} divided by d_limb.

   Works for any divisor, normalized or not.  The divisor is shifted up so
   its high bit is set and the dividend limbs are shifted by the same amount,
   which leaves the remainder shifted too; it is shifted back down before
   returning.  Divisors fitting in 32 bits take the halfword path, which
   needs only the cheaper half-limb multiplies (4 mulx per limb instead of 8,
   per the comment above).  */
static mp_limb_t
mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
{
  int        norm, norm_rshift;
  mp_limb_t  src_high_limb;
  mp_size_t  i;

  ASSERT (size_limbs >= 0);
  ASSERT (d_limb != 0);

  if (UNLIKELY (size_limbs == 0))
    return 0;

  src_high_limb = src_limbptr[size_limbs-1];

  /* udivx is good for size==1, and no need to bother checking limb<divisor,
     since if that's likely the caller should check */
  if (UNLIKELY (size_limbs == 1))
    return src_high_limb % d_limb;

  if (d_limb <= CNST_LIMB(0xFFFFFFFF))
    {
      /* 32-bit divisor: treat the source as 2*size_limbs halfwords.  */
      unsigned   *src, n1, n0, r, dummy_q, nshift, norm_rmask;
      mp_size_t  size, adj;
      mp_limb_t  dinv_limb;

      size = 2 * size_limbs;          /* halfwords */
      src = (unsigned *) src_limbptr;

      /* prospective initial remainder, if < d */
      r = src_high_limb >> 32;

      /* If the length of the source is uniformly distributed, then there's
         a 50% chance of the high 32-bits being zero, which we can skip.  */
      if (r == 0)
        {
          r = (unsigned) src_high_limb;
          size--;
          ASSERT (size > 0);          /* because always even */
        }

      /* Skip a division if high < divisor.  Having the test here before
         normalizing will still skip as often as possible.  */
      if (r < d_limb)
        {
          size--;
          ASSERT (size > 0);          /* because size==1 handled above */
        }
      else
        r = 0;

      /* NOTE(review): count_leading_zeros_32 comes from sparc64.h (not
         visible here); the "norm -= 32" implies it counts from bit 63 of
         the full limb, so after the subtraction norm is the count within
         the 32-bit halfword -- confirm against sparc64.h.  */
      count_leading_zeros_32 (norm, d_limb);
      norm -= 32;
      d_limb <<= norm;

      /* When norm==0, norm_rshift is 32 (the full halfword width), so the
         ">> norm_rshift" terms below are unusable; norm_rmask==0 forces
         them to zero in that case.  */
      norm_rshift = 32 - norm;
      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
      i = size-1;
      adj = HALF_ENDIAN_ADJ (i);
      n1 = src [i + adj];
      r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);

      invert_half_limb (dinv_limb, d_limb);
      adj = -adj;

      /* adj alternates sign on each halfword so that src[i+adj] reads the
         two halves of each limb in the right order on big endian systems
         (see the HALF_ENDIAN_ADJ remarks above).  */
      for (i--; i >= 0; i--)
        {
          n0 = src [i + adj];
          adj = -adj;
          nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
          udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
          n1 = n0;
        }

      /* same as loop, but without n0 */
      nshift = n1 << norm;
      udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);

      /* Undo the normalization shift; the low norm bits of r are zero.  */
      ASSERT ((r & ((1 << norm) - 1)) == 0);
      return r >> norm;
    }
  else
    {
      /* Divisor wider than 32 bits: full-limb division steps.  */
      mp_srcptr  src;
      mp_size_t  size;
      mp_limb_t  n1, n0, r, dinv, dummy_q, nshift, norm_rmask;

      src = src_limbptr;
      size = size_limbs;
      r = src_high_limb;  /* initial remainder */

      /* Skip a division if high < divisor.  Having the test here before
         normalizing will still skip as often as possible.  */
      if (r < d_limb)
        {
          size--;
          ASSERT (size > 0);          /* because size==1 handled above */
        }
      else
        r = 0;

      count_leading_zeros (norm, d_limb);
      d_limb <<= norm;

      /* In this branch d_limb > 0xFFFFFFFF, so norm <= 31 and each
         "n >> norm_rshift" term below has at most 31 significant bits,
         which is why the 32-bit mask 0xFFFFFFFF suffices here.  The
         norm==0 case sets norm_rmask==0 because norm_rshift is then the
         full limb width and the shifted value is unusable.  */
      norm_rshift = GMP_LIMB_BITS - norm;
      norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);

      src += size;
      n1 = *--src;
      r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);

      invert_limb (dinv, d_limb);

      for (i = size-2; i >= 0; i--)
        {
          n0 = *--src;
          nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
          udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
          n1 = n0;
        }

      /* same as loop, but without n0 */
      nshift = n1 << norm;
      udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);

      /* Undo the normalization shift; the low norm bits of r are zero.  */
      ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);
      return r >> norm;
    }
}

/* Public entry point: remainder of {ap, n} divided by b.

   Dispatches on the operand count n (via the tuned *_THRESHOLD cutoffs) and
   on whether b is already normalized (high bit set): small n goes to
   mpn_mod_1_anynorm above, larger n to the precomputed-inverse routines
   mpn_mod_1_1p / mpn_mod_1s_2p / mpn_mod_1s_4p, whose _cps setup calls fill
   the pre[] arrays.  */
mp_limb_t
mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  ASSERT (n >= 0);
  ASSERT (b != 0);

  /* Should this be handled at all?  Rely on callers?  Note un==0 is
     currently required by mpz/fdiv_r_ui.c and possibly other places.  */
  if (n == 0)
    return 0;

  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
    {
      /* b already normalized -- mod_1_1p gets it unshifted.  */
      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
        {
          return mpn_mod_1_anynorm (ap, n, b);
        }
      else
        {
          mp_limb_t pre[4];
          mpn_mod_1_1p_cps (pre, b);
          return mpn_mod_1_1p (ap, n, b, pre);
        }
    }
  else
    {
      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
        {
          return mpn_mod_1_anynorm (ap, n, b);
        }
      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
        {
          /* pre[1] is used as a shift count for b below; it is filled in
             by the _cps setup call.  */
          mp_limb_t pre[4];
          mpn_mod_1_1p_cps (pre, b);
          return mpn_mod_1_1p (ap, n, b << pre[1], pre);
        }
      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
        {
          /* mod_1s_2p also covers large b (> GMP_NUMB_MASK/4), where the
             4-way variant below is not used.  */
          mp_limb_t pre[5];
          mpn_mod_1s_2p_cps (pre, b);
          return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
        }
      else
        {
          mp_limb_t pre[7];
          mpn_mod_1s_4p_cps (pre, b);
          return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
        }
    }
}