github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/mod_1_4.c

/* mpn_mod_1s_4p (ap, n, b, cps)
   Divide (ap, n) by b.  Return the single-limb remainder.
   Requires that b < B / 4.

   Contributed to the GNU project by Torbjorn Granlund.
   Based on a suggestion by Peter L. Montgomery.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008-2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "mpn/sparc64/sparc64.h"

void
mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
{
  mp_limb_t bi;
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
  int cnt;

  ASSERT (b <= (~(mp_limb_t) 0) / 4);

  count_leading_zeros (cnt, b);

  b <<= cnt;
  invert_limb (bi, b);

  cps[0] = bi;
  cps[1] = cnt;

  B1modb = -b * ((bi >> (GMP_LIMB_BITS - cnt)) | (CNST_LIMB(1) << cnt));
  ASSERT (B1modb <= b);         /* NB: not fully reduced mod b */
  cps[2] = B1modb >> cnt;

  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
  cps[3] = B2modb >> cnt;

  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
  cps[4] = B3modb >> cnt;

  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
  cps[5] = B4modb >> cnt;

  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
  cps[6] = B5modb >> cnt;

#if WANT_ASSERT
  {
    int i;
    b = cps[2];
    for (i = 3; i <= 6; i++)
      {
        b += cps[i];
        ASSERT (b >= cps[i]);
      }
  }
#endif
}
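
/* Descriptive note on the parameter block computed above, as consumed by
   mpn_mod_1s_4p below (B = 2^GMP_LIMB_BITS, b' = b << cnt the normalized
   divisor):

     cps[0]    bi, the reciprocal of b' produced by invert_limb
     cps[1]    cnt, the normalization shift count
     cps[2+k]  (B^(k+1) mod b') >> cnt, for k = 0 .. 4

   The WANT_ASSERT block checks that the five shifted powers can be summed
   without limb overflow, a bound the four-limb-per-iteration reduction
   loop below appears to rely on.  */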

mp_limb_t
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
{
  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
  mp_size_t i;
  int cnt;

  ASSERT (n >= 1);

  B1modb = cps[2];
  B2modb = cps[3];
  B3modb = cps[4];
  B4modb = cps[5];
  B5modb = cps[6];

  if ((b >> 32) == 0)
    {
      switch (n & 3)
        {
        case 0:
          umul_ppmm_s (ph, pl, ap[n - 3], B1modb);
          add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
          umul_ppmm_s (ch, cl, ap[n - 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);
          umul_ppmm_s (rh, rl, ap[n - 1], B3modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 4;
          break;
        case 1:
          rh = 0;
          rl = ap[n - 1];
          n -= 1;
          break;
        case 2:
          rh = ap[n - 1];
          rl = ap[n - 2];
          n -= 2;
          break;
        case 3:
          umul_ppmm_s (ph, pl, ap[n - 2], B1modb);
          add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
          umul_ppmm_s (rh, rl, ap[n - 1], B2modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 3;
          break;
        }

      for (i = n - 4; i >= 0; i -= 4)
        {
          /* rr = ap[i]                    < B
                + ap[i+1] * (B mod b)      <= (B-1)(b-1)
                + ap[i+2] * (B^2 mod b)    <= (B-1)(b-1)
                + ap[i+3] * (B^3 mod b)    <= (B-1)(b-1)
                + LO(rr)  * (B^4 mod b)    <= (B-1)(b-1)
                + HI(rr)  * (B^5 mod b)    <= (B-1)(b-1)
          */
          umul_ppmm_s (ph, pl, ap[i + 1], B1modb);
          add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);

          umul_ppmm_s (ch, cl, ap[i + 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm_s (ch, cl, ap[i + 3], B3modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm_s (ch, cl, rl, B4modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm_s (rh, rl, rh, B5modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
        }

      umul_ppmm_s (rh, cl, rh, B1modb);
      add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
    }
  else
    {
      switch (n & 3)
        {
        case 0:
          umul_ppmm (ph, pl, ap[n - 3], B1modb);
          add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
          umul_ppmm (ch, cl, ap[n - 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);
          umul_ppmm (rh, rl, ap[n - 1], B3modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 4;
          break;
        case 1:
          rh = 0;
          rl = ap[n - 1];
          n -= 1;
          break;
        case 2:
          rh = ap[n - 1];
          rl = ap[n - 2];
          n -= 2;
          break;
        case 3:
          umul_ppmm (ph, pl, ap[n - 2], B1modb);
          add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
          umul_ppmm (rh, rl, ap[n - 1], B2modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
          n -= 3;
          break;
        }

      for (i = n - 4; i >= 0; i -= 4)
        {
          /* rr = ap[i]                    < B
                + ap[i+1] * (B mod b)      <= (B-1)(b-1)
                + ap[i+2] * (B^2 mod b)    <= (B-1)(b-1)
                + ap[i+3] * (B^3 mod b)    <= (B-1)(b-1)
                + LO(rr)  * (B^4 mod b)    <= (B-1)(b-1)
                + HI(rr)  * (B^5 mod b)    <= (B-1)(b-1)
          */
          umul_ppmm (ph, pl, ap[i + 1], B1modb);
          add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);

          umul_ppmm (ch, cl, ap[i + 2], B2modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm (ch, cl, ap[i + 3], B3modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm (ch, cl, rl, B4modb);
          add_ssaaaa (ph, pl, ph, pl, ch, cl);

          umul_ppmm (rh, rl, rh, B5modb);
          add_ssaaaa (rh, rl, rh, rl, ph, pl);
        }

      umul_ppmm (rh, cl, rh, B1modb);
      add_ssaaaa (rh, rl, rh, rl, 0, cl);
    }

  bi = cps[0];
  cnt = cps[1];

  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);

  return r >> cnt;
}
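
#if 0
/* Illustrative sketch, not part of the original file: how a caller such as
   GMP's generic mpn/generic/mod_1.c driver is expected to use this pair of
   functions.  The wrapper name example_mod_1s_4 is hypothetical; the
   essential point, visible in the code above, is that mpn_mod_1s_4p takes
   the divisor already shifted left by the count saved in cps[1].  */
static mp_limb_t
example_mod_1s_4 (mp_srcptr ap, mp_size_t n, mp_limb_t d)
{
  mp_limb_t cps[7];

  ASSERT (d <= (~(mp_limb_t) 0) / 4);   /* precondition: d < B / 4 */
  mpn_mod_1s_4p_cps (cps, d);           /* one-time setup per divisor */
  return mpn_mod_1s_4p (ap, n, d << cps[1], cps);
}
#endif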