github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/div_qr_1n_pi1.c

/* mpn_div_qr_1n_pi1

   Contributed to the GNU project by Niels Möller

   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.


Copyright 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#if GMP_NAIL_BITS > 0
#error Nail bits not supported
#endif

#ifndef DIV_QR_1N_METHOD
#define DIV_QR_1N_METHOD 2
#endif

/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
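/* add_mssaaaa(m, s1, s0, a1, a0, b1, b0) computes the two-limb sum
   (s1,s0) = (a1,a0) + (b1,b0), and sets m to minus the carry out of
   the high limb: 0 when the sum fits in two limbs, all ones when it
   does not.  The asm variants below compute the same function as the
   generic C fallback at the end of this block.  */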
#if defined (__GNUC__) && ! defined (NO_ASM)

#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                          \
  __asm__ ( "add %6, %k2\n\t"                                           \
            "adc %4, %k1\n\t"                                           \
            "sbb %k0, %k0"                                              \
           : "=r" (m), "=r" (s1), "=&r" (s0)                            \
           : "1" ((USItype)(a1)), "g" ((USItype)(b1)),                  \
             "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
#endif

#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                          \
  __asm__ ( "add %6, %q2\n\t"                                           \
            "adc %4, %q1\n\t"                                           \
            "sbb %q0, %q0"                                              \
           : "=r" (m), "=r" (s1), "=&r" (s0)                            \
           : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)),                \
             "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif

#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                          \
  __asm__ ( "addcc %r5, %6, %2\n\t"                                     \
            "addxcc %r3, %4, %1\n\t"                                    \
            "subx %%g0, %%g0, %0"                                       \
           : "=r" (m), "=r" (sh), "=&r" (sl)                            \
           : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)                \
             __CLOBBER_CC)
#endif

#if defined (__sparc__) && W_TYPE_SIZE == 64
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                          \
  __asm__ ( "addcc %r5, %6, %2\n\t"                                     \
            "addccc %r7, %8, %%g0\n\t"                                  \
            "addccc %r3, %4, %1\n\t"                                    \
            "clr %0\n\t"                                                \
            "movcs %%xcc, -1, %0"                                       \
           : "=r" (m), "=r" (sh), "=&r" (sl)                            \
           : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),               \
             "rJ" ((al) >> 32), "rI" ((bl) >> 32)                       \
             __CLOBBER_CC)
#if __VIS__ >= 0x300
#undef add_mssaaaa
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                          \
  __asm__ ( "addcc %r5, %6, %2\n\t"                                     \
            "addxccc %r3, %4, %1\n\t"                                   \
            "clr %0\n\t"                                                \
            "movcs %%xcc, -1, %0"                                       \
           : "=r" (m), "=r" (sh), "=&r" (sl)                            \
           : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)                \
             __CLOBBER_CC)
#endif
#endif

#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
   processor running in 32-bit mode, since the carry flag then gets the 32-bit
   carry.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                          \
  __asm__ ( "add%I6c %2, %5, %6\n\t"                                    \
            "adde %1, %3, %4\n\t"                                       \
            "subfe %0, %0, %0\n\t"                                      \
            "nor %0, %0, %0"                                            \
           : "=r" (m), "=r" (s1), "=&r" (s0)                            \
           : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif

#if defined (__s390x__) && W_TYPE_SIZE == 64
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                          \
  __asm__ ( "algr %2, %6\n\t"                                           \
            "alcgr %1, %4\n\t"                                          \
            "lghi %0, 0\n\t"                                            \
            "alcgr %0, %0\n\t"                                          \
            "lcgr %0, %0"                                               \
           : "=r" (m), "=r" (s1), "=&r" (s0)                            \
           : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)),                  \
             "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
#endif

#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)                          \
  __asm__ ( "adds %2, %5, %6\n\t"                                       \
            "adcs %1, %3, %4\n\t"                                       \
            "movcc %0, #0\n\t"                                          \
            "movcs %0, #-1"                                             \
           : "=r" (m), "=r" (sh), "=&r" (sl)                            \
           : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
#endif
#endif /* defined (__GNUC__) */

#ifndef add_mssaaaa
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)                          \
  do {                                                                  \
    UWtype __s0, __s1, __c0, __c1;                                      \
    __s0 = (a0) + (b0);                                                 \
    __s1 = (a1) + (b1);                                                 \
    __c0 = __s0 < (a0);                                                 \
    __c1 = __s1 < (a1);                                                 \
    (s0) = __s0;                                                        \
    __s1 = __s1 + __c0;                                                 \
    (s1) = __s1;                                                        \
    (m) = - (__c1 + (__s1 < __c0));                                     \
  } while (0)
#endif

#if DIV_QR_1N_METHOD == 1

/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
   Requires that uh < d.  */
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
                   mp_limb_t d, mp_limb_t dinv)
{
  ASSERT (n > 0);
  ASSERT (uh < d);
  ASSERT (d & GMP_NUMB_HIGHBIT);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));

  do
    {
      mp_limb_t q, ul;

      ul = up[--n];
      udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
      qp[n] = q;
    }
  while (n > 0);

  return uh;
}

#elif DIV_QR_1N_METHOD == 2
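/* Method 2 also produces one quotient limb per loop iteration, but it
   avoids method 1's serial chain of udiv_qrnnd_preinv calls: the
   partial remainder is folded using B2 = -d*dinv, which is congruent
   to B^2 modulo d, and the two umul_ppmm products in the main loop
   depend only on the previous iteration's u1, not on each other,
   exposing more instruction-level parallelism.  */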
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
                   mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t B2;
  mp_limb_t u0, u2;
  mp_limb_t q0, q1;
  mp_limb_t p0, p1;
  mp_limb_t t;
  mp_size_t j;

  ASSERT (d & GMP_LIMB_HIGHBIT);
  ASSERT (n > 0);
  ASSERT (u1 < d);

  if (n == 1)
    {
      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
      return u1;
    }

  /* FIXME: Could be precomputed */
  B2 = -d*dinv;

  umul_ppmm (q1, q0, dinv, u1);
  umul_ppmm (p1, p0, B2, u1);
  q1 += u1;
  ASSERT (q1 >= u1);
  u0 = up[n-1];   /* Early read, to allow qp == up. */
  qp[n-1] = q1;

  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);

  /* FIXME: Keep q1 in a variable between iterations, to reduce number
     of memory accesses.  */
  for (j = n-2; j-- > 0; )
    {
      mp_limb_t q2, cy;

      /* Additions for the q update:
       *    +-------+
       *    |u1 * v |
       *    +---+---+
       *    | u1|
       *    +---+---+
       *    | 1 | v |  (conditional on u2)
       *    +---+---+
       *    | 1 |      (conditional on u0 + u2 B2 carry)
       *    +---+
       *  +         | q0|
       *   -+---+---+---+
       *    | q2| q1| q0|
       *    +---+---+---+
       */
      umul_ppmm (p1, t, u1, dinv);
      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
      q0 = t;

      umul_ppmm (p1, p0, u1, B2);
      ADDC_LIMB (cy, u0, u0, u2 & B2);
      u0 -= (-cy) & d;

      /* Final q update */
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
      qp[j+1] = q1;
      MPN_INCR_U (qp+j+2, n-j-2, q2);

      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
    }

  q1 = (u2 > 0);
  u1 -= (-q1) & d;

  t = (u1 >= d);
  q1 += t;
  u1 -= (-t) & d;

  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);

  MPN_INCR_U (qp+1, n-1, q1);

  qp[0] = q0;
  return u0;
}

#else
#error Unknown DIV_QR_1N_METHOD
#endif
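/* Illustrative sketch, not part of the original file: one plausible
   way to call mpn_div_qr_1n_pi1, with the inverse computed by
   invert_limb as for udiv_qrnnd_preinv.  The helper name is made up
   for illustration, and the block is disabled with #if 0 so it cannot
   affect compilation.  Requirements taken from the ASSERTs above:
   d must have its high bit set, and the initial high limb must be
   less than d.  */
#if 0
static mp_limb_t
example_div_qr_1n (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t d)
{
  mp_limb_t dinv;
  ASSERT (n > 0);
  ASSERT (d & GMP_LIMB_HIGHBIT);  /* d normalized */
  invert_limb (dinv, d);          /* dinv = floor ((B^2 - 1) / d) - B */
  /* Quotient goes to {qp, n}; the return value is the remainder. */
  return mpn_div_qr_1n_pi1 (qp, up, n, CNST_LIMB(0), d, dinv);
}
#endif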