/* Interpolation for the algorithm Toom-Cook 8.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

/* Limb-width sanity checks: the fixed shift counts (up to 28 bits) and the
   single-limb divisor constants used below require sufficiently wide limbs.  */
#if GMP_NUMB_BITS < 29
#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
#endif

#if GMP_NUMB_BITS < 28
#error Not implemented: divexact_by188513325 and _by182712915 will not work.
#endif


/* {dst,n} -= {src,n} << s.  Returns the bits shifted out of src plus the
   borrow from the subtraction.  ws must point to n limbs of scratch
   (unused when a native mpn_sublsh_n exists).  */
#if HAVE_NATIVE_mpn_sublsh_n
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  /* Shift into the scratch area first, then subtract; the two return
     values combine into a single carry-out limb.  */
  __cy = mpn_lshift(ws,src,n,s);
  return    __cy + mpn_sub_n(dst,dst,ws,n);
#endif
}
#endif

/* {dst,n} += {src,n} << s.  Returns the bits shifted out of src plus the
   carry from the addition.  ws must point to n limbs of scratch.  */
#if HAVE_NATIVE_mpn_addlsh_n
#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift(ws,src,n,s);
  return __cy + mpn_add_n(dst,dst,ws,n);
#endif
}
#endif

/* {dst,nd} -= {src,ns} >> s, where ns <= nd; the fractional limb
   (src[0] >> s) is subtracted first, then the shifted remainder.  */
#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
#else
/* FIXME: This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
do {									\
  mp_limb_t __cy;							\
  MPN_DECR_U (dst, nd, src[0] >> s);					\
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
} while (0)
#endif


/* FIXME: tuneup should decide the best variant */
#ifndef AORSMUL_FASTER_AORS_AORSLSH
#define AORSMUL_FASTER_AORS_AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_AORS_2AORSLSH
#define AORSMUL_FASTER_AORS_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_2AORSLSH
#define AORSMUL_FASTER_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_3AORSLSH
#define AORSMUL_FASTER_3AORSLSH 1
#endif

/* The interpolation needs a 42-bit shift.  With limbs narrower than 43
   bits that shift is split into a one-limb offset (BIT_CORRECTION) plus a
   reduced in-limb shift of (42 - CORRECTION_BITS) bits.  */
#if GMP_NUMB_BITS < 43
#define BIT_CORRECTION 1
#define CORRECTION_BITS GMP_NUMB_BITS
#else
#define BIT_CORRECTION 0
#define CORRECTION_BITS 0
#endif

/* 2-adic (binary) inverses of the divisors, used as the pre-computed
   inverse argument of mpn_pi1_bdiv_q_1 in the divexact_by* macros below.  */
#define BINVERT_9 \
  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)

#define BINVERT_255 \
  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))

/* FIXME: find some more general expressions for inverses */
#if GMP_LIMB_BITS == 32
#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
#if GMP_NAIL_BITS == 0
#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
#else /* GMP_NAIL_BITS != 0 */
#define BINVERT_255x182712915H \
  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
#define BINVERT_255x188513325H \
  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
#endif
#else
#if GMP_LIMB_BITS == 64
#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
#define BINVERT_255x182712915  (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
#define BINVERT_255x188513325  (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
#endif
#endif

/* Exact-division helpers.  Each picks the fastest available primitive:
   mpn_bdiv_dbm1 (divisor of the form 2^k-1), mpn_pi1_bdiv_q_1 (native,
   with the BINVERT_* inverse above), or generic mpn_divexact_1.  */
#ifndef mpn_divexact_by255
#if GMP_NUMB_BITS % 8 == 0
#define mpn_divexact_by255(dst,src,size) \
  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
#else
#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
#endif
#endif
#endif

#ifndef mpn_divexact_by255x4
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
#else
#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
#endif
#endif

#ifndef mpn_divexact_by9x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
#else
#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
#endif
#endif

#ifndef mpn_divexact_by42525x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
#else
#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
#endif
#endif

#ifndef mpn_divexact_by2835x64
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
#else
#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
#endif
#endif

/* Divide by 255*182712915.  With narrow limbs (< 36 bits) the product does
   not fit in one limb, so it is done as two exact single-limb divisions.  */
#ifndef mpn_divexact_by255x182712915
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
/* FIXME: use mpn_bdiv_q_2_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
#define mpn_divexact_by255x182712915(dst,src,size)			\
  do {									\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \
    mpn_divexact_by255(dst,dst,size);					\
  } while(0)
#else
#define mpn_divexact_by255x182712915(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
#define mpn_divexact_by255x182712915(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
#else
#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

/* Divide by 255*188513325; same structure as the macro above.  */
#ifndef mpn_divexact_by255x188513325
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
/* FIXME: use mpn_bdiv_q_1_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
#define mpn_divexact_by255x188513325(dst,src,size)			\
  do {									\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \
    mpn_divexact_by255(dst,dst,size);					\
  } while(0)
#else
#define mpn_divexact_by255x188513325(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
#define mpn_divexact_by255x188513325(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
#else
#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
   points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
   +-1/8, 0. More precisely, we want to compute
   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
   14), given the 16 (rsp. 15) values:

     r0 = limit at infinity of f(x) / x^7,
     r1 = f(8),f(-8),
     r2 = f(4),f(-4),
     r3 = f(2),f(-2),
     r4 = f(1),f(-1),
     r5 = f(1/4),f(-1/4),
     r6 = f(1/2),f(-1/2),
     r7 = f(1/8),f(-1/8),
     r8 = f(0).

   All couples of the form f(n),f(-n) must be already mixed with
   toom_couple_handling(f(n),...,f(-n),...)

   The result is stored in {pp, spt + 7*n (or 8*n)}.
   At entry, r8 is stored at {pp, 2n},
   r6 is stored at {pp + 3n, 3n + 1}.
   r4 is stored at {pp + 7n, 3n + 1}.
   r2 is stored at {pp +11n, 3n + 1}.
   r0 is stored at {pp +15n, spt}.

   The other values are 3n+1 limbs each (with most significant limbs small).

   Negative intermediate results are stored two-complemented.
   Inputs are destroyed.
*/

void
mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

  /* The even-indexed values already live inside pp (see the layout in the
     comment above); address them through these aliases.  */
#define   r6    (pp + n3)			/* 3n+1 */
#define   r4    (pp + 7 * n)			/* 3n+1 */
#define   r2    (pp +11 * n)			/* 3n+1 */
#define   r0    (pp +15 * n)			/* s+t <= 2*n */

  ASSERT( spt <= 2 * n );
  /******************************* interpolation *****************************/
  /* Toom-8.5 only (half != 0): remove the contribution of r0, the value at
     infinity, from the other sampled values.  */
  if( half != 0) {
    cy = mpn_sub_n (r4, r4, r0, spt);
    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);

    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);

    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);

    /* The 42-bit shift is split via BIT_CORRECTION/CORRECTION_BITS when
       limbs are narrower than 43 bits; see the macro definitions above.  */
    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
#if BIT_CORRECTION
    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
		    n3p1 - spt - BIT_CORRECTION, cy);
    ASSERT (BIT_CORRECTION > 0 || cy == 0);
    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
    /* Park a sentinel limb past r7 so the borrow of the next subrsh is
       absorbed there; it is checked and restored below.  */
    cy = r7[n3p1];
    r7[n3p1] = 0x80;
#else
    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
#endif
    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
#if BIT_CORRECTION
    /* FIXME: assumes r7[n3p1] is writable. */
    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
    r7[n3p1] = cy;
#endif
  };

  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

  /* (r2, r5) <- (r2 + r5, r5 - r2); without the native combined primitive,
     the difference is built in wsi and pointer-swapped into place.  */
#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
  MP_PTR_SWAP(r5, wsi);
#endif

  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
#else
  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
  MP_PTR_SWAP(r3, wsi);
#endif

  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
#if BIT_CORRECTION
  /* Narrow-limb variant: subtract pp >> 6 from r1 limb-by-limb instead of
     using DO_mpn_subrsh, then propagate the final borrow.  */
  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
#else
  r7[n3] -= cy;
  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
#else
  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
  mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
  MP_PTR_SWAP(r7, wsi);
#endif

  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);

  /* Note: 1028 = 4 + 1024 = 2^2 + 2^10, matching the two shifts of the
     fallback branch.  */
#if AORSMUL_FASTER_2AORSLSH
  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
#endif

  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
#if AORSMUL_FASTER_3AORSLSH
  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
#else
  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
#endif
  mpn_divexact_by255x188513325(r7, r7, n3p1);

  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
  /* A division by 2835x64 follows. Warning: the operand can be negative! */
  mpn_divexact_by2835x64(r5, r5, n3p1);
  /* The quotient may be negative: sign-extend the top limb if its high
     bits indicate a two's-complement negative value.  */
  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));

#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
#else
  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
#endif
#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
#else
  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
#endif
  /* A division by 255x4 follows. Warning: the operand can be negative! */
  mpn_divexact_by255x4(r6, r6, n3p1);
  /* Sign-extend the possibly-negative quotient, as for r5 above.  */
  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));

  /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
  mpn_submul_1 (r1, r2, n3p1, 1428);
  mpn_submul_1 (r1, r3, n3p1, 112896);
  mpn_divexact_by255x182712915(r1, r1, n3p1);

  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
  mpn_divexact_by42525x16(r2, r2, n3p1);

#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
#else
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
#endif
  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
  mpn_divexact_by9x16(r3, r3, n3p1);

  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));

  /* Separate the even/odd coefficient pairs: e.g. from (r2+r6, r2) recover
     r6' = (r2 + r6)/2 and r2' = r2 - r6'; same pattern for r3/r5, r1/r7.  */
  mpn_add_n (r6, r2, r6, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));

  mpn_sub_n (r5, r3, r5, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));

  mpn_add_n (r7, r1, r7, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));

  /* last interpolation steps... */
  /* ... could be mixed with recomposition
	||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp

    summation scheme for remaining operations:
    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
  */

  /* Add r7 (3n+1 limbs) at pp + n, carrying into the limbs above.  */
  cy = mpn_add_n (pp + n, pp + n, r7, n);
  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
#else
  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);

  /* Add r5 at pp + 5n, same scheme.  */
  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
#else
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  /* Add r3 at pp + 9n.  */
  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);

  /* Add r1 at pp + 13n; only spt limbs above pp + 15n are valid, so the
     tail handling depends on half and on spt vs n.  */
  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
  if ( half ) {
    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
#if HAVE_NATIVE_mpn_add_nc
    if(LIKELY(spt > n)) {
      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
      MPN_INCR_U (pp + 16 * n, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
    }
#else
    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
    if(LIKELY(spt > n)) {
      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
      MPN_INCR_U (pp + 16 * n, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
    }
#endif
  } else {
    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
  }

#undef   r0
#undef   r2
#undef   r4
#undef   r6
}