/* Interpolation for the algorithm Toom-Cook 8.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

/* Limb-width sanity checks: the fixed shift counts (up to 28 bits) and the
   single-limb divisor constants used below require sufficiently wide limbs.  */
#if GMP_NUMB_BITS < 29
#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
#endif

#if GMP_NUMB_BITS < 28
#error Not implemented: divexact_by188513325 and _by182712915 will not work.
#endif


/* {dst,n} -= {src,n} << s.  Returns the bits shifted out of src plus the
   borrow from the subtraction.  ws must point to n limbs of scratch
   (unused when a native mpn_sublsh_n exists).  */
#if HAVE_NATIVE_mpn_sublsh_n
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  /* Shift into the scratch area first, then subtract; the two return
     values combine into a single carry-out limb.  */
  __cy = mpn_lshift(ws,src,n,s);
  return    __cy + mpn_sub_n(dst,dst,ws,n);
#endif
}
#endif

/* {dst,n} += {src,n} << s.  Returns the bits shifted out of src plus the
   carry from the addition.  ws must point to n limbs of scratch.  */
#if HAVE_NATIVE_mpn_addlsh_n
#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift(ws,src,n,s);
  return __cy + mpn_add_n(dst,dst,ws,n);
#endif
}
#endif

/* {dst,nd} -= {src,ns} >> s, where ns <= nd; the fractional limb
   (src[0] >> s) is subtracted first, then the shifted remainder.  */
#if HAVE_NATIVE_mpn_subrsh
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
#else
/* FIXME: This is not a correct definition, it assumes no carry */
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
do {									\
  mp_limb_t __cy;							\
  MPN_DECR_U (dst, nd, src[0] >> s);					\
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
} while (0)
#endif


/* FIXME: tuneup should decide the best variant */
#ifndef AORSMUL_FASTER_AORS_AORSLSH
#define AORSMUL_FASTER_AORS_AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_AORS_2AORSLSH
#define AORSMUL_FASTER_AORS_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_2AORSLSH
#define AORSMUL_FASTER_2AORSLSH 1
#endif
#ifndef AORSMUL_FASTER_3AORSLSH
#define AORSMUL_FASTER_3AORSLSH 1
#endif

/* The interpolation needs a 42-bit shift.  With limbs narrower than 43
   bits that shift is split into a one-limb offset (BIT_CORRECTION) plus a
   reduced in-limb shift of (42 - CORRECTION_BITS) bits.  */
#if GMP_NUMB_BITS < 43
#define BIT_CORRECTION 1
#define CORRECTION_BITS GMP_NUMB_BITS
#else
#define BIT_CORRECTION 0
#define CORRECTION_BITS 0
#endif

/* 2-adic (binary) inverses of the divisors, used as the pre-computed
   inverse argument of mpn_pi1_bdiv_q_1 in the divexact_by* macros below.  */
#define BINVERT_9 \
  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)

#define BINVERT_255 \
  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))

/* FIXME: find some more general expressions for inverses */
#if GMP_LIMB_BITS == 32
#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
#if GMP_NAIL_BITS == 0
#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
#else /* GMP_NAIL_BITS != 0 */
#define BINVERT_255x182712915H \
  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
#define BINVERT_255x188513325H \
  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
#endif
#else
#if GMP_LIMB_BITS == 64
#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
#define BINVERT_255x182712915  (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
#define BINVERT_255x188513325  (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
#endif
#endif

/* Exact-division helpers.  Each picks the fastest available primitive:
   mpn_bdiv_dbm1 (divisor of the form 2^k-1), mpn_pi1_bdiv_q_1 (native,
   with the BINVERT_* inverse above), or generic mpn_divexact_1.  */
#ifndef mpn_divexact_by255
#if GMP_NUMB_BITS % 8 == 0
#define mpn_divexact_by255(dst,src,size) \
  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
#else
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
#else
#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
#endif
#endif
#endif

#ifndef mpn_divexact_by255x4
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
#else
#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
#endif
#endif

#ifndef mpn_divexact_by9x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
#else
#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
#endif
#endif

#ifndef mpn_divexact_by42525x16
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
#else
#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
#endif
#endif

#ifndef mpn_divexact_by2835x64
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
#else
#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
#endif
#endif

/* Divide by 255*182712915.  With narrow limbs (< 36 bits) the product does
   not fit in one limb, so it is done as two exact single-limb divisions.  */
#ifndef mpn_divexact_by255x182712915
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
/* FIXME: use mpn_bdiv_q_2_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
#define mpn_divexact_by255x182712915(dst,src,size)			\
  do {									\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \
    mpn_divexact_by255(dst,dst,size);					\
  } while(0)
#else
#define mpn_divexact_by255x182712915(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
#define mpn_divexact_by255x182712915(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
#else
#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

/* Divide by 255*188513325; same structure as the macro above.  */
#ifndef mpn_divexact_by255x188513325
#if GMP_NUMB_BITS < 36
#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
/* FIXME: use mpn_bdiv_q_1_pi2 */
#endif
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
#define mpn_divexact_by255x188513325(dst,src,size)			\
  do {									\
    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \
    mpn_divexact_by255(dst,dst,size);					\
  } while(0)
#else
#define mpn_divexact_by255x188513325(dst,src,size)	\
  do {							\
    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
    mpn_divexact_by255(dst,dst,size);			\
  } while(0)
#endif
#else /* GMP_NUMB_BITS > 35 */
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
#define mpn_divexact_by255x188513325(dst,src,size) \
  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
#else
#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
#endif
#endif /* GMP_NUMB_BITS >?< 36 */
#endif

/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
   points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
   +-1/8, 0. More precisely, we want to compute
   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
   14), given the 16 (rsp. 15) values:

     r0 = limit at infinity of f(x) / x^7,
     r1 = f(8),f(-8),
     r2 = f(4),f(-4),
     r3 = f(2),f(-2),
     r4 = f(1),f(-1),
     r5 = f(1/4),f(-1/4),
     r6 = f(1/2),f(-1/2),
     r7 = f(1/8),f(-1/8),
     r8 = f(0).

   All couples of the form f(n),f(-n) must be already mixed with
   toom_couple_handling(f(n),...,f(-n),...)

   The result is stored in {pp, spt + 7*n (or 8*n)}.
   At entry, r8 is stored at {pp, 2n},
   r6 is stored at {pp + 3n, 3n + 1}.
   r4 is stored at {pp + 7n, 3n + 1}.
   r2 is stored at {pp +11n, 3n + 1}.
   r0 is stored at {pp +15n, spt}.

   The other values are 3n+1 limbs each (with most significant limbs small).

   Negative intermediate results are stored two-complemented.
   Inputs are destroyed.
*/

void
mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

  /* The even-indexed values already live inside pp (see the layout in the
     comment above); address them through these aliases.  */
#define   r6    (pp + n3)			/* 3n+1 */
#define   r4    (pp + 7 * n)			/* 3n+1 */
#define   r2    (pp +11 * n)			/* 3n+1 */
#define   r0    (pp +15 * n)			/* s+t <= 2*n */

  ASSERT( spt <= 2 * n );
  /******************************* interpolation *****************************/
  /* Toom-8.5 only (half != 0): remove the contribution of r0, the value at
     infinity, from the other sampled values.  */
  if( half != 0) {
    cy = mpn_sub_n (r4, r4, r0, spt);
    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);

    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);

    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);

    /* The 42-bit shift is split via BIT_CORRECTION/CORRECTION_BITS when
       limbs are narrower than 43 bits; see the macro definitions above.  */
    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
#if BIT_CORRECTION
    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
		    n3p1 - spt - BIT_CORRECTION, cy);
    ASSERT (BIT_CORRECTION > 0 || cy == 0);
    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
    /* Park a sentinel limb past r7 so the borrow of the next subrsh is
       absorbed there; it is checked and restored below.  */
    cy = r7[n3p1];
    r7[n3p1] = 0x80;
#else
    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
#endif
    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
#if BIT_CORRECTION
    /* FIXME: assumes r7[n3p1] is writable. */
    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
    r7[n3p1] = cy;
#endif
  };

  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

  /* (r2, r5) <- (r2 + r5, r5 - r2); without the native combined primitive,
     the difference is built in wsi and pointer-swapped into place.  */
#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
  MP_PTR_SWAP(r5, wsi);
#endif

  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
#else
  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
  MP_PTR_SWAP(r3, wsi);
#endif

  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
#if BIT_CORRECTION
  /* Narrow-limb variant: subtract pp >> 6 from r1 limb-by-limb instead of
     using DO_mpn_subrsh, then propagate the final borrow.  */
  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
#else
  r7[n3] -= cy;
  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
#else
  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
  mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
  MP_PTR_SWAP(r7, wsi);
#endif

  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);

  /* Note: 1028 = 4 + 1024 = 2^2 + 2^10, matching the two shifts of the
     fallback branch.  */
#if AORSMUL_FASTER_2AORSLSH
  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
#endif

  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
#if AORSMUL_FASTER_3AORSLSH
  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
#else
  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
#endif
  mpn_divexact_by255x188513325(r7, r7, n3p1);

  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
  /* A division by 2835x64 follows. Warning: the operand can be negative! */
  mpn_divexact_by2835x64(r5, r5, n3p1);
  /* The quotient may be negative: sign-extend the top limb if its high
     bits indicate a two's-complement negative value.  */
  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));

#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
#else
  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
#endif
#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
#else
  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
#endif
  /* A division by 255x4 follows. Warning: the operand can be negative! */
  mpn_divexact_by255x4(r6, r6, n3p1);
  /* Sign-extend the possibly-negative quotient, as for r5 above.  */
  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));

  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));

  /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
  mpn_submul_1 (r1, r2, n3p1, 1428);
  mpn_submul_1 (r1, r3, n3p1, 112896);
  mpn_divexact_by255x182712915(r1, r1, n3p1);

  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
  mpn_divexact_by42525x16(r2, r2, n3p1);

#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
#else
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
#endif
  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
  mpn_divexact_by9x16(r3, r3, n3p1);

  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));

  /* Separate the even/odd coefficient pairs: e.g. from (r2+r6, r2) recover
     r6' = (r2 + r6)/2 and r2' = r2 - r6'; same pattern for r3/r5, r1/r7.  */
  mpn_add_n (r6, r2, r6, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));

  mpn_sub_n (r5, r3, r5, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));

  mpn_add_n (r7, r1, r7, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));

  /* last interpolation steps... */
  /* ... could be mixed with recomposition
	||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp

    summation scheme for remaining operations:
    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
  */

  /* Add r7 (3n+1 limbs) at pp + n, carrying into the limbs above.  */
  cy = mpn_add_n (pp + n, pp + n, r7, n);
  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
#else
  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);

  /* Add r5 at pp + 5n, same scheme.  */
  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
#else
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  /* Add r3 at pp + 9n.  */
  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);

  /* Add r1 at pp + 13n; only spt limbs above pp + 15n are valid, so the
     tail handling depends on half and on spt vs n.  */
  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
  if ( half ) {
    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
#if HAVE_NATIVE_mpn_add_nc
    if(LIKELY(spt > n)) {
      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
      MPN_INCR_U (pp + 16 * n, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
    }
#else
    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
    if(LIKELY(spt > n)) {
      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
      MPN_INCR_U (pp + 16 * n, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
    }
#endif
  } else {
    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
  }

#undef   r0
#undef   r2
#undef   r4
#undef   r6
}