github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_8pts.c (about)

     1  /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  #include "gmp.h"
    38  #include "gmp-impl.h"
    39  
    40  #define BINVERT_3 MODLIMB_INVERSE_3
    41  
    42  #define BINVERT_15 \
    43    ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
    44  
    45  #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
    46  
    47  #ifndef mpn_divexact_by3
    48  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
    49  #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
    50  #else
    51  #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
    52  #endif
    53  #endif
    54  
    55  #ifndef mpn_divexact_by45
    56  #if GMP_NUMB_BITS % 12 == 0
    57  #define mpn_divexact_by45(dst,src,size) \
    58    (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
    59  #else
    60  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1
    61  #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
    62  #else
    63  #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
    64  #endif
    65  #endif
    66  #endif
    67  
    68  #if HAVE_NATIVE_mpn_sublsh2_n_ip1
    69  #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
    70  #else
    71  #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
    72  #endif
    73  
    74  #if HAVE_NATIVE_mpn_sublsh_n
    75  #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
    76  #else
    77  static mp_limb_t
    78  DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
    79  {
    80  #if USE_MUL_1 && 0
    81    return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
    82  #else
    83    mp_limb_t __cy;
    84    __cy = mpn_lshift (ws,src,n,s);
    85    return __cy + mpn_sub_n (dst,dst,ws,n);
    86  #endif
    87  }
    88  #endif
    89  
    90  
    91  #if HAVE_NATIVE_mpn_subrsh
    92  #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
    93  #else
    94  /* This is not a correct definition, it assumes no carry */
    95  #define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
    96  do {									\
    97    mp_limb_t __cy;							\
    98    MPN_DECR_U (dst, nd, src[0] >> s);					\
    99    __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
   100    MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
   101  } while (0)
   102  #endif
   103  
   104  /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
   105     points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
   106     we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
   107     degree 7 (or 6), given the 8 (rsp. 7) values:
   108  
   109       r1 = limit at infinity of f(x) / x^7,
   110       r2 = f(4),
   111       r3 = f(-4),
   112       r4 = f(2),
   113       r5 = f(-2),
   114       r6 = f(1),
   115       r7 = f(-1),
   116       r8 = f(0).
   117  
   118     All couples of the form f(n),f(-n) must be already mixed with
   119     toom_couple_handling(f(n),...,f(-n),...)
   120  
   121     The result is stored in {pp, spt + 7*n (or 6*n)}.
   122     At entry, r8 is stored at {pp, 2n},
   123     r5 is stored at {pp + 3n, 3n + 1}.
   124  
   125     The other values are 2n+... limbs each (with most significant limbs small).
   126  
   127     All intermediate results are positive.
   128     Inputs are destroyed.
   129  */
   130  
   131  void
   132  mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
   133  			   mp_ptr r3, mp_ptr r7,
   134  			   mp_size_t spt, mp_ptr ws)
   135  {
   136    mp_limb_signed_t cy;
   137    mp_ptr r5, r1;
   138    r5 = (pp + 3 * n);			/* 3n+1 */
   139    r1 = (pp + 7 * n);			/* spt */
   140  
   141    /******************************* interpolation *****************************/
   142  
   143    DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
   144    cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
   145    MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
   146  
   147    DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
   148    cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
   149    MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
   150  
   151    r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
   152    cy = mpn_sub_n (r7, r7, r1, spt);
   153    MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
   154  
   155    ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
   156    ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
   157  
   158    ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
   159  
   160    ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
   161  
   162    mpn_divexact_by45 (r3, r3, 3 * n + 1);
   163  
   164    ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
   165  
   166    ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
   167  
   168    /* last interpolation steps... */
   169    /* ... are mixed with recomposition */
   170  
   171    /***************************** recomposition *******************************/
   172    /*
   173      pp[] prior to operations:
   174       |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
   175  
   176      summation scheme for remaining operations:
   177       |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
   178       |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
   179  	  ||_H r3|_M r3|_L*r3|
   180  				  ||_H_r7|_M_r7|_L_r7|
   181  		      ||-H r3|-M r3|-L*r3|
   182  				  ||-H*r5|-M_r5|-L_r5|
   183    */
   184  
   185    cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
   186    cy-= mpn_sub_n (pp + n, pp + n, r5, n);
   187    if (0 > cy)
   188      MPN_DECR_U (r7 + n, 2*n + 1, 1);
   189    else
   190      MPN_INCR_U (r7 + n, 2*n + 1, cy);
   191  
   192    cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
   193    MPN_DECR_U (r7 + 2*n, n + 1, cy);
   194  
   195    cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
   196    r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
   197    cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
   198    if (UNLIKELY(0 > cy))
   199      MPN_DECR_U (r5 + n + 1, 2*n, 1);
   200    else
   201      MPN_INCR_U (r5 + n + 1, 2*n, cy);
   202  
   203    ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
   204  
   205    cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
   206    MPN_INCR_U (r3 + 2*n, n + 1, cy);
   207    cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
   208    if (LIKELY(spt != n))
   209      MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
   210    else
   211      ASSERT (r3[3*n] + cy == 0);
   212  }