github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_6pts.c (about)

     1  /* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  #include "gmp.h"
    38  #include "gmp-impl.h"
    39  
    40  /* For odd divisors, mpn_divexact_1 works fine with two's complement.

           mpn_divexact_by3 (dst, src, size) is defined here only when nothing
           has defined it earlier.  If the build provides a native
           mpn_pi1_bdiv_q_1 and MODLIMB_INVERSE_3 is non-zero, the macro expands
           to mpn_pi1_bdiv_q_1 with divisor 3 and MODLIMB_INVERSE_3 (presumably
           the precomputed limb inverse of 3 -- confirm in gmp-impl.h).
           Otherwise it expands to mpn_divexact_1 with divisor 3.  */
    41  #ifndef mpn_divexact_by3
    42  #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
    43  #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
    44  #else
    45  #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
    46  #endif /* HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3 */
    47  #endif /* mpn_divexact_by3 */
    48  
    49  /* Interpolation for Toom-3.5, using the evaluation points: 0, infinity,
    50     1, -1, 2, -2. More precisely, we want to compute
    51     f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
    52     six values
    53  
    54       w5 = f(0),
    55       w4 = f(-1),
    56       w3 = f(1),
    57       w2 = f(-2),
    58       w1 = f(2),
    59       w0 = limit at infinity of f(x) / x^5.
    60  
    61     The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
    62     {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
    63     {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
    64     significant limbs small). f(-1) and f(-2) may be negative, signs
    65     determined by the flag bits. All intermediate results are positive.
    66     Inputs are destroyed.
    67  
           Arguments:
             pp     result area; on entry it already holds w5, w3 and w0 at the
                    offsets given above.
             n      block size in limbs; must be positive.
             flags  if toom6_vm1_neg is set, w4 is added to w3 instead of being
                    subtracted from it; toom6_vm2_neg does the same for w2 and
                    w1.
             w4     value at -1, {w4, 2n+1}; overwritten, and reused as scratch
                    space when no native sublsh primitive is available.
             w2     value at -2, {w2, 2n+1}; overwritten.
             w1     value at 2, {w1, 2n+1}; overwritten.
             w0n    size of w0 in limbs, 0 < w0n <= 2n.

           Nothing is returned; the result is left in pp.

    68     Interpolation sequence was taken from the paper: "Integer and
    69     Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
    70     Some slight variations were introduced: adaptation to "gmp
    71     instruction set", and a final saving of an operation by interlacing
    72     interpolation and recomposition phases.
    73  */
    74  
    75  void
    76  mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
    77  			   mp_ptr w4, mp_ptr w2, mp_ptr w1,
    78  			   mp_size_t w0n)
    79  {
    80    mp_limb_t cy;
    81    /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
          /* cy4 and cy6 hold carries whose application is postponed until the
             final fix-up at the end of the function; embankment keeps the top
             limb of w0 during that fix-up. */
    82    mp_limb_t cy4, cy6, embankment;
    83  
          /* w0 may be shorter than the other parts, but never empty and never
             longer than 2n limbs. */
    84    ASSERT( n > 0 );
    85    ASSERT( 2*n >= w0n && w0n > 0 );
    86  
          /* Local names for the three values that already live inside pp; the
             trailing comments give their sizes in limbs. */
    87  #define w5  pp					/* 2n   */
    88  #define w3  (pp + 2 * n)			/* 2n+1 */
    89  #define w0  (pp + 5 * n)			/* w0n  */
    90  
    91    /* Interpolate with sequence:
    92       W2 =(W1 - W2)>>2
    93       W1 =(W1 - W5)>>1
    94       W1 =(W1 - W2)>>1
    95       W4 =(W3 - W4)>>1
    96       W2 =(W2 - W4)/3
    97       W3 = W3 - W4 - W5
    98       W1 =(W1 - W3)/3
    99       // Last steps are mixed with recomposition...
   100       W2 = W2 - W0<<2
   101       W4 = W4 - W2
   102       W3 = W3 - W1
   103       W2 = W2 - W0
   104    */
   105  
   106    /* W2 =(W1 - W2)>>2 */
          /* When the flag marks f(-2) as negative the subtraction turns into
             an addition. */
   107    if (flags & toom6_vm2_neg)
   108      mpn_add_n (w2, w1, w2, 2 * n + 1);
   109    else
   110      mpn_sub_n (w2, w1, w2, 2 * n + 1);
   111    mpn_rshift (w2, w2, 2 * n + 1, 2);
   112  
   113    /* W1 =(W1 - W5)>>1 */
          /* w5 has only 2n limbs, so the borrow out of the low 2n limbs is
             taken from the top limb of w1. */
   114    w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
   115    mpn_rshift (w1, w1, 2 * n + 1, 1);
   116  
   117    /* W1 =(W1 - W2)>>1 */
          /* Use the combined subtract-and-halve primitive when the build has
             one; the fallback does the same in two passes. */
   118  #if HAVE_NATIVE_mpn_rsh1sub_n
   119    mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
   120  #else
   121    mpn_sub_n (w1, w1, w2, 2 * n + 1);
   122    mpn_rshift (w1, w1, 2 * n + 1, 1);
   123  #endif
   124  
   125    /* W4 =(W3 - W4)>>1 */
          /* Same pattern as above: the flag for f(-1) selects addition instead
             of subtraction, and a fused primitive is used when available. */
   126    if (flags & toom6_vm1_neg)
   127      {
   128  #if HAVE_NATIVE_mpn_rsh1add_n
   129        mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
   130  #else
   131        mpn_add_n (w4, w3, w4, 2 * n + 1);
   132        mpn_rshift (w4, w4, 2 * n + 1, 1);
   133  #endif
   134      }
   135    else
   136      {
   137  #if HAVE_NATIVE_mpn_rsh1sub_n
   138        mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
   139  #else
   140        mpn_sub_n (w4, w3, w4, 2 * n + 1);
   141        mpn_rshift (w4, w4, 2 * n + 1, 1);
   142  #endif
   143      }
   144  
   145    /* W2 =(W2 - W4)/3 */
   146    mpn_sub_n (w2, w2, w4, 2 * n + 1);
   147    mpn_divexact_by3 (w2, w2, 2 * n + 1);
   148  
   149    /* W3 = W3 - W4 - W5 */
          /* W4 is subtracted over 2n+1 limbs; W5 has 2n limbs, so its borrow is
             taken from the top limb of w3. */
   150    mpn_sub_n (w3, w3, w4, 2 * n + 1);
   151    w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
   152  
   153    /* W1 =(W1 - W3)/3 */
   154    mpn_sub_n (w1, w1, w3, 2 * n + 1);
   155    mpn_divexact_by3 (w1, w1, 2 * n + 1);
   156  
   157    /*
   158      [1 0 0 0 0 0;
   159       0 1 0 0 0 0;
   160       1 0 1 0 0 0;
   161       0 1 0 1 0 0;
   162       1 0 1 0 1 0;
   163       0 0 0 0 0 1]
   164  
   165      pp[] prior to operations:
   166       |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
   167  
   168      summation scheme for remaining operations:
   169       |______________5|n_____4|n_____3|n_____2|n______|n______|pp
   170       |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
   171  				    || H w4  | L w4  |
   172  		    || H w2  | L w2  |
   173  	    || H w1  | L w1  |
   174  			    ||-H w1  |-L w1  |
   175  		     |-H w0  |-L w0 ||-H w2  |-L w2  |
   176    */
          /* Add W4 into the result at offset n.  The carry out of those 2n+1
             limbs continues at pp + 3n + 1. */
   177    cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
   178    MPN_INCR_U (pp + 3 * n + 1, n, cy);
   179  
   180    /* W2 -= W0<<2 */
   181  #if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
   182  #if HAVE_NATIVE_mpn_sublsh2_n_ip1
   183    cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
   184  #else
   185    cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
   186  #endif
   187  #else
   188    /* {W4,2*n+1} is now free and can be overwritten. */
          /* Fallback: build W0<<2 in w4, then subtract it from the low w0n
             limbs of w2.  cy is the limb returned by the shift plus the borrow
             of the subtraction. */
   189    cy = mpn_lshift(w4, w0, w0n, 2);
   190    cy+= mpn_sub_n(w2, w2, w4, w0n);
   191  #endif
          /* Apply cy to the remaining 2n+1-w0n limbs of w2. */
   192    MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
   193  
   194    /* W4L = W4L - W2L */
          /* The borrow out of these n limbs is propagated into w3, which starts
             at pp + 2n. */
   195    cy = mpn_sub_n (pp + n, pp + n, w2, n);
   196    MPN_DECR_U (w3, 2 * n + 1, cy);
   197  
   198    /* W3H = W3H + W2L */
          /* cy4 = top limb of w3 plus the carry of this n-limb addition; it is
             applied at pp + 4n in the final fix-up. */
   199    cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
   200    /* W1L + W2H */
          /* The n-limb sum is written to pp + 4n; its carry, together with the
             top limb of w2, is added into the high part of w1. */
   201    cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
   202    MPN_INCR_U (w1 + n, n + 1, cy);
   203  
   204    /* W0 = W0 + W1H */
          /* With w0n > n all n limbs of the high part of w1 are added and the
             top limb w1[2n] is folded into cy6.  Otherwise only w0n limbs are
             added.  NOTE(review): in that branch the limbs of w1 from
             w1[n + w0n] upwards are not read; presumably they are zero --
             confirm against the callers. */
   205    if (LIKELY (w0n > n))
   206      cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
   207    else
   208      cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
   209  
   210    /*
   211      summation scheme for the next operation:
   212       |...____5|n_____4|n_____3|n_____2|n______|n______|pp
   213       |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
   214  		     ...-w0___|-w1_w2 |
   215    */
   216    /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
          /* Subtract the n + w0n limbs starting at pp + 4n from those starting
             at pp + 2n; cy is the borrow, to be applied at pp + 3n + w0n. */
   217    cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
   218  
   219    /* embankment is a "dirty trick" to avoid carry/borrow propagation
   220       beyond allocated memory */
          /* The top limb of w0 is replaced by 1 while the deferred carries are
             applied, so one unit of carry or borrow reaching it changes it to 2
             or 0 without propagating further.  The saved difference (old top
             limb minus 1) is added back at the end. */
   221    embankment = w0[w0n - 1] - 1;
   222    w0[w0n - 1] = 1;
   223    if (LIKELY (w0n > n)) {
          /* cy4 is added and cy6 subtracted at pp + 4n; only their difference
             is applied, in whichever direction is non-negative. */
   224      if (cy4 > cy6)
   225        MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
   226      else
   227        MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
          /* Borrow of the big subtraction, then cy6 added at w0 + n. */
   228      MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
   229      MPN_INCR_U (w0 + n, w0n - n, cy6);
   230    } else {
          /* Short w0: cy6 is applied together with the borrow cy at
             pp + 3n + w0n, and is not added anywhere above w0. */
   231      MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
   232      MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
   233    }
          /* Restore the real top limb of w0. */
   234    w0[w0n - 1] += embankment;
   235  
   236  #undef w5
   237  #undef w3
   238  #undef w0
   239  
   240  }