github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_5pts.c

github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_5pts.c (about)

     1  /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
     2  
     3     Contributed to the GNU project by Robert Harley.
     4     Improvements by Paul Zimmermann and Marco Bodrato.
     5  
     6     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     7     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     8     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     9  
    10  Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
    11  
    12  This file is part of the GNU MP Library.
    13  
    14  The GNU MP Library is free software; you can redistribute it and/or modify
    15  it under the terms of either:
    16  
    17    * the GNU Lesser General Public License as published by the Free
    18      Software Foundation; either version 3 of the License, or (at your
    19      option) any later version.
    20  
    21  or
    22  
    23    * the GNU General Public License as published by the Free Software
    24      Foundation; either version 2 of the License, or (at your option) any
    25      later version.
    26  
    27  or both in parallel, as here.
    28  
    29  The GNU MP Library is distributed in the hope that it will be useful, but
    30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    32  for more details.
    33  
    34  You should have received copies of the GNU General Public License and the
    35  GNU Lesser General Public License along with the GNU MP Library.  If not,
    36  see https://www.gnu.org/licenses/.  */
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  void
    42  mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
    43  			   mp_size_t k, mp_size_t twor, int sa,
    44  			   mp_limb_t vinf0)
    45  {
    46    mp_limb_t cy, saved;
    47    mp_size_t twok;
    48    mp_size_t kk1;
    49    mp_ptr c1, v1, c3, vinf;
    50  
    51    twok = k + k;
    52    kk1 = twok + 1;
    53  
    54    c1 = c  + k;
    55    v1 = c1 + k;
    56    c3 = v1 + k;
    57    vinf = c3 + k;
    58  
    59  #define v0 (c)
    60    /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
    61       thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
    62    */
    63    if (sa)
    64      ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
    65    else
    66      ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
    67  
    68    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
    69         v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
    70  
    71    ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
    72  						      /* (5 3 1 1 0)*/
    73  
    74    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
    75         v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
    76  
    77    /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
    78       tm1 >= 0                                         (0  1 0  1 0)
    79       No carry comes out from {v1, kk1} +/- {vm1, kk1},
    80       and the division by two is exact.
    81       If (sa!=0) the sign of vm1 is negative */
    82    if (sa)
    83      {
    84  #ifdef HAVE_NATIVE_mpn_rsh1add_n
    85        mpn_rsh1add_n (vm1, v1, vm1, kk1);
    86  #else
    87        ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
    88        ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
    89  #endif
    90      }
    91    else
    92      {
    93  #ifdef HAVE_NATIVE_mpn_rsh1sub_n
    94        mpn_rsh1sub_n (vm1, v1, vm1, kk1);
    95  #else
    96        ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
    97        ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
    98  #endif
    99      }
   100  
   101    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
   102         v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
   103  
   104    /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
   105       t1 >= 0
   106    */
   107    vinf[0] -= mpn_sub_n (v1, v1, c, twok);
   108  
   109    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
   110         v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
   111  
   112    /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
   113       t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
   114    */
   115  #ifdef HAVE_NATIVE_mpn_rsh1sub_n
   116    mpn_rsh1sub_n (v2, v2, v1, kk1);
   117  #else
   118    ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
   119    ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
   120  #endif
   121  
   122    /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
   123         v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
   124  
   125    /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
   126       result is v1 >= 0
   127    */
   128    ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
   129  
   130    /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
   131    cy = mpn_add_n (c1, c1, vm1, kk1);
   132    MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
   133    /* Memory allocated for vm1 is now free, it can be recycled ...*/
   134  
   135    /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
   136       result is v2 >= 0 */
   137    saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
   138    vinf[0] = vinf0;       /* Set the right value for vinf0                     */
   139  #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
   140    cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
   141  #else
   142    /* Overwrite unused vm1 */
   143    cy = mpn_lshift (vm1, vinf, twor, 1);
   144    cy += mpn_sub_n (v2, v2, vm1, twor);
   145  #endif
   146    MPN_DECR_U (v2 + twor, kk1 - twor, cy);
   147  
   148    /* Current matrix is
   149       [1 0 0 0 0; vinf
   150        0 1 0 0 0; v2
   151        1 0 1 0 0; v1
   152        0 1 0 1 0; vm1
   153        0 0 0 0 1] v0
   154       Some values already are in-place (we added vm1 in the correct position)
   155       | vinf|  v1 |  v0 |
   156  	      | vm1 |
   157       One still is in a separated area
   158  	| +v2 |
   159       We have to compute v1-=vinf; vm1 -= v2,
   160  	   |-vinf|
   161  	      | -v2 |
   162       Carefully reordering operations we can avoid to compute twice the sum
   163       of the high half of v2 plus the low half of vinf.
   164    */
   165  
   166    /* Add the high half of t2 in {vinf} */
   167    if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
   168      cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
   169      MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
   170    } else { /* triggered only by very unbalanced cases like
   171  	      (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
   172      ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
   173    }
   174    /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
   175       result is >= 0 */
   176    /* Side effect: we also subtracted (high half) vm1 -= v2 */
   177    cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
   178    vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
   179    vinf[0] = saved;
   180    MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
   181  
   182    /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
   183       Operate only on the low half.
   184    */
   185    cy = mpn_sub_n (c1, c1, v2, k);
   186    MPN_DECR_U (v1, kk1, cy);
   187  
   188    /********************* Beginning the final phase **********************/
   189  
   190    /* Most of the recomposition was done */
   191  
   192    /* add t2 in {c+3k, ...}, but only the low half */
   193    cy = mpn_add_n (c3, c3, v2, k);
   194    vinf[0] += cy;
   195    ASSERT(vinf[0] >= cy); /* No carry */
   196    MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
   197  
   198  #undef v0
   199  }