/* Vendored from github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom3_sqr.c */

/* mpn_toom3_sqr -- Square {ap,an}.

   Contributed to the GNU project by Torbjorn Granlund.
   Additional improvements by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


    39  #include "gmp.h"
    40  #include "gmp-impl.h"
    41  
    42  /* Evaluate in: -1, 0, +1, +2, +inf
    43  
    44    <-s--><--n--><--n-->
    45     ____ ______ ______
    46    |_a2_|___a1_|___a0_|
    47  
    48    v0  =  a0         ^2 #   A(0)^2
    49    v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
    50    vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
    51    v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
    52    vinf=          a2 ^2 # A(inf)^2
    53  */
    54  
    55  #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
    56  #define MAYBE_sqr_basecase 1
    57  #define MAYBE_sqr_toom3   1
    58  #else
    59  #define MAYBE_sqr_basecase						\
    60    (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
    61  #define MAYBE_sqr_toom3							\
    62    (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
    63  #endif
    64  
    65  #define TOOM3_SQR_REC(p, a, n, ws)					\
    66    do {									\
    67      if (MAYBE_sqr_basecase						\
    68  	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
    69        mpn_sqr_basecase (p, a, n);					\
    70      else if (! MAYBE_sqr_toom3						\
    71  	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
    72        mpn_toom2_sqr (p, a, n, ws);					\
    73      else								\
    74        mpn_toom3_sqr (p, a, n, ws);					\
    75    } while (0)
    76  
    77  void
    78  mpn_toom3_sqr (mp_ptr pp,
    79  	       mp_srcptr ap, mp_size_t an,
    80  	       mp_ptr scratch)
    81  {
    82    const int __gmpn_cpuvec_initialized = 1;
    83    mp_size_t n, s;
    84    mp_limb_t cy, vinf0;
    85    mp_ptr gp;
    86    mp_ptr as1, asm1, as2;
    87  
    88  #define a0  ap
    89  #define a1  (ap + n)
    90  #define a2  (ap + 2*n)
    91  
    92    n = (an + 2) / (size_t) 3;
    93  
    94    s = an - 2 * n;
    95  
    96    ASSERT (0 < s && s <= n);
    97  
    98    as1 = scratch + 4 * n + 4;
    99    asm1 = scratch + 2 * n + 2;
   100    as2 = pp + n + 1;
   101  
   102    gp = scratch;
   103  
   104    /* Compute as1 and asm1.  */
   105    cy = mpn_add (gp, a0, n, a2, s);
   106  #if HAVE_NATIVE_mpn_add_n_sub_n
   107    if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
   108      {
   109        cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
   110        as1[n] = cy >> 1;
   111        asm1[n] = 0;
   112      }
   113    else
   114      {
   115        mp_limb_t cy2;
   116        cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
   117        as1[n] = cy + (cy2 >> 1);
   118        asm1[n] = cy - (cy2 & 1);
   119      }
   120  #else
   121    as1[n] = cy + mpn_add_n (as1, gp, a1, n);
   122    if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
   123      {
   124        mpn_sub_n (asm1, a1, gp, n);
   125        asm1[n] = 0;
   126      }
   127    else
   128      {
   129        cy -= mpn_sub_n (asm1, gp, a1, n);
   130        asm1[n] = cy;
   131      }
   132  #endif
   133  
   134    /* Compute as2.  */
   135  #if HAVE_NATIVE_mpn_rsblsh1_n
   136    cy = mpn_add_n (as2, a2, as1, s);
   137    if (s != n)
   138      cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
   139    cy += as1[n];
   140    cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
   141  #else
   142  #if HAVE_NATIVE_mpn_addlsh1_n
   143    cy  = mpn_addlsh1_n (as2, a1, a2, s);
   144    if (s != n)
   145      cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
   146    cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
   147  #else
   148    cy = mpn_add_n (as2, a2, as1, s);
   149    if (s != n)
   150      cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
   151    cy += as1[n];
   152    cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
   153    cy -= mpn_sub_n (as2, as2, a0, n);
   154  #endif
   155  #endif
   156    as2[n] = cy;
   157  
   158    ASSERT (as1[n] <= 2);
   159    ASSERT (asm1[n] <= 1);
   160  
   161  #define v0    pp				/* 2n */
   162  #define v1    (pp + 2 * n)			/* 2n+1 */
   163  #define vinf  (pp + 4 * n)			/* s+s */
   164  #define vm1   scratch				/* 2n+1 */
   165  #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
   166  #define scratch_out  (scratch + 5 * n + 5)
   167  
   168    /* vm1, 2n+1 limbs */
   169  #ifdef SMALLER_RECURSION
   170    TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
   171    cy = 0;
   172    if (asm1[n] != 0)
   173      cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
   174    if (asm1[n] != 0)
   175      cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
   176    vm1[2 * n] = cy;
   177  #else
   178    TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
   179  #endif
   180  
   181    TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
   182  
   183    TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */
   184  
   185    vinf0 = vinf[0];				/* v1 overlaps with this */
   186  
   187  #ifdef SMALLER_RECURSION
   188    /* v1, 2n+1 limbs */
   189    TOOM3_SQR_REC (v1, as1, n, scratch_out);
   190    if (as1[n] == 1)
   191      {
   192        cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
   193      }
   194    else if (as1[n] != 0)
   195      {
   196  #if HAVE_NATIVE_mpn_addlsh1_n
   197        cy = 2 * as1[n] + mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
   198  #else
   199        cy = 2 * as1[n] + mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
   200  #endif
   201      }
   202    else
   203      cy = 0;
   204    if (as1[n] == 1)
   205      {
   206        cy += mpn_add_n (v1 + n, v1 + n, as1, n);
   207      }
   208    else if (as1[n] != 0)
   209      {
   210  #if HAVE_NATIVE_mpn_addlsh1_n
   211        cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
   212  #else
   213        cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
   214  #endif
   215      }
   216    v1[2 * n] = cy;
   217  #else
   218    cy = vinf[1];
   219    TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
   220    vinf[1] = cy;
   221  #endif
   222  
   223    TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */
   224  
   225    mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
   226  }