github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom8_sqr.c (about)

     1  /* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009, 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  #if GMP_NUMB_BITS < 29
    42  #error Not implemented.
    43  #endif
    44  
    45  #if GMP_NUMB_BITS < 43
    46  #define BIT_CORRECTION 1
    47  #define CORRECTION_BITS GMP_NUMB_BITS
    48  #else
    49  #define BIT_CORRECTION 0
    50  #define CORRECTION_BITS 0
    51  #endif
    52  
    53  #ifndef SQR_TOOM8_THRESHOLD
    54  #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
    55  #endif
    56  
    57  #ifndef SQR_TOOM6_THRESHOLD
    58  #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
    59  #endif
    60  
    61  #if TUNE_PROGRAM_BUILD
    62  #define MAYBE_sqr_basecase 1
    63  #define MAYBE_sqr_above_basecase   1
    64  #define MAYBE_sqr_toom2   1
    65  #define MAYBE_sqr_above_toom2   1
    66  #define MAYBE_sqr_toom3   1
    67  #define MAYBE_sqr_above_toom3   1
    68  #define MAYBE_sqr_toom4   1
    69  #define MAYBE_sqr_above_toom4   1
    70  #define MAYBE_sqr_above_toom6   1
    71  #else
    72  #define SQR_TOOM8_MAX					\
    73    ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
    74     ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
    75     : MP_SIZE_T_MAX )
    76  #define MAYBE_sqr_basecase					\
    77    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
    78  #define MAYBE_sqr_above_basecase				\
    79    (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
    80  #define MAYBE_sqr_toom2						\
    81    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
    82  #define MAYBE_sqr_above_toom2					\
    83    (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
    84  #define MAYBE_sqr_toom3						\
    85    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
    86  #define MAYBE_sqr_above_toom3					\
    87    (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
    88  #define MAYBE_sqr_toom4						\
    89    (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
    90  #define MAYBE_sqr_above_toom4					\
    91    (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
    92  #define MAYBE_sqr_above_toom6					\
    93    (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
    94  #endif
    95  
    96  #define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
    97    do {									\
    98      if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
    99  	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
   100        mpn_sqr_basecase (p, a, n);					\
   101        if (f) mpn_sqr_basecase (p2, a2, n);				\
   102      } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
   103  	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
   104        mpn_toom2_sqr (p, a, n, ws);					\
   105        if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
   106      } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
   107  	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
   108        mpn_toom3_sqr (p, a, n, ws);					\
   109        if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
   110      } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
   111  	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
   112        mpn_toom4_sqr (p, a, n, ws);					\
   113        if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
   114      } else if (! MAYBE_sqr_above_toom6					\
   115  	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
   116        mpn_toom6_sqr (p, a, n, ws);					\
   117        if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
   118      } else {								\
   119        mpn_toom8_sqr (p, a, n, ws);					\
   120        if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
   121      }									\
   122    } while (0)
   123  
   124  void
   125  mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
   126  {
   127    mp_size_t n, s;
   128  
   129    /***************************** decomposition *******************************/
   130  
   131    ASSERT ( an >= 40 );
   132  
   133    n = 1 + ((an - 1)>>3);
   134  
   135    s = an - 7 * n;
   136  
   137    ASSERT (0 < s && s <= n);
   138    ASSERT ( s + s > 3 );
   139  
   140  #define   r6    (pp + 3 * n)			/* 3n+1 */
   141  #define   r4    (pp + 7 * n)			/* 3n+1 */
   142  #define   r2    (pp +11 * n)			/* 3n+1 */
   143  #define   r0    (pp +15 * n)			/* s+t <= 2*n */
   144  #define   r7    (scratch)			/* 3n+1 */
   145  #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
   146  #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
   147  #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
   148  #define   v0    (pp +11 * n)			/* n+1 */
   149  #define   v2    (pp +13 * n+2)			/* n+1 */
   150  #define   wse   (scratch +12 * n + 4)		/* 3n+1 */
   151  
   152    /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
   153       need all of them, when DO_mpn_sublsh_n usea a scratch  */
   154  /*   if (scratch == NULL) */
   155  /*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
   156  
   157    /********************** evaluation and recursive calls *********************/
   158    /* $\pm1/8$ */
   159    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
   160    /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
   161    TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
   162    mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
   163  
   164    /* $\pm1/4$ */
   165    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
   166    /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
   167    TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
   168    mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
   169  
   170    /* $\pm2$ */
   171    mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
   172    /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
   173    TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
   174    mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
   175  
   176    /* $\pm8$ */
   177    mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
   178    /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
   179    TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
   180    mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
   181  
   182    /* $\pm1/2$ */
   183    mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
   184    /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
   185    TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
   186    mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
   187  
   188    /* $\pm1$ */
   189    mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
   190    /* A(-1)*B(-1) */ /* A(1)*B(1) */
   191    TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
   192    mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
   193  
   194    /* $\pm4$ */
   195    mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
   196    /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
   197    TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
   198    mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
   199  
   200  #undef v0
   201  #undef v2
   202  
   203    /* A(0)*B(0) */
   204    TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
   205  
   206    mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
   207  
   208  #undef r0
   209  #undef r1
   210  #undef r2
   211  #undef r3
   212  #undef r4
   213  #undef r5
   214  #undef r6
   215  #undef wse
   216  
   217  }
   218  
   219  #undef TOOM8_SQR_REC
   220  #undef MAYBE_sqr_basecase
   221  #undef MAYBE_sqr_above_basecase
   222  #undef MAYBE_sqr_toom2
   223  #undef MAYBE_sqr_above_toom2
   224  #undef MAYBE_sqr_toom3
   225  #undef MAYBE_sqr_above_toom3
   226  #undef MAYBE_sqr_above_toom4