github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom6_sqr.c (about)

     1  /* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  
        /* Refuse to build when GMP_NUMB_BITS is below 21.  */
    42  #if GMP_NUMB_BITS < 21
    43  #error Not implemented.
    44  #endif
    45  
    46  
        /* Hints consumed by TOOM6_SQR_REC.  For an algorithm X:
             - when MAYBE_sqr_X is 0, the branch calling X is never taken;
             - when MAYBE_sqr_above_X is 0, the branch calling X is taken without
               testing the operand size against the next threshold.
           In a tuning build every hint is 1, so every branch and every size
           test is kept (presumably because the thresholds are not fixed
           there -- confirm against the tuning program).  */
    47  #if TUNE_PROGRAM_BUILD
    48  #define MAYBE_sqr_basecase 1
    49  #define MAYBE_sqr_above_basecase   1
    50  #define MAYBE_sqr_toom2   1
    51  #define MAYBE_sqr_above_toom2   1
    52  #define MAYBE_sqr_toom3   1
    53  #define MAYBE_sqr_above_toom3   1
    54  #define MAYBE_sqr_above_toom4   1
    55  #else
        /* SQR_TOOM6_MAX is (T + 6*2-1+5) / 6, where T is SQR_TOOM8_THRESHOLD when
           that is defined and SQR_FFT_THRESHOLD otherwise.  The second form
           yields MP_SIZE_T_MAX when adding the constant to SQR_FFT_THRESHOLD
           would exceed MP_SIZE_T_MAX.  Presumably this bounds the operand size
           seen by the recursive calls -- TODO confirm.  */
    56  #ifdef  SQR_TOOM8_THRESHOLD
    57  #define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
    58  #else
    59  #define SQR_TOOM6_MAX					\
    60    ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
    61     ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
    62     : MP_SIZE_T_MAX )
    63  #endif
        /* Non-tuning build: each hint is derived from the configured thresholds.
           MAYBE_sqr_X compares SQR_TOOM6_THRESHOLD with six times the threshold
           that ends X's range; MAYBE_sqr_above_X compares SQR_TOOM6_MAX with
           that same threshold.  */
    64  #define MAYBE_sqr_basecase					\
    65    (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
    66  #define MAYBE_sqr_above_basecase				\
    67    (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
    68  #define MAYBE_sqr_toom2						\
    69    (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
    70  #define MAYBE_sqr_above_toom2					\
    71    (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
    72  #define MAYBE_sqr_toom3						\
    73    (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
    74  #define MAYBE_sqr_above_toom3					\
    75    (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
    76  #define MAYBE_sqr_above_toom4					\
    77    (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
    78  #endif
    79  
        /* TOOM6_SQR_REC (p, a, n, ws): square the n-limb operand at a into p,
           passing ws as scratch space to the routine that is selected.

           The routines are tried in the order mpn_sqr_basecase, mpn_toom2_sqr,
           mpn_toom3_sqr, mpn_toom4_sqr, mpn_toom6_sqr.  n is tested with
           BELOW_THRESHOLD against SQR_TOOM2_THRESHOLD, SQR_TOOM3_THRESHOLD,
           SQR_TOOM4_THRESHOLD and SQR_TOOM6_THRESHOLD in turn, and the first
           branch whose condition holds is used; mpn_toom6_sqr is the fallback.
           The MAYBE_sqr_* hints can disable a branch or remove its size test.

           mpn_sqr_basecase is called without ws.  The macro arguments appear
           several times in the expansion, so they should be free of side
           effects.  */
    80  #define TOOM6_SQR_REC(p, a, n, ws)					\
    81    do {									\
    82      if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
    83  	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
    84        mpn_sqr_basecase (p, a, n);					\
    85      else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
    86  	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
    87        mpn_toom2_sqr (p, a, n, ws);					\
    88      else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
    89  	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
    90        mpn_toom3_sqr (p, a, n, ws);					\
    91      else if (! MAYBE_sqr_above_toom4					\
    92  	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
    93        mpn_toom4_sqr (p, a, n, ws);					\
    94      else								\
    95        mpn_toom6_sqr (p, a, n, ws);					\
    96    } while (0)
    97  
        /* mpn_toom6_sqr -- square the an-limb operand at ap into pp with the
           Toom-Cook 6.5-way scheme.

           The operand is split into pieces of n limbs plus a top piece of s
           limbs (an = 5*n + s, 0 < s <= n).  It is evaluated at the point pairs
           +-1/2, +-1, +-4, +-1/4 and +-2; every evaluation is squared through
           TOOM6_SQR_REC, the first n limbs are squared as the value at 0, and
           mpn_toom_interpolate_12pts produces the final result in pp.

           pp       destination; parts of it are also used as work space while
                    the evaluations are computed (presumably it must hold
                    2*an limbs -- TODO confirm)
           ap, an   source operand and its size in limbs; an >= 18 is asserted
           scratch  caller-supplied work area holding r5, r3, r1 and wse.  The
                    layout below reaches 12*n + 4 limbs, while the disabled
                    allocation mentions 12*n + 6 -- confirm the required size
                    against the caller-side size computation.  */
    98  void
    99  mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
   100  {
   101    mp_size_t n, s;
   102  
   103    /***************************** decomposition *******************************/
   104  
   105    ASSERT( an >= 18 );
   106  
          /* n = ceil (an / 6), the size of a full piece.  The size_t cast is
             presumably there to get an unsigned division.  */
   107    n = 1 + (an - 1) / (size_t) 6;
   108  
          /* s = limbs remaining for the top piece after five pieces of n limbs.  */
   109    s = an - 5 * n;
   110  
   111    ASSERT (0 < s && s <= n);
   112  
          /* Work areas; the comment on each line gives the size in limbs.
             r4, r2 and r0 are inside pp, while r5, r3, r1 and wse are inside
             scratch.  v0 and v2 receive the two values of an evaluation pair:
             v0 starts at the same address as r2 and v2 starts 2*n + 2 limbs
             after it.  */
   113  #define   r4    (pp + 3 * n)			/* 3n+1 */
   114  #define   r2    (pp + 7 * n)			/* 3n+1 */
   115  #define   r0    (pp +11 * n)			/* s+t <= 2*n; not referenced in this function */
   116  #define   r5    (scratch)			/* 3n+1 */
   117  #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
   118  #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
   119  #define   v0    (pp + 7 * n)			/* n+1 */
   120  #define   v2    (pp + 9 * n+2)			/* n+1 */
   121  #define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
   122  
   123    /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
   124       need all of them, when DO_mpn_sublsh_n uses a scratch  */
   125  /*   if (scratch== NULL) */
   126  /*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
   127  
   128    /********************** evaluation and recursive calls *********************/
          /* Every group below has the same shape:
               1. an evaluation helper stores the value at the positive point in
                  v2 and the value at the negative point in v0 (pp is passed as
                  the last argument, presumably as scratch space);
               2. the negative-point value is squared into pp and the
                  positive-point value into one of r5, r3, r1, r4, r2, both on
                  n + 1 limbs with wse as scratch;
               3. mpn_toom_couple_handling post-processes the two squares,
                  presumably into the form the interpolation expects -- confirm
                  against that helper.  Its last two arguments differ per
                  point.  */
   129    /* $\pm1/2$ */
   130    mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
   131    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)^2 * 2^. */
   132    TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)^2 * 2^. */
   133    mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
   134  
   135    /* $\pm1$ */
   136    mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
   137    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)^2 */
   138    TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)^2 */
   139    mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
   140  
   141    /* $\pm4$ */
   142    mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
   143    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)^2 */
   144    TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)^2 */
   145    mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
   146  
   147    /* $\pm1/4$ */
   148    mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
   149    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)^2 * 4^. */
   150    TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)^2 * 4^. */
   151    mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
   152  
   153    /* $\pm2$ */
          /* r2 starts where v0 does, so the square written to r2 lands on v0;
             v0 has already been squared into pp at that point.  */
   154    mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
   155    TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)^2 */
   156    TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)^2 */
   157    mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
   158  
   159  #undef v0
   160  #undef v2
   161  
   162    /* A(0)^2: the square of the first n limbs of ap, stored at pp */
   163    TOOM6_SQR_REC(pp, ap, n, wse);
   164  
          /* Final interpolation into pp from the values gathered above.  2 * s is
             presumably the size of the top coefficient and the 0 a "half" flag --
             confirm against mpn_toom_interpolate_12pts.  */
   165    mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
   166  
   167  #undef r0
   168  #undef r1
   169  #undef r2
   170  #undef r3
   171  #undef r4
   172  #undef r5
   173  
   174  }
   175  #undef TOOM6_SQR_REC
        /* Also remove the dispatch hints that TOOM6_SQR_REC relied on.  */
   176  #undef MAYBE_sqr_basecase
   177  #undef MAYBE_sqr_above_basecase
   178  #undef MAYBE_sqr_toom2
   179  #undef MAYBE_sqr_above_toom2
   180  #undef MAYBE_sqr_toom3
   181  #undef MAYBE_sqr_above_toom3
   182  #undef MAYBE_sqr_above_toom4
        /* NOTE(review): SQR_TOOM6_MAX and wse are #defined earlier in this file
           but no matching #undef is visible here.  */