github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom6h_mul.c (about)

     1  /* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  
    42  #if GMP_NUMB_BITS < 21
    43  #error Not implemented.
    44  #endif
    45  
    46  #if TUNE_PROGRAM_BUILD
    47  #define MAYBE_mul_basecase 1
    48  #define MAYBE_mul_toom22   1
    49  #define MAYBE_mul_toom33   1
    50  #define MAYBE_mul_toom6h   1
    51  #else
    52  #define MAYBE_mul_basecase						\
    53    (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
    54  #define MAYBE_mul_toom22						\
    55    (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
    56  #define MAYBE_mul_toom33						\
    57    (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
    58  #define MAYBE_mul_toom6h						\
    59    (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
    60  #endif
    61  
    62  #define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
    63    do {									\
    64      if (MAYBE_mul_basecase						\
    65  	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
    66        mpn_mul_basecase (p, a, n, b, n);					\
    67        if (f)								\
    68  	mpn_mul_basecase (p2, a2, n, b2, n);				\
    69      } else if (MAYBE_mul_toom22						\
    70  	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
    71        mpn_toom22_mul (p, a, n, b, n, ws);				\
    72        if (f)								\
    73  	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
    74      } else if (MAYBE_mul_toom33						\
    75  	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
    76        mpn_toom33_mul (p, a, n, b, n, ws);				\
    77        if (f)								\
    78  	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
    79      } else if (! MAYBE_mul_toom6h					\
    80  	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
    81        mpn_toom44_mul (p, a, n, b, n, ws);				\
    82        if (f)								\
    83  	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
    84      } else {								\
    85        mpn_toom6h_mul (p, a, n, b, n, ws);				\
    86        if (f)								\
    87  	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
    88      }									\
    89    } while (0)
    90  
    91  #define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
    92    do { mpn_mul (p, a, na, b, nb);			\
    93    } while (0)
    94  
    95  /* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
    96     With: an >= bn >= 46, an*6 <  bn * 17.
    97     It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
    98  
    99     Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
   100  */
   101  /* Estimate on needed scratch:
   102     S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
   103     since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
   104   */
   105  
   106  void
   107  mpn_toom6h_mul   (mp_ptr pp,
   108  		  mp_srcptr ap, mp_size_t an,
   109  		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
   110  {
   111    mp_size_t n, s, t;
   112    int p, q, half;
   113    int sign;
   114  
   115    /***************************** decomposition *******************************/
   116  
   117    ASSERT (an >= bn);
   118    /* Can not handle too much unbalancement */
   119    ASSERT (bn >= 42);
   120    /* Can not handle too much unbalancement */
   121    ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
   122  
   123    /* Limit num/den is a rational number between
   124       (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
   125  #define LIMIT_numerator (18)
   126  #define LIMIT_denominat (17)
   127  
   128    if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
   129      {
   130        n = 1 + (an - 1) / (size_t) 6;
   131        p = q = 5;
   132        half = 0;
   133  
   134        s = an - 5 * n;
   135        t = bn - 5 * n;
   136      }
   137    else {
   138      if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
   139        { p = 7; q = 6; }
   140      else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
   141        { p = 7; q = 5; }
   142      else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
   143        { p = 8; q = 5; }
   144      else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
   145        { p = 8; q = 4; }
   146      else
   147        { p = 9; q = 4; }
   148  
   149      half = (p ^ q) & 1;
   150      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
   151      p--; q--;
   152  
   153      s = an - p * n;
   154      t = bn - q * n;
   155  
   156      /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
   157      if (half) { /* Recover from badly chosen splitting */
   158        if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
   159        else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
   160      }
   161    }
   162  #undef LIMIT_numerator
   163  #undef LIMIT_denominat
   164  
   165    ASSERT (0 < s && s <= n);
   166    ASSERT (0 < t && t <= n);
   167    ASSERT (half || s + t > 3);
   168    ASSERT (n > 2);
   169  
   170  #define   r4    (pp + 3 * n)			/* 3n+1 */
   171  #define   r2    (pp + 7 * n)			/* 3n+1 */
   172  #define   r0    (pp +11 * n)			/* s+t <= 2*n */
   173  #define   r5    (scratch)			/* 3n+1 */
   174  #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
   175  #define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
   176  #define   v0    (pp + 7 * n)			/* n+1 */
   177  #define   v1    (pp + 8 * n+1)			/* n+1 */
   178  #define   v2    (pp + 9 * n+2)			/* n+1 */
   179  #define   v3    (scratch + 9 * n + 3)		/* n+1 */
   180  #define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
   181  #define   wse   (scratch +10 * n + 4)		/* 2n+1 */
   182  
   183    /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
   184       need all of them  */
   185  /*   if (scratch == NULL) */
   186  /*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
   187    ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
   188    ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
   189  
   190    /********************** evaluation and recursive calls *********************/
   191    /* $\pm1/2$ */
   192    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
   193  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
   194    /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
   195    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
   196    mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
   197  
   198    /* $\pm1$ */
   199    sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
   200    if (UNLIKELY (q == 3))
   201      sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
   202    else
   203      sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
   204    /* A(-1)*B(-1) */ /* A(1)*B(1) */
   205    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
   206    mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
   207  
   208    /* $\pm4$ */
   209    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
   210  	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
   211    /* A(-4)*B(-4) */
   212    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
   213    mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
   214  
   215    /* $\pm1/4$ */
   216    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
   217  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
   218    /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
   219    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
   220    mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
   221  
   222    /* $\pm2$ */
   223    sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
   224  	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
   225    /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
   226    TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
   227    mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
   228  
   229  #undef v0
   230  #undef v1
   231  #undef v2
   232  #undef v3
   233  #undef wse
   234  
   235    /* A(0)*B(0) */
   236    TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
   237  
   238    /* Infinity */
   239    if (UNLIKELY (half != 0)) {
   240      if (s > t) {
   241        TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
   242      } else {
   243        TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
   244      };
   245    };
   246  
   247    mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
   248  
   249  #undef r0
   250  #undef r1
   251  #undef r2
   252  #undef r3
   253  #undef r4
   254  #undef r5
   255  #undef wsi
   256  }
   257  
   258  #undef TOOM6H_MUL_N_REC
   259  #undef TOOM6H_MUL_REC
   260  #undef MAYBE_mul_basecase
   261  #undef MAYBE_mul_toom22
   262  #undef MAYBE_mul_toom33
   263  #undef MAYBE_mul_toom6h