github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom8h_mul.c (about)

     1  /* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  
    42  #if GMP_NUMB_BITS < 29
    43  #error Not implemented.
    44  #endif
    45  
    46  #if GMP_NUMB_BITS < 43
    47  #define BIT_CORRECTION 1
    48  #define CORRECTION_BITS GMP_NUMB_BITS
    49  #else
    50  #define BIT_CORRECTION 0
    51  #define CORRECTION_BITS 0
    52  #endif
    53  
    54  
    55  #if TUNE_PROGRAM_BUILD
    56  #define MAYBE_mul_basecase 1
    57  #define MAYBE_mul_toom22   1
    58  #define MAYBE_mul_toom33   1
    59  #define MAYBE_mul_toom44   1
    60  #define MAYBE_mul_toom8h   1
    61  #else
    62  #define MAYBE_mul_basecase						\
    63    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
    64  #define MAYBE_mul_toom22						\
    65    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
    66  #define MAYBE_mul_toom33						\
    67    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
    68  #define MAYBE_mul_toom44						\
    69    (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
    70  #define MAYBE_mul_toom8h						\
    71    (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
    72  #endif
    73  
    74  #define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
    75    do {									\
    76      if (MAYBE_mul_basecase						\
    77  	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
    78        mpn_mul_basecase (p, a, n, b, n);					\
    79        if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
    80      } else if (MAYBE_mul_toom22						\
    81  	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
    82        mpn_toom22_mul (p, a, n, b, n, ws);				\
    83        if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
    84      } else if (MAYBE_mul_toom33						\
    85  	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
    86        mpn_toom33_mul (p, a, n, b, n, ws);				\
    87        if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
    88      } else if (MAYBE_mul_toom44						\
    89  	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
    90        mpn_toom44_mul (p, a, n, b, n, ws);				\
    91        if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
    92      } else if (! MAYBE_mul_toom8h					\
    93  	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
    94        mpn_toom6h_mul (p, a, n, b, n, ws);				\
    95        if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
    96      } else {								\
    97        mpn_toom8h_mul (p, a, n, b, n, ws);				\
    98        if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
    99      }									\
   100    } while (0)
   101  
   102  #define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
   103    do { mpn_mul (p, a, na, b, nb); } while (0)
   104  
   105  /* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
   106     With: an >= bn >= 86, an*5 <  bn * 11.
   107     It _may_ work with bn<=?? and bn*?? < an*? < bn*??
   108  
   109     Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
   110  */
   111  /* Estimate on needed scratch:
   112     S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
   113     since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
   114   */
   115  
   116  void
   117  mpn_toom8h_mul   (mp_ptr pp,
   118  		  mp_srcptr ap, mp_size_t an,
   119  		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
   120  {
   121    mp_size_t n, s, t;
   122    int p, q, half;
   123    int sign;
   124  
   125    /***************************** decomposition *******************************/
   126  
   127    ASSERT (an >= bn);
   128    /* Can not handle too small operands */
   129    ASSERT (bn >= 86);
   130    /* Can not handle too much unbalancement */
   131    ASSERT (an <= bn*4);
   132    ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
   133    ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
   134    ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
   135  
   136    /* Limit num/den is a rational number between
   137       (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
   138  #define LIMIT_numerator (21)
   139  #define LIMIT_denominat (20)
   140  
   141    if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
   142      {
   143        half = 0;
   144        n = 1 + ((an - 1)>>3);
   145        p = q = 7;
   146        s = an - 7 * n;
   147        t = bn - 7 * n;
   148      }
   149    else
   150      {
   151        if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
   152  	{ p = 9; q = 8; }
   153        else if (GMP_NUMB_BITS <= 9*3 ||
   154  	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
   155  	{ p = 9; q = 7; }
   156        else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
   157  	{ p =10; q = 7; }
   158        else if (GMP_NUMB_BITS <= 10*3 ||
   159  	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
   160  	{ p =10; q = 6; }
   161        else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
   162  	{ p =11; q = 6; }
   163        else if (GMP_NUMB_BITS <= 11*3 ||
   164  	       an * 4 < 9 * bn)
   165  	{ p =11; q = 5; }
   166        else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
   167  	{ p =12; q = 5; }
   168        else if (GMP_NUMB_BITS <= 12*3 ||
   169  	       an * 9 < 28 * bn )  /* is 4*... <12*... */
   170  	{ p =12; q = 4; }
   171        else
   172  	{ p =13; q = 4; }
   173  
   174        half = (p+q)&1;
   175        n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
   176        p--; q--;
   177  
   178        s = an - p * n;
   179        t = bn - q * n;
   180  
   181        if(half) { /* Recover from badly chosen splitting */
   182  	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
   183  	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
   184        }
   185      }
   186  #undef LIMIT_numerator
   187  #undef LIMIT_denominat
   188  
   189    ASSERT (0 < s && s <= n);
   190    ASSERT (0 < t && t <= n);
   191    ASSERT (half || s + t > 3);
   192    ASSERT (n > 2);
   193  
   194  #define   r6    (pp + 3 * n)			/* 3n+1 */
   195  #define   r4    (pp + 7 * n)			/* 3n+1 */
   196  #define   r2    (pp +11 * n)			/* 3n+1 */
   197  #define   r0    (pp +15 * n)			/* s+t <= 2*n */
   198  #define   r7    (scratch)			/* 3n+1 */
   199  #define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
   200  #define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
   201  #define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
   202  #define   v0    (pp +11 * n)			/* n+1 */
   203  #define   v1    (pp +12 * n+1)			/* n+1 */
   204  #define   v2    (pp +13 * n+2)			/* n+1 */
   205  #define   v3    (scratch +12 * n + 4)		/* n+1 */
   206  #define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
   207  #define   wse   (scratch +13 * n + 5)		/* 2n+1 */
   208  
   209    /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
   210       need all of them  */
   211  /*   if (scratch == NULL) */
   212  /*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
   213    ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
   214    ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
   215  
   216    /********************** evaluation and recursive calls *********************/
   217  
   218    /* $\pm1/8$ */
   219    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
   220  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
   221    /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
   222    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
   223    mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
   224  
   225    /* $\pm1/4$ */
   226    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
   227  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
   228    /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
   229    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
   230    mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
   231  
   232    /* $\pm2$ */
   233    sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
   234  	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
   235    /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
   236    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
   237    mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
   238  
   239    /* $\pm8$ */
   240    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
   241  	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
   242    /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
   243    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
   244    mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
   245  
   246    /* $\pm1/2$ */
   247    sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
   248  	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
   249    /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
   250    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
   251    mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
   252  
   253    /* $\pm1$ */
   254    sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
   255    if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
   256      sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
   257    else
   258      sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
   259    /* A(-1)*B(-1) */ /* A(1)*B(1) */
   260    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
   261    mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
   262  
   263    /* $\pm4$ */
   264    sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
   265  	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
   266    /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
   267    TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
   268    mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
   269  
   270  #undef v0
   271  #undef v1
   272  #undef v2
   273  #undef v3
   274  #undef wse
   275  
   276    /* A(0)*B(0) */
   277    TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
   278  
   279    /* Infinity */
   280    if (UNLIKELY (half != 0)) {
   281      if (s > t) {
   282        TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
   283      } else {
   284        TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
   285      };
   286    };
   287  
   288    mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
   289  
   290  #undef r0
   291  #undef r1
   292  #undef r2
   293  #undef r3
   294  #undef r4
   295  #undef r5
   296  #undef r6
   297  #undef wsi
   298  }
   299  
   300  #undef TOOM8H_MUL_N_REC
   301  #undef TOOM8H_MUL_REC
   302  #undef MAYBE_mul_basecase
   303  #undef MAYBE_mul_toom22
   304  #undef MAYBE_mul_toom33
   305  #undef MAYBE_mul_toom44
   306  #undef MAYBE_mul_toom8h