github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom63_mul.c (about)

     1  /* Implementation of the algorithm for Toom-Cook 4.5-way.
     2  
     3     Contributed to the GNU project by Marco Bodrato.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2009, 2012 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  /* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
    42  static int
    43  abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
    44  {
    45    mp_limb_t  x, y;
    46    while (--n >= 0)
    47      {
    48        x = ap[n];
    49        y = bp[n];
    50        if (x != y)
    51  	{
    52  	  n++;
    53  	  if (x > y)
    54  	    {
    55  	      mpn_sub_n (rp, ap, bp, n);
    56  	      return 0;
    57  	    }
    58  	  else
    59  	    {
    60  	      mpn_sub_n (rp, bp, ap, n);
    61  	      return ~0;
    62  	    }
    63  	}
    64        rp[n] = 0;
    65      }
    66    return 0;
    67  }
    68  
    69  static int
    70  abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
    71    int result;
    72    result = abs_sub_n (rm, rp, rs, n);
    73    ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
    74    return result;
    75  }
    76  
    77  
    78  /* Toom-4.5, the splitting 6x3 unbalanced version.
    79     Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
    80  
    81    <--s-><--n--><--n--><--n--><--n--><--n-->
    82     ____ ______ ______ ______ ______ ______
    83    |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
    84  			|b2_|__b1__|__b0__|
    85  			<-t-><--n--><--n-->
    86  
    87  */
    88  #define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
    89    do {	mpn_mul_n (p, a, b, n);				\
    90    } while (0)
    91  
    92  #define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
    93    do {	mpn_mul (p, a, na, b, nb);			\
    94    } while (0)
    95  
    96  void
    97  mpn_toom63_mul (mp_ptr pp,
    98  		mp_srcptr ap, mp_size_t an,
    99  		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
   100  {
   101    mp_size_t n, s, t;
   102    mp_limb_t cy;
   103    int sign;
   104  
   105    /***************************** decomposition *******************************/
   106  #define a5  (ap + 5 * n)
   107  #define b0  (bp + 0 * n)
   108  #define b1  (bp + 1 * n)
   109  #define b2  (bp + 2 * n)
   110  
   111    ASSERT (an >= bn);
   112    n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
   113  
   114    s = an - 5 * n;
   115    t = bn - 2 * n;
   116  
   117    ASSERT (0 < s && s <= n);
   118    ASSERT (0 < t && t <= n);
   119    /* WARNING! it assumes s+t>=n */
   120    ASSERT ( s + t >= n );
   121    ASSERT ( s + t > 4);
   122    /* WARNING! it assumes n>1 */
   123    ASSERT ( n > 2);
   124  
   125  #define   r8    pp				/* 2n   */
   126  #define   r7    scratch				/* 3n+1 */
   127  #define   r5    (pp + 3*n)			/* 3n+1 */
   128  #define   v0    (pp + 3*n)			/* n+1 */
   129  #define   v1    (pp + 4*n+1)			/* n+1 */
   130  #define   v2    (pp + 5*n+2)			/* n+1 */
   131  #define   v3    (pp + 6*n+3)			/* n+1 */
   132  #define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
   133  #define   r1    (pp + 7*n)			/* s+t <= 2*n */
   134  #define   ws    (scratch + 6 * n + 2)		/* ??? */
   135  
   136    /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
   137       need all of them, when DO_mpn_sublsh_n usea a scratch  */
   138  /*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
   139  
   140    /********************** evaluation and recursive calls *********************/
   141    /* $\pm4$ */
   142    sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
   143    pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
   144    /* FIXME: use addlsh */
   145    v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
   146    if ( n == t )
   147      v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
   148    else
   149      v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
   150    sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
   151    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
   152    TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
   153    mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
   154  
   155    /* $\pm1$ */
   156    sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
   157    /* Compute bs1 and bsm1. Code taken from toom33 */
   158    cy = mpn_add (ws, b0, n, b2, t);
   159  #if HAVE_NATIVE_mpn_add_n_sub_n
   160    if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
   161      {
   162        cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
   163        v3[n] = cy >> 1;
   164        v1[n] = 0;
   165        sign = ~sign;
   166      }
   167    else
   168      {
   169        mp_limb_t cy2;
   170        cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
   171        v3[n] = cy + (cy2 >> 1);
   172        v1[n] = cy - (cy2 & 1);
   173      }
   174  #else
   175    v3[n] = cy + mpn_add_n (v3, ws, b1, n);
   176    if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
   177      {
   178        mpn_sub_n (v1, b1, ws, n);
   179        v1[n] = 0;
   180        sign = ~sign;
   181      }
   182    else
   183      {
   184        cy -= mpn_sub_n (v1, ws, b1, n);
   185        v1[n] = cy;
   186      }
   187  #endif
   188    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
   189    TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
   190    mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
   191  
   192    /* $\pm2$ */
   193    sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
   194    pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
   195    /* FIXME: use addlsh or addlsh2 */
   196    v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
   197    if ( n == t )
   198      v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
   199    else
   200      v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
   201    sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
   202    TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
   203    TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
   204    mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
   205  
   206    /* A(0)*B(0) */
   207    TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
   208  
   209    /* Infinity */
   210    if (s > t) {
   211      TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
   212    } else {
   213      TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
   214    };
   215  
   216    mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
   217  
   218  #undef a5
   219  #undef b0
   220  #undef b1
   221  #undef b2
   222  #undef r1
   223  #undef r3
   224  #undef r5
   225  #undef v0
   226  #undef v1
   227  #undef v2
   228  #undef v3
   229  #undef r7
   230  #undef r8
   231  #undef ws
   232  }