github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom33_mul.c (about)

/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
     2     size.  Or more accurately, bn <= an < (3/2)bn.
     3  
     4     Contributed to the GNU project by Torbjorn Granlund.
     5     Additional improvements by Marco Bodrato.
     6  
     7     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     8     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     9     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    10  
    11  Copyright 2006-2008, 2010, 2012 Free Software Foundation, Inc.
    12  
    13  This file is part of the GNU MP Library.
    14  
    15  The GNU MP Library is free software; you can redistribute it and/or modify
    16  it under the terms of either:
    17  
    18    * the GNU Lesser General Public License as published by the Free
    19      Software Foundation; either version 3 of the License, or (at your
    20      option) any later version.
    21  
    22  or
    23  
    24    * the GNU General Public License as published by the Free Software
    25      Foundation; either version 2 of the License, or (at your option) any
    26      later version.
    27  
    28  or both in parallel, as here.
    29  
    30  The GNU MP Library is distributed in the hope that it will be useful, but
    31  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    32  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    33  for more details.
    34  
    35  You should have received copies of the GNU General Public License and the
    36  GNU Lesser General Public License along with the GNU MP Library.  If not,
    37  see https://www.gnu.org/licenses/.  */
    38  
    39  
    40  #include "gmp.h"
    41  #include "gmp-impl.h"
    42  
    43  /* Evaluate in: -1, 0, +1, +2, +inf
    44  
    45    <-s--><--n--><--n-->
    46     ____ ______ ______
    47    |_a2_|___a1_|___a0_|
    48     |b2_|___b1_|___b0_|
    49     <-t-><--n--><--n-->
    50  
    51    v0  =  a0         * b0          #   A(0)*B(0)
    52    v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
    53    vm1 = (a0- a1+ a2)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1  bh <= 1
    54    v2  = (a0+2a1+4a2)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 6  bh <= 6
    55    vinf=          a2 *         b2  # A(inf)*B(inf)
    56  */
    57  
    58  #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
    59  #define MAYBE_mul_basecase 1
    60  #define MAYBE_mul_toom33   1
    61  #else
    62  #define MAYBE_mul_basecase						\
    63    (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
    64  #define MAYBE_mul_toom33						\
    65    (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
    66  #endif
    67  
    68  /* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
    69     multiplication at the infinity point. We may have
    70     MAYBE_mul_basecase == 0, and still get s just below
    71     MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
    72     s == 1 and mpn_toom22_mul will crash.
    73  */
    74  
    75  #define TOOM33_MUL_N_REC(p, a, b, n, ws)				\
    76    do {									\
    77      if (MAYBE_mul_basecase						\
    78  	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
    79        mpn_mul_basecase (p, a, n, b, n);					\
    80      else if (! MAYBE_mul_toom33						\
    81  	     || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
    82        mpn_toom22_mul (p, a, n, b, n, ws);				\
    83      else								\
    84        mpn_toom33_mul (p, a, n, b, n, ws);				\
    85    } while (0)
    86  
    87  void
    88  mpn_toom33_mul (mp_ptr pp,
    89  		mp_srcptr ap, mp_size_t an,
    90  		mp_srcptr bp, mp_size_t bn,
    91  		mp_ptr scratch)
    92  {
    93    const int __gmpn_cpuvec_initialized = 1;
    94    mp_size_t n, s, t;
    95    int vm1_neg;
    96    mp_limb_t cy, vinf0;
    97    mp_ptr gp;
    98    mp_ptr as1, asm1, as2;
    99    mp_ptr bs1, bsm1, bs2;
   100  
   101  #define a0  ap
   102  #define a1  (ap + n)
   103  #define a2  (ap + 2*n)
   104  #define b0  bp
   105  #define b1  (bp + n)
   106  #define b2  (bp + 2*n)
   107  
   108    n = (an + 2) / (size_t) 3;
   109  
   110    s = an - 2 * n;
   111    t = bn - 2 * n;
   112  
   113    ASSERT (an >= bn);
   114  
   115    ASSERT (0 < s && s <= n);
   116    ASSERT (0 < t && t <= n);
   117  
   118    as1  = scratch + 4 * n + 4;
   119    asm1 = scratch + 2 * n + 2;
   120    as2 = pp + n + 1;
   121  
   122    bs1 = pp;
   123    bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
   124    bs2 = pp + 2 * n + 2;
   125  
   126    gp = scratch;
   127  
   128    vm1_neg = 0;
   129  
   130    /* Compute as1 and asm1.  */
   131    cy = mpn_add (gp, a0, n, a2, s);
   132  #if HAVE_NATIVE_mpn_add_n_sub_n
   133    if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
   134      {
   135        cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
   136        as1[n] = cy >> 1;
   137        asm1[n] = 0;
   138        vm1_neg = 1;
   139      }
   140    else
   141      {
   142        mp_limb_t cy2;
   143        cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
   144        as1[n] = cy + (cy2 >> 1);
   145        asm1[n] = cy - (cy2 & 1);
   146      }
   147  #else
   148    as1[n] = cy + mpn_add_n (as1, gp, a1, n);
   149    if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
   150      {
   151        mpn_sub_n (asm1, a1, gp, n);
   152        asm1[n] = 0;
   153        vm1_neg = 1;
   154      }
   155    else
   156      {
   157        cy -= mpn_sub_n (asm1, gp, a1, n);
   158        asm1[n] = cy;
   159      }
   160  #endif
   161  
   162    /* Compute as2.  */
   163  #if HAVE_NATIVE_mpn_rsblsh1_n
   164    cy = mpn_add_n (as2, a2, as1, s);
   165    if (s != n)
   166      cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
   167    cy += as1[n];
   168    cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
   169  #else
   170  #if HAVE_NATIVE_mpn_addlsh1_n
   171    cy  = mpn_addlsh1_n (as2, a1, a2, s);
   172    if (s != n)
   173      cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
   174    cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
   175  #else
   176    cy = mpn_add_n (as2, a2, as1, s);
   177    if (s != n)
   178      cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
   179    cy += as1[n];
   180    cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
   181    cy -= mpn_sub_n (as2, as2, a0, n);
   182  #endif
   183  #endif
   184    as2[n] = cy;
   185  
   186    /* Compute bs1 and bsm1.  */
   187    cy = mpn_add (gp, b0, n, b2, t);
   188  #if HAVE_NATIVE_mpn_add_n_sub_n
   189    if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
   190      {
   191        cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
   192        bs1[n] = cy >> 1;
   193        bsm1[n] = 0;
   194        vm1_neg ^= 1;
   195      }
   196    else
   197      {
   198        mp_limb_t cy2;
   199        cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
   200        bs1[n] = cy + (cy2 >> 1);
   201        bsm1[n] = cy - (cy2 & 1);
   202      }
   203  #else
   204    bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
   205    if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
   206      {
   207        mpn_sub_n (bsm1, b1, gp, n);
   208        bsm1[n] = 0;
   209        vm1_neg ^= 1;
   210      }
   211    else
   212      {
   213        cy -= mpn_sub_n (bsm1, gp, b1, n);
   214        bsm1[n] = cy;
   215      }
   216  #endif
   217  
   218    /* Compute bs2.  */
   219  #if HAVE_NATIVE_mpn_rsblsh1_n
   220    cy = mpn_add_n (bs2, b2, bs1, t);
   221    if (t != n)
   222      cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
   223    cy += bs1[n];
   224    cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
   225  #else
   226  #if HAVE_NATIVE_mpn_addlsh1_n
   227    cy  = mpn_addlsh1_n (bs2, b1, b2, t);
   228    if (t != n)
   229      cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
   230    cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
   231  #else
   232    cy  = mpn_add_n (bs2, bs1, b2, t);
   233    if (t != n)
   234      cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
   235    cy += bs1[n];
   236    cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
   237    cy -= mpn_sub_n (bs2, bs2, b0, n);
   238  #endif
   239  #endif
   240    bs2[n] = cy;
   241  
   242    ASSERT (as1[n] <= 2);
   243    ASSERT (bs1[n] <= 2);
   244    ASSERT (asm1[n] <= 1);
   245    ASSERT (bsm1[n] <= 1);
   246    ASSERT (as2[n] <= 6);
   247    ASSERT (bs2[n] <= 6);
   248  
   249  #define v0    pp				/* 2n */
   250  #define v1    (pp + 2 * n)			/* 2n+1 */
   251  #define vinf  (pp + 4 * n)			/* s+t */
   252  #define vm1   scratch				/* 2n+1 */
   253  #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
   254  #define scratch_out  (scratch + 5 * n + 5)
   255  
   256    /* vm1, 2n+1 limbs */
   257  #ifdef SMALLER_RECURSION
   258    TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
   259    cy = 0;
   260    if (asm1[n] != 0)
   261      cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
   262    if (bsm1[n] != 0)
   263      cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
   264    vm1[2 * n] = cy;
   265  #else
   266    TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
   267  #endif
   268  
   269    TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
   270  
   271    /* vinf, s+t limbs */
   272    if (s > t)  mpn_mul (vinf, a2, s, b2, t);
   273    else        TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
   274  
   275    vinf0 = vinf[0];				/* v1 overlaps with this */
   276  
   277  #ifdef SMALLER_RECURSION
   278    /* v1, 2n+1 limbs */
   279    TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
   280    if (as1[n] == 1)
   281      {
   282        cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
   283      }
   284    else if (as1[n] != 0)
   285      {
   286  #if HAVE_NATIVE_mpn_addlsh1_n
   287        cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
   288  #else
   289        cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
   290  #endif
   291      }
   292    else
   293      cy = 0;
   294    if (bs1[n] == 1)
   295      {
   296        cy += mpn_add_n (v1 + n, v1 + n, as1, n);
   297      }
   298    else if (bs1[n] != 0)
   299      {
   300  #if HAVE_NATIVE_mpn_addlsh1_n
   301        cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
   302  #else
   303        cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
   304  #endif
   305      }
   306    v1[2 * n] = cy;
   307  #else
   308    cy = vinf[1];
   309    TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
   310    vinf[1] = cy;
   311  #endif
   312  
   313    TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
   314  
   315    mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
   316  }