github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom42_mul.c (about)

     1  /* mpn_toom42_mul -- Multiply {ap,an} and {bp,bn} where an is nominally twice
     2     as large as bn.  Or more accurately, (3/2)bn < an < 4bn.
     3  
     4     Contributed to the GNU project by Torbjorn Granlund.
     5     Additional improvements by Marco Bodrato.
     6  
     7     The idea of applying toom to unbalanced multiplication is due to Marco
     8     Bodrato and Alberto Zanoni.
     9  
    10     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    11     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    12     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    13  
    14  Copyright 2006-2008, 2012, 2014 Free Software Foundation, Inc.
    15  
    16  This file is part of the GNU MP Library.
    17  
    18  The GNU MP Library is free software; you can redistribute it and/or modify
    19  it under the terms of either:
    20  
    21    * the GNU Lesser General Public License as published by the Free
    22      Software Foundation; either version 3 of the License, or (at your
    23      option) any later version.
    24  
    25  or
    26  
    27    * the GNU General Public License as published by the Free Software
    28      Foundation; either version 2 of the License, or (at your option) any
    29      later version.
    30  
    31  or both in parallel, as here.
    32  
    33  The GNU MP Library is distributed in the hope that it will be useful, but
    34  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    35  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    36  for more details.
    37  
    38  You should have received copies of the GNU General Public License and the
    39  GNU Lesser General Public License along with the GNU MP Library.  If not,
    40  see https://www.gnu.org/licenses/.  */
    41  
    42  
    43  #include "gmp.h"
    44  #include "gmp-impl.h"
    45  
    46  /* Evaluate in: -1, 0, +1, +2, +inf
    47  
    48    <-s-><--n--><--n--><--n-->
    49     ___ ______ ______ ______
    50    |a3_|___a2_|___a1_|___a0_|
    51  	       |_b1_|___b0_|
    52  	       <-t--><--n-->
    53  
    54    v0  =  a0             * b0      #   A(0)*B(0)
    55    v1  = (a0+ a1+ a2+ a3)*(b0+ b1) #   A(1)*B(1)      ah  <= 3  bh <= 1
    56    vm1 = (a0- a1+ a2- a3)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 1  bh  = 0
    57    v2  = (a0+2a1+4a2+8a3)*(b0+2b1) #   A(2)*B(2)      ah  <= 14 bh <= 2
    58    vinf=              a3 *     b1  # A(inf)*B(inf)
    59  */
    60  
    61  #define TOOM42_MUL_N_REC(p, a, b, n, ws)				\
    62    do {									\
    63      mpn_mul_n (p, a, b, n);						\
    64    } while (0)
    65  
    66  void
    67  mpn_toom42_mul (mp_ptr pp,
    68  		mp_srcptr ap, mp_size_t an,
    69  		mp_srcptr bp, mp_size_t bn,
    70  		mp_ptr scratch)
    71  {
    72    mp_size_t n, s, t;
    73    int vm1_neg;
    74    mp_limb_t cy, vinf0;
    75    mp_ptr a0_a2;
    76    mp_ptr as1, asm1, as2;
    77    mp_ptr bs1, bsm1, bs2;
    78    mp_ptr tmp;
    79    TMP_DECL;
    80  
    81  #define a0  ap
    82  #define a1  (ap + n)
    83  #define a2  (ap + 2*n)
    84  #define a3  (ap + 3*n)
    85  #define b0  bp
    86  #define b1  (bp + n)
    87  
    88    n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
    89  
    90    s = an - 3 * n;
    91    t = bn - n;
    92  
    93    ASSERT (0 < s && s <= n);
    94    ASSERT (0 < t && t <= n);
    95  
    96    TMP_MARK;
    97  
    98    tmp = TMP_ALLOC_LIMBS (6 * n + 5);
    99    as1  = tmp; tmp += n + 1;
   100    asm1 = tmp; tmp += n + 1;
   101    as2  = tmp; tmp += n + 1;
   102    bs1  = tmp; tmp += n + 1;
   103    bsm1 = tmp; tmp += n;
   104    bs2  = tmp; tmp += n + 1;
   105  
   106    a0_a2 = pp;
   107  
   108    /* Compute as1 and asm1.  */
   109    vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
   110  
   111    /* Compute as2.  */
   112  #if HAVE_NATIVE_mpn_addlsh1_n
   113    cy  = mpn_addlsh1_n (as2, a2, a3, s);
   114    if (s != n)
   115      cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
   116    cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
   117    cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
   118  #else
   119    cy  = mpn_lshift (as2, a3, s, 1);
   120    cy += mpn_add_n (as2, a2, as2, s);
   121    if (s != n)
   122      cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
   123    cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
   124    cy += mpn_add_n (as2, a1, as2, n);
   125    cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
   126    cy += mpn_add_n (as2, a0, as2, n);
   127  #endif
   128    as2[n] = cy;
   129  
   130    /* Compute bs1 and bsm1.  */
   131    if (t == n)
   132      {
   133  #if HAVE_NATIVE_mpn_add_n_sub_n
   134        if (mpn_cmp (b0, b1, n) < 0)
   135  	{
   136  	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
   137  	  vm1_neg ^= 1;
   138  	}
   139        else
   140  	{
   141  	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
   142  	}
   143        bs1[n] = cy >> 1;
   144  #else
   145        bs1[n] = mpn_add_n (bs1, b0, b1, n);
   146  
   147        if (mpn_cmp (b0, b1, n) < 0)
   148  	{
   149  	  mpn_sub_n (bsm1, b1, b0, n);
   150  	  vm1_neg ^= 1;
   151  	}
   152        else
   153  	{
   154  	  mpn_sub_n (bsm1, b0, b1, n);
   155  	}
   156  #endif
   157      }
   158    else
   159      {
   160        bs1[n] = mpn_add (bs1, b0, n, b1, t);
   161  
   162        if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
   163  	{
   164  	  mpn_sub_n (bsm1, b1, b0, t);
   165  	  MPN_ZERO (bsm1 + t, n - t);
   166  	  vm1_neg ^= 1;
   167  	}
   168        else
   169  	{
   170  	  mpn_sub (bsm1, b0, n, b1, t);
   171  	}
   172      }
   173  
   174    /* Compute bs2, recycling bs1. bs2=bs1+b1  */
   175    mpn_add (bs2, bs1, n + 1, b1, t);
   176  
   177    ASSERT (as1[n] <= 3);
   178    ASSERT (bs1[n] <= 1);
   179    ASSERT (asm1[n] <= 1);
   180  /*ASSERT (bsm1[n] == 0);*/
   181    ASSERT (as2[n] <= 14);
   182    ASSERT (bs2[n] <= 2);
   183  
   184  #define v0    pp				/* 2n */
   185  #define v1    (pp + 2 * n)			/* 2n+1 */
   186  #define vinf  (pp + 4 * n)			/* s+t */
   187  #define vm1   scratch				/* 2n+1 */
   188  #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
   189  #define scratch_out	scratch + 4 * n + 4	/* Currently unused. */
   190  
   191    /* vm1, 2n+1 limbs */
   192    TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
   193    cy = 0;
   194    if (asm1[n] != 0)
   195      cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
   196    vm1[2 * n] = cy;
   197  
   198    TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
   199  
   200    /* vinf, s+t limbs */
   201    if (s > t)  mpn_mul (vinf, a3, s, b1, t);
   202    else        mpn_mul (vinf, b1, t, a3, s);
   203  
   204    vinf0 = vinf[0];				/* v1 overlaps with this */
   205  
   206    /* v1, 2n+1 limbs */
   207    TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
   208    if (as1[n] == 1)
   209      {
   210        cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
   211      }
   212    else if (as1[n] == 2)
   213      {
   214  #if HAVE_NATIVE_mpn_addlsh1_n
   215        cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
   216  #else
   217        cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
   218  #endif
   219      }
   220    else if (as1[n] == 3)
   221      {
   222        cy = 3 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(3));
   223      }
   224    else
   225      cy = 0;
   226    if (bs1[n] != 0)
   227      cy += mpn_add_n (v1 + n, v1 + n, as1, n);
   228    v1[2 * n] = cy;
   229  
   230    TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
   231  
   232    mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
   233  
   234    TMP_FREE;
   235  }