github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom22_mul.c (about)

     1  /* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
     2     accurately, bn <= an < 2bn.
     3  
     4     Contributed to the GNU project by Torbjorn Granlund.
     5  
     6     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     7     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     8     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     9  
    10  Copyright 2006-2010, 2012, 2014 Free Software Foundation, Inc.
    11  
    12  This file is part of the GNU MP Library.
    13  
    14  The GNU MP Library is free software; you can redistribute it and/or modify
    15  it under the terms of either:
    16  
    17    * the GNU Lesser General Public License as published by the Free
    18      Software Foundation; either version 3 of the License, or (at your
    19      option) any later version.
    20  
    21  or
    22  
    23    * the GNU General Public License as published by the Free Software
    24      Foundation; either version 2 of the License, or (at your option) any
    25      later version.
    26  
    27  or both in parallel, as here.
    28  
    29  The GNU MP Library is distributed in the hope that it will be useful, but
    30  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    31  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    32  for more details.
    33  
    34  You should have received copies of the GNU General Public License and the
    35  GNU Lesser General Public License along with the GNU MP Library.  If not,
    36  see https://www.gnu.org/licenses/.  */
    37  
    38  
    39  #include "gmp.h"
    40  #include "gmp-impl.h"
    41  
    42  /* Evaluate in: -1, 0, +inf
    43  
    44    <-s--><--n-->
    45     ____ ______
    46    |_a1_|___a0_|
    47     |b1_|___b0_|
    48     <-t-><--n-->
    49  
    50    v0  =  a0     * b0       #   A(0)*B(0)
    51    vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
    52    vinf=      a1 *     b1   # A(inf)*B(inf)
    53  */
    54  
    55  #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
    56  #define MAYBE_mul_toom22   1
    57  #else
    58  #define MAYBE_mul_toom22						\
    59    (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
    60  #endif
    61  
    62  #define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
    63    do {									\
    64      if (! MAYBE_mul_toom22						\
    65  	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
    66        mpn_mul_basecase (p, a, n, b, n);					\
    67      else								\
    68        mpn_toom22_mul (p, a, n, b, n, ws);				\
    69    } while (0)
    70  
    71  /* Normally, this calls mul_basecase or toom22_mul.  But when when the fraction
    72     MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
    73     relative unbalance will become a larger and larger relative unbalance with
    74     each recursion (the difference s-t will be invariant over recursive calls).
    75     Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
    76     MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
    77  #define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
    78    do {									\
    79      if (! MAYBE_mul_toom22						\
    80  	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
    81        mpn_mul_basecase (p, a, an, b, bn);				\
    82      else if (4 * an < 5 * bn)						\
    83        mpn_toom22_mul (p, a, an, b, bn, ws);				\
    84      else								\
    85        mpn_toom32_mul (p, a, an, b, bn, ws);				\
    86    } while (0)
    87  
    88  void
    89  mpn_toom22_mul (mp_ptr pp,
    90  		mp_srcptr ap, mp_size_t an,
    91  		mp_srcptr bp, mp_size_t bn,
    92  		mp_ptr scratch)
    93  {
    94    const int __gmpn_cpuvec_initialized = 1;
    95    mp_size_t n, s, t;
    96    int vm1_neg;
    97    mp_limb_t cy, cy2;
    98    mp_ptr asm1;
    99    mp_ptr bsm1;
   100  
   101  #define a0  ap
   102  #define a1  (ap + n)
   103  #define b0  bp
   104  #define b1  (bp + n)
   105  
   106    s = an >> 1;
   107    n = an - s;
   108    t = bn - n;
   109  
   110    ASSERT (an >= bn);
   111  
   112    ASSERT (0 < s && s <= n && s >= n - 1);
   113    ASSERT (0 < t && t <= s);
   114  
   115    asm1 = pp;
   116    bsm1 = pp + n;
   117  
   118    vm1_neg = 0;
   119  
   120    /* Compute asm1.  */
   121    if (s == n)
   122      {
   123        if (mpn_cmp (a0, a1, n) < 0)
   124  	{
   125  	  mpn_sub_n (asm1, a1, a0, n);
   126  	  vm1_neg = 1;
   127  	}
   128        else
   129  	{
   130  	  mpn_sub_n (asm1, a0, a1, n);
   131  	}
   132      }
   133    else /* n - s == 1 */
   134      {
   135        if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
   136  	{
   137  	  mpn_sub_n (asm1, a1, a0, s);
   138  	  asm1[s] = 0;
   139  	  vm1_neg = 1;
   140  	}
   141        else
   142  	{
   143  	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
   144  	}
   145      }
   146  
   147    /* Compute bsm1.  */
   148    if (t == n)
   149      {
   150        if (mpn_cmp (b0, b1, n) < 0)
   151  	{
   152  	  mpn_sub_n (bsm1, b1, b0, n);
   153  	  vm1_neg ^= 1;
   154  	}
   155        else
   156  	{
   157  	  mpn_sub_n (bsm1, b0, b1, n);
   158  	}
   159      }
   160    else
   161      {
   162        if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
   163  	{
   164  	  mpn_sub_n (bsm1, b1, b0, t);
   165  	  MPN_ZERO (bsm1 + t, n - t);
   166  	  vm1_neg ^= 1;
   167  	}
   168        else
   169  	{
   170  	  mpn_sub (bsm1, b0, n, b1, t);
   171  	}
   172      }
   173  
   174  #define v0	pp				/* 2n */
   175  #define vinf	(pp + 2 * n)			/* s+t */
   176  #define vm1	scratch				/* 2n */
   177  #define scratch_out	scratch + 2 * n
   178  
   179    /* vm1, 2n limbs */
   180    TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
   181  
   182    if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
   183    else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
   184  
   185    /* v0, 2n limbs */
   186    TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
   187  
   188    /* H(v0) + L(vinf) */
   189    cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
   190  
   191    /* L(v0) + H(v0) */
   192    cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);
   193  
   194    /* L(vinf) + H(vinf) */
   195    cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);
   196  
   197    if (vm1_neg)
   198      cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
   199    else
   200      cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
   201  
   202    ASSERT (cy + 1  <= 3);
   203    ASSERT (cy2 <= 2);
   204  
   205    MPN_INCR_U (pp + 2 * n, s + t, cy2);
   206    if (LIKELY (cy <= 2))
   207      /* if s+t==n, cy is zero, but we should not acces pp[3*n] at all. */
   208      MPN_INCR_U (pp + 3 * n, s + t - n, cy);
   209    else
   210      MPN_DECR_U (pp + 3 * n, s + t - n, 1);
   211  }