github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom52_mul.c

github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom52_mul.c (about)

     1  /* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
     2     times as large as bn.  Or more accurately, bn < an < 2 bn.
     3  
     4     Contributed to the GNU project by Marco Bodrato.
     5  
     6     The idea of applying toom to unbalanced multiplication is due to Marco
     7     Bodrato and Alberto Zanoni.
     8  
     9     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    10     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    11     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    12  
    13  Copyright 2009 Free Software Foundation, Inc.
    14  
    15  This file is part of the GNU MP Library.
    16  
    17  The GNU MP Library is free software; you can redistribute it and/or modify
    18  it under the terms of either:
    19  
    20    * the GNU Lesser General Public License as published by the Free
    21      Software Foundation; either version 3 of the License, or (at your
    22      option) any later version.
    23  
    24  or
    25  
    26    * the GNU General Public License as published by the Free Software
    27      Foundation; either version 2 of the License, or (at your option) any
    28      later version.
    29  
    30  or both in parallel, as here.
    31  
    32  The GNU MP Library is distributed in the hope that it will be useful, but
    33  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    34  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    35  for more details.
    36  
    37  You should have received copies of the GNU General Public License and the
    38  GNU Lesser General Public License along with the GNU MP Library.  If not,
    39  see https://www.gnu.org/licenses/.  */
    40  
    41  
    42  #include "gmp.h"
    43  #include "gmp-impl.h"
    44  
    45  /* Evaluate in: -2, -1, 0, +1, +2, +inf
    46  
    47    <-s-><--n--><--n--><--n--><--n-->
    48     ___ ______ ______ ______ ______
    49    |a4_|___a3_|___a2_|___a1_|___a0_|
    50  			|b1|___b0_|
    51  			<t-><--n-->
    52  
    53    v0  =  a0                  * b0      #   A(0)*B(0)
    54    v1  = (a0+ a1+ a2+ a3+  a4)*(b0+ b1) #   A(1)*B(1)      ah  <= 4   bh <= 1
    55    vm1 = (a0- a1+ a2- a3+  a4)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 2   bh  = 0
    56    v2  = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) #   A(2)*B(2)      ah  <= 30  bh <= 2
    57    vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) #  A(-2)*B(-2)    |ah| <= 20 |bh|<= 1
    58    vinf=                   a4 *     b1  # A(inf)*B(inf)
    59  
    60    Some slight optimization in evaluation are taken from the paper:
    61    "Towards Optimal Toom-Cook Multiplication for Univariate and
    62    Multivariate Polynomials in Characteristic 2 and 0."
    63  */
    64  
    65  void
    66  mpn_toom52_mul (mp_ptr pp,
    67  		mp_srcptr ap, mp_size_t an,
    68  		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
    69  {
    70    mp_size_t n, s, t;
    71    enum toom6_flags flags;
    72  
    73  #define a0  ap
    74  #define a1  (ap + n)
    75  #define a2  (ap + 2 * n)
    76  #define a3  (ap + 3 * n)
    77  #define a4  (ap + 4 * n)
    78  #define b0  bp
    79  #define b1  (bp + n)
    80  
    81    n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
    82  
    83    s = an - 4 * n;
    84    t = bn - n;
    85  
    86    ASSERT (0 < s && s <= n);
    87    ASSERT (0 < t && t <= n);
    88  
    89    /* Ensures that 5 values of n+1 limbs each fits in the product area.
    90       Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
    91       n = 8. */
    92    ASSERT (s+t >= 5);
    93  
    94  #define v0    pp				/* 2n */
    95  #define vm1   (scratch)				/* 2n+1 */
    96  #define v1    (pp + 2 * n)			/* 2n+1 */
    97  #define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
    98  #define v2    (scratch + 4 * n + 2)		/* 2n+1 */
    99  #define vinf  (pp + 5 * n)			/* s+t */
   100  #define bs1    pp				/* n+1 */
   101  #define bsm1  (scratch + 2 * n + 2)		/* n   */
   102  #define asm1  (scratch + 3 * n + 3)		/* n+1 */
   103  #define asm2  (scratch + 4 * n + 4)		/* n+1 */
   104  #define bsm2  (pp + n + 1)			/* n+1 */
   105  #define bs2   (pp + 2 * n + 2)			/* n+1 */
   106  #define as2   (pp + 3 * n + 3)			/* n+1 */
   107  #define as1   (pp + 4 * n + 4)			/* n+1 */
   108  
   109    /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
   110       products will overwrite 2n+2 limbs. */
   111  
   112  #define a0a2  scratch
   113  #define a1a3  asm1
   114  
   115    /* Compute as2 and asm2.  */
   116    flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
   117  
   118    /* Compute bs1 and bsm1.  */
   119    if (t == n)
   120      {
   121  #if HAVE_NATIVE_mpn_add_n_sub_n
   122        mp_limb_t cy;
   123  
   124        if (mpn_cmp (b0, b1, n) < 0)
   125  	{
   126  	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
   127  	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
   128  	}
   129        else
   130  	{
   131  	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
   132  	}
   133        bs1[n] = cy >> 1;
   134  #else
   135        bs1[n] = mpn_add_n (bs1, b0, b1, n);
   136        if (mpn_cmp (b0, b1, n) < 0)
   137  	{
   138  	  mpn_sub_n (bsm1, b1, b0, n);
   139  	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
   140  	}
   141        else
   142  	{
   143  	  mpn_sub_n (bsm1, b0, b1, n);
   144  	}
   145  #endif
   146      }
   147    else
   148      {
   149        bs1[n] = mpn_add (bs1, b0, n, b1, t);
   150        if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
   151  	{
   152  	  mpn_sub_n (bsm1, b1, b0, t);
   153  	  MPN_ZERO (bsm1 + t, n - t);
   154  	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
   155  	}
   156        else
   157  	{
   158  	  mpn_sub (bsm1, b0, n, b1, t);
   159  	}
   160      }
   161  
   162    /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1  */
   163    mpn_add (bs2, bs1, n+1, b1, t);
   164    if (flags & toom6_vm1_neg )
   165      {
   166        bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
   167        flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
   168      }
   169    else
   170      {
   171        bsm2[n] = 0;
   172        if (t == n)
   173  	{
   174  	  if (mpn_cmp (bsm1, b1, n) < 0)
   175  	    {
   176  	      mpn_sub_n (bsm2, b1, bsm1, n);
   177  	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
   178  	    }
   179  	  else
   180  	    {
   181  	      mpn_sub_n (bsm2, bsm1, b1, n);
   182  	    }
   183  	}
   184        else
   185  	{
   186  	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
   187  	    {
   188  	      mpn_sub_n (bsm2, b1, bsm1, t);
   189  	      MPN_ZERO (bsm2 + t, n - t);
   190  	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
   191  	    }
   192  	  else
   193  	    {
   194  	      mpn_sub (bsm2, bsm1, n, b1, t);
   195  	    }
   196  	}
   197      }
   198  
   199    /* Compute as1 and asm1.  */
   200    flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2)));
   201  
   202    ASSERT (as1[n] <= 4);
   203    ASSERT (bs1[n] <= 1);
   204    ASSERT (asm1[n] <= 2);
   205  /*   ASSERT (bsm1[n] <= 1); */
   206    ASSERT (as2[n] <=30);
   207    ASSERT (bs2[n] <= 2);
   208    ASSERT (asm2[n] <= 20);
   209    ASSERT (bsm2[n] <= 1);
   210  
   211    /* vm1, 2n+1 limbs */
   212    mpn_mul (vm1, asm1, n+1, bsm1, n);  /* W4 */
   213  
   214    /* vm2, 2n+1 limbs */
   215    mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
   216  
   217    /* v2, 2n+1 limbs */
   218    mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
   219  
   220    /* v1, 2n+1 limbs */
   221    mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
   222  
   223    /* vinf, s+t limbs */   /* W0 */
   224    if (s > t)  mpn_mul (vinf, a4, s, b1, t);
   225    else        mpn_mul (vinf, b1, t, a4, s);
   226  
   227    /* v0, 2n limbs */
   228    mpn_mul_n (v0, ap, bp, n);  /* W5 */
   229  
   230    mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
   231  
   232  #undef v0
   233  #undef vm1
   234  #undef v1
   235  #undef vm2
   236  #undef v2
   237  #undef vinf
   238  #undef bs1
   239  #undef bs2
   240  #undef bsm1
   241  #undef bsm2
   242  #undef asm1
   243  #undef asm2
   244  #undef as1
   245  #undef as2
   246  #undef a0a2
   247  #undef b0b2
   248  #undef a1a3
   249  #undef a0
   250  #undef a1
   251  #undef a2
   252  #undef a3
   253  #undef b0
   254  #undef b1
   255  #undef b2
   256  
   257  }