github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom43_mul.c (about)

     1  /* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
     2     times as large as bn.  Or more accurately, bn < an < 2 bn.
     3  
     4     Contributed to the GNU project by Marco Bodrato.
     5  
     6     The idea of applying toom to unbalanced multiplication is due to Marco
     7     Bodrato and Alberto Zanoni.
     8  
     9     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    10     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    11     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    12  
    13  Copyright 2009 Free Software Foundation, Inc.
    14  
    15  This file is part of the GNU MP Library.
    16  
    17  The GNU MP Library is free software; you can redistribute it and/or modify
    18  it under the terms of either:
    19  
    20    * the GNU Lesser General Public License as published by the Free
    21      Software Foundation; either version 3 of the License, or (at your
    22      option) any later version.
    23  
    24  or
    25  
    26    * the GNU General Public License as published by the Free Software
    27      Foundation; either version 2 of the License, or (at your option) any
    28      later version.
    29  
    30  or both in parallel, as here.
    31  
    32  The GNU MP Library is distributed in the hope that it will be useful, but
    33  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    34  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    35  for more details.
    36  
    37  You should have received copies of the GNU General Public License and the
    38  GNU Lesser General Public License along with the GNU MP Library.  If not,
    39  see https://www.gnu.org/licenses/.  */
    40  
    41  
    42  #include "gmp.h"
    43  #include "gmp-impl.h"
    44  
    45  /* Evaluate in: -2, -1, 0, +1, +2, +inf
    46  
    47    <-s-><--n--><--n--><--n-->
    48     ___ ______ ______ ______
    49    |a3_|___a2_|___a1_|___a0_|
    50  	|_b2_|___b1_|___b0_|
    51  	<-t--><--n--><--n-->
    52  
    53    v0  =  a0             * b0          #   A(0)*B(0)
    54    v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 3  bh <= 2
    55    vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1 |bh|<= 1
    56    v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 14 bh <= 6
    57    vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) #  A(-2)*B(-2)    |ah| <= 9 |bh|<= 4
    58    vinf=              a3 *         b2  # A(inf)*B(inf)
    59  */
    60  
    61  void
    62  mpn_toom43_mul (mp_ptr pp,
    63  		mp_srcptr ap, mp_size_t an,
    64  		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
    65  {
    66    mp_size_t n, s, t;
    67    enum toom6_flags flags;
    68    mp_limb_t cy;
    69  
    70  #define a0  ap
    71  #define a1  (ap + n)
    72  #define a2  (ap + 2 * n)
    73  #define a3  (ap + 3 * n)
    74  #define b0  bp
    75  #define b1  (bp + n)
    76  #define b2  (bp + 2 * n)
    77  
    78    n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
    79  
    80    s = an - 3 * n;
    81    t = bn - 2 * n;
    82  
    83    ASSERT (0 < s && s <= n);
    84    ASSERT (0 < t && t <= n);
    85  
    86    /* This is true whenever an >= 25 or bn >= 19, I think. It
    87       guarantees that we can fit 5 values of size n+1 in the product
    88       area. */
    89    ASSERT (s+t >= 5);
    90  
    91  #define v0    pp				/* 2n */
    92  #define vm1   (scratch)				/* 2n+1 */
    93  #define v1    (pp + 2*n)			/* 2n+1 */
    94  #define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
    95  #define v2    (scratch + 4 * n + 2)		/* 2n+1 */
    96  #define vinf  (pp + 5 * n)			/* s+t */
    97  #define bs1    pp				/* n+1 */
    98  #define bsm1  (scratch + 2 * n + 2)		/* n+1 */
    99  #define asm1  (scratch + 3 * n + 3)		/* n+1 */
   100  #define asm2  (scratch + 4 * n + 4)		/* n+1 */
   101  #define bsm2  (pp + n + 1)			/* n+1 */
   102  #define bs2   (pp + 2 * n + 2)			/* n+1 */
   103  #define as2   (pp + 3 * n + 3)			/* n+1 */
   104  #define as1   (pp + 4 * n + 4)			/* n+1 */
   105  
   106    /* Total sccratch need is 6 * n + 3 + 1; we allocate one extra
   107       limb, because products will overwrite 2n+2 limbs. */
   108  
   109  #define a0a2  scratch
   110  #define b0b2  scratch
   111  #define a1a3  asm1
   112  #define b1d   bsm1
   113  
   114    /* Compute as2 and asm2.  */
   115    flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
   116  
   117    /* Compute bs2 and bsm2.  */
   118    b1d[n] = mpn_lshift (b1d, b1, n, 1);			/*       2b1      */
   119    cy  = mpn_lshift (b0b2, b2, t, 2);			/*  4b2           */
   120    cy += mpn_add_n (b0b2, b0b2, b0, t);			/*  4b2      + b0 */
   121    if (t != n)
   122      cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
   123    b0b2[n] = cy;
   124  
   125  #if HAVE_NATIVE_mpn_add_n_sub_n
   126    if (mpn_cmp (b0b2, b1d, n+1) < 0)
   127      {
   128        mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
   129        flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
   130      }
   131    else
   132      {
   133        mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
   134      }
   135  #else
   136    mpn_add_n (bs2, b0b2, b1d, n+1);
   137    if (mpn_cmp (b0b2, b1d, n+1) < 0)
   138      {
   139        mpn_sub_n (bsm2, b1d, b0b2, n+1);
   140        flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
   141      }
   142    else
   143      {
   144        mpn_sub_n (bsm2, b0b2, b1d, n+1);
   145      }
   146  #endif
   147  
   148    /* Compute as1 and asm1.  */
   149    flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));
   150  
   151    /* Compute bs1 and bsm1.  */
   152    bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
   153  #if HAVE_NATIVE_mpn_add_n_sub_n
   154    if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
   155      {
   156        cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
   157        bs1[n] = cy >> 1;
   158        flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
   159      }
   160    else
   161      {
   162        cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
   163        bs1[n] = bsm1[n] + (cy >> 1);
   164        bsm1[n]-= cy & 1;
   165      }
   166  #else
   167    bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
   168    if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
   169      {
   170        mpn_sub_n (bsm1, b1, bsm1, n);
   171        flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
   172      }
   173    else
   174      {
   175        bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
   176      }
   177  #endif
   178  
   179    ASSERT (as1[n] <= 3);
   180    ASSERT (bs1[n] <= 2);
   181    ASSERT (asm1[n] <= 1);
   182    ASSERT (bsm1[n] <= 1);
   183    ASSERT (as2[n] <=14);
   184    ASSERT (bs2[n] <= 6);
   185    ASSERT (asm2[n] <= 9);
   186    ASSERT (bsm2[n] <= 4);
   187  
   188    /* vm1, 2n+1 limbs */
   189    mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */
   190  
   191    /* vm2, 2n+1 limbs */
   192    mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
   193  
   194    /* v2, 2n+1 limbs */
   195    mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
   196  
   197    /* v1, 2n+1 limbs */
   198    mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
   199  
   200    /* vinf, s+t limbs */   /* W0 */
   201    if (s > t)  mpn_mul (vinf, a3, s, b2, t);
   202    else        mpn_mul (vinf, b2, t, a3, s);
   203  
   204    /* v0, 2n limbs */
   205    mpn_mul_n (v0, ap, bp, n);  /* W5 */
   206  
   207    mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
   208  
   209  #undef v0
   210  #undef vm1
   211  #undef v1
   212  #undef vm2
   213  #undef v2
   214  #undef vinf
   215  #undef bs1
   216  #undef bs2
   217  #undef bsm1
   218  #undef bsm2
   219  #undef asm1
   220  #undef asm2
   221  /* #undef as1 */
   222  /* #undef as2 */
   223  #undef a0a2
   224  #undef b0b2
   225  #undef a1a3
   226  #undef b1d
   227  #undef a0
   228  #undef a1
   229  #undef a2
   230  #undef a3
   231  #undef b0
   232  #undef b1
   233  #undef b2
   234  }