github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mulmid.c (about)

     1  /* mpn_mulmid -- middle product
     2  
     3     Contributed by David Harvey.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     8  
     9  Copyright 2011 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  #include "gmp.h"
    39  #include "gmp-impl.h"
    40  
    41  
    42  #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
    43  
    44  
    45  void
    46  mpn_mulmid (mp_ptr rp,
    47              mp_srcptr ap, mp_size_t an,
    48              mp_srcptr bp, mp_size_t bn)
    49  {
    50    mp_size_t rn, k;
    51    mp_ptr scratch, temp;
    52  
    53    ASSERT (an >= bn);
    54    ASSERT (bn >= 1);
    55    ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
    56    ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
    57  
    58    if (bn < MULMID_TOOM42_THRESHOLD)
    59      {
    60        /* region not tall enough to make toom42 worthwhile for any portion */
    61  
    62        if (an < CHUNK)
    63  	{
    64  	  /* region not too wide either, just call basecase directly */
    65  	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
    66  	  return;
    67  	}
    68  
    69        /* Region quite wide. For better locality, use basecase on chunks:
    70  
    71  	 AAABBBCC..
    72  	 .AAABBBCC.
    73  	 ..AAABBBCC
    74        */
    75  
    76        k = CHUNK - bn + 1;    /* number of diagonals per chunk */
    77  
    78        /* first chunk (marked A in the above diagram) */
    79        mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
    80  
    81        /* remaining chunks (B, C, etc) */
    82        an -= k;
    83  
    84        while (an >= CHUNK)
    85  	{
    86  	  mp_limb_t t0, t1, cy;
    87  	  ap += k, rp += k;
    88  	  t0 = rp[0], t1 = rp[1];
    89  	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
    90  	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
    91  	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
    92  	  an -= k;
    93  	}
    94  
    95        if (an >= bn)
    96  	{
    97  	  /* last remaining chunk */
    98  	  mp_limb_t t0, t1, cy;
    99  	  ap += k, rp += k;
   100  	  t0 = rp[0], t1 = rp[1];
   101  	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
   102  	  ADDC_LIMB (cy, rp[0], rp[0], t0);
   103  	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
   104  	}
   105  
   106        return;
   107      }
   108  
   109    /* region is tall enough for toom42 */
   110  
   111    rn = an - bn + 1;
   112  
   113    if (rn < MULMID_TOOM42_THRESHOLD)
   114      {
   115        /* region not wide enough to make toom42 worthwhile for any portion */
   116  
   117        TMP_DECL;
   118  
   119        if (bn < CHUNK)
   120  	{
   121  	  /* region not too tall either, just call basecase directly */
   122  	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
   123  	  return;
   124  	}
   125  
   126        /* Region quite tall. For better locality, use basecase on chunks:
   127  
   128  	 AAAAA....
   129  	 .AAAAA...
   130  	 ..BBBBB..
   131  	 ...BBBBB.
   132  	 ....CCCCC
   133        */
   134  
   135        TMP_MARK;
   136  
   137        temp = TMP_ALLOC_LIMBS (rn + 2);
   138  
   139        /* first chunk (marked A in the above diagram) */
   140        bp += bn - CHUNK, an -= bn - CHUNK;
   141        mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
   142  
   143        /* remaining chunks (B, C, etc) */
   144        bn -= CHUNK;
   145  
   146        while (bn >= CHUNK)
   147  	{
   148  	  ap += CHUNK, bp -= CHUNK;
   149  	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
   150  	  mpn_add_n (rp, rp, temp, rn + 2);
   151  	  bn -= CHUNK;
   152  	}
   153  
   154        if (bn)
   155  	{
   156  	  /* last remaining chunk */
   157  	  ap += CHUNK, bp -= bn;
   158  	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
   159  	  mpn_add_n (rp, rp, temp, rn + 2);
   160  	}
   161  
   162        TMP_FREE;
   163        return;
   164      }
   165  
   166    /* we're definitely going to use toom42 somewhere */
   167  
   168    if (bn > rn)
   169      {
   170        /* slice region into chunks, use toom42 on all chunks except possibly
   171  	 the last:
   172  
   173           AA....
   174           .AA...
   175           ..BB..
   176           ...BB.
   177           ....CC
   178        */
   179  
   180        TMP_DECL;
   181        TMP_MARK;
   182  
   183        temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
   184        scratch = temp + rn + 2;
   185  
   186        /* first chunk (marked A in the above diagram) */
   187        bp += bn - rn;
   188        mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
   189  
   190        /* remaining chunks (B, C, etc) */
   191        bn -= rn;
   192  
   193        while (bn >= rn)
   194          {
   195            ap += rn, bp -= rn;
   196  	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
   197            mpn_add_n (rp, rp, temp, rn + 2);
   198            bn -= rn;
   199          }
   200  
   201        if (bn)
   202          {
   203            /* last remaining chunk */
   204            ap += rn, bp -= bn;
   205  	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
   206            mpn_add_n (rp, rp, temp, rn + 2);
   207          }
   208  
   209        TMP_FREE;
   210      }
   211    else
   212      {
   213        /* slice region into chunks, use toom42 on all chunks except possibly
   214  	 the last:
   215  
   216           AAABBBCC..
   217           .AAABBBCC.
   218           ..AAABBBCC
   219        */
   220  
   221        TMP_DECL;
   222        TMP_MARK;
   223  
   224        scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
   225  
   226        /* first chunk (marked A in the above diagram) */
   227        mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
   228  
   229        /* remaining chunks (B, C, etc) */
   230        rn -= bn;
   231  
   232        while (rn >= bn)
   233          {
   234  	  mp_limb_t t0, t1, cy;
   235            ap += bn, rp += bn;
   236            t0 = rp[0], t1 = rp[1];
   237            mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
   238  	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
   239  	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
   240  	  rn -= bn;
   241          }
   242  
   243        TMP_FREE;
   244  
   245        if (rn)
   246          {
   247            /* last remaining chunk */
   248  	  mp_limb_t t0, t1, cy;
   249            ap += bn, rp += bn;
   250            t0 = rp[0], t1 = rp[1];
   251            mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
   252  	  ADDC_LIMB (cy, rp[0], rp[0], t0);
   253  	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
   254          }
   255      }
   256  }