github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mu_div_q.c (about)

     1  /* mpn_mu_div_q.
     2  
     3     Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
     4  
     5     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     6     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
     8  
     9  Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  
    38  /*
    39     The idea of the algorithm used herein is to compute a smaller inverted value
    40     than used in the standard Barrett algorithm, and thus save time in the
    41     Newton iterations, and pay just a small price when using the inverted value
    42     for developing quotient bits.  This algorithm was presented at ICMS 2006.
    43  */
    44  
    45  /*
    46    Things to work on:
    47  
    48    1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
    49       probably close to optimal, except when mpn_mu_divappr_q fails.
    50  
    51    2. We used to fall back to mpn_mu_div_qr when we detect a possible
    52       mpn_mu_divappr_q rounding problem, now we multiply and compare.
    53       Unfortunately, since mpn_mu_divappr_q does not return the partial
    54       remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
    55       solve that.
    56  
    57    3. The allocations done here should be made from the scratch area, which
    58       then would need to be amended.
    59  */
    60  
    61  #include <stdlib.h>		/* for NULL */
    62  #include "gmp.h"
    63  #include "gmp-impl.h"
    64  
    65  
    66  mp_limb_t
    67  mpn_mu_div_q (mp_ptr qp,
    68  	      mp_srcptr np, mp_size_t nn,
    69  	      mp_srcptr dp, mp_size_t dn,
    70  	      mp_ptr scratch)
    71  {
    72    mp_ptr tp, rp;
    73    mp_size_t qn;
    74    mp_limb_t cy, qh;
    75    TMP_DECL;
    76  
    77    TMP_MARK;
    78  
    79    qn = nn - dn;
    80  
    81    tp = TMP_BALLOC_LIMBS (qn + 1);
    82  
    83    if (qn >= dn)			/* nn >= 2*dn + 1 */
    84      {
    85         /* |_______________________|   dividend
    86  			 |________|   divisor  */
    87  
    88        rp = TMP_BALLOC_LIMBS (nn + 1);
    89        MPN_COPY (rp + 1, np, nn);
    90        rp[0] = 0;
    91  
    92        qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
    93        if (qh != 0)
    94  	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
    95  
    96        cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
    97  
    98        if (UNLIKELY (cy != 0))
    99  	{
   100  	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
   101  	     canonically reduced, replace the returned value of B^(qn-dn)+eps
   102  	     by the largest possible value.  */
   103  	  mp_size_t i;
   104  	  for (i = 0; i < qn + 1; i++)
   105  	    tp[i] = GMP_NUMB_MAX;
   106  	}
   107  
   108        /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
   109  	 smaller than the max error, we cannot trust the quotient.  */
   110        if (tp[0] > 4)
   111  	{
   112  	  MPN_COPY (qp, tp + 1, qn);
   113  	}
   114        else
   115  	{
   116  	  mp_limb_t cy;
   117  	  mp_ptr pp;
   118  
   119  	  pp = rp;
   120  	  mpn_mul (pp, tp + 1, qn, dp, dn);
   121  
   122  	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
   123  
   124  	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
   125  	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
   126  	  else /* Same as above */
   127  	    MPN_COPY (qp, tp + 1, qn);
   128  	}
   129      }
   130    else
   131      {
   132         /* |_______________________|   dividend
   133  		 |________________|   divisor  */
   134  
   135        /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
   136  	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
   137  	 the most significant dn-1 limbs will actually be read, but it is not
   138  	 pretty.  */
   139  
   140        qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
   141  			     dp + dn - (qn + 1), qn + 1, scratch);
   142  
   143        /* The max error of mpn_mu_divappr_q is +4, but we get an additional
   144           error from the divisor truncation.  */
   145        if (tp[0] > 6)
   146  	{
   147  	  MPN_COPY (qp, tp + 1, qn);
   148  	}
   149        else
   150  	{
   151  	  mp_limb_t cy;
   152  
   153  	  /* FIXME: a shorter product should be enough; we may use already
   154  	     allocated space... */
   155  	  rp = TMP_BALLOC_LIMBS (nn);
   156  	  mpn_mul (rp, dp, dn, tp + 1, qn);
   157  
   158  	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
   159  
   160  	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
   161  	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
   162  	  else /* Same as above */
   163  	    MPN_COPY (qp, tp + 1, qn);
   164  	}
   165      }
   166  
   167    TMP_FREE;
   168    return qh;
   169  }
   170  
   171  mp_size_t
   172  mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
   173  {
   174    mp_size_t qn;
   175  
   176    qn = nn - dn;
   177    if (qn >= dn)
   178      {
   179        return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
   180      }
   181    else
   182      {
   183        return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
   184      }
   185  }