github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/divexact.c

/* mpn_divexact(qp,np,nn,dp,dn,tp) -- Divide N = {np,nn} by D = {dp,dn} storing
   the result in Q = {qp,nn-dn+1} expecting no remainder.  Overlap allowed
   between Q and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2006, 2007, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
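
/* A minimal usage sketch, assuming a caller inside GMP that already knows
   {np,nn} is an exact multiple of {dp,dn} (dn >= 1, nn >= dn, dp[dn-1] != 0);
   qp, np and dp stand for caller-owned limb arrays, and the call uses the
   five-argument variant compiled in below:

       mp_size_t qn = nn - dn + 1;
       mpn_divexact (qp, np, nn, dp, dn);

   after which {qp,qn} holds the exact quotient.  Application code should use
   the documented mpz_divexact instead, per the warning above.  */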


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#if 1
void
mpn_divexact (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn)
{
  unsigned shift;
  mp_size_t qn;
  mp_ptr tp;
  TMP_DECL;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT (dp[dn-1] > 0);

  while (dp[0] == 0)
    {
      ASSERT (np[0] == 0);
      dp++;
      np++;
      dn--;
      nn--;
    }

  if (dn == 1)
    {
      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
      return;
    }

  TMP_MARK;

  qn = nn + 1 - dn;
  count_trailing_zeros (shift, dp[0]);

  if (shift > 0)
    {
      mp_ptr wp;
      mp_size_t ss;
      ss = (dn > qn) ? qn + 1 : dn;

      tp = TMP_ALLOC_LIMBS (ss);
      mpn_rshift (tp, dp, ss, shift);
      dp = tp;

      /* Since we have excluded dn == 1, we have nn > qn, and we need
         to shift one limb beyond qn. */
      wp = TMP_ALLOC_LIMBS (qn + 1);
      mpn_rshift (wp, np, qn + 1, shift);
      np = wp;
    }

  if (dn > qn)
    dn = qn;

  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
  TMP_FREE;
}
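
/* In outline, the path above reduces exact division to one Hensel (2-adic)
   quotient: common low zero limbs and the trailing zero bits of dp[0] are
   stripped from both operands first (an exact N/D must share those factors
   of 2), leaving an odd low divisor limb for the 2-adic step.  Truncating dn
   to qn is then safe because only the low qn limbs of the divisor can affect
   the low qn limbs of the 2-adic quotient, and for an exact division that
   2-adic quotient is the true quotient {qp,qn}.  */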

#else

/* We use Jebelean's bidirectional exact division algorithm.  This is
   somewhat naively implemented, with equal quotient parts done by 2-adic
   division and truncating division.  Since 2-adic division is faster, it
   should be used for a larger chunk.

   This code is horrendously ugly, in all sorts of ways.

   * It was hacked without much care or thought, but with a testing program.
   * It handles scratch space frivolously, and furthermore the itch function
     is broken.
   * Doesn't provide any measures to deal with mu_divappr_q's +3 error.  We
     have yet to provoke an error due to this, though.
   * Algorithm selection leaves a lot to be desired.  In particular, the choice
     between DC and MU isn't a point, but we treat it like one.
   * It makes the msb part 1 or 2 limbs larger than the lsb part, even though
     the latter is faster.  We should at least reverse this, but perhaps
     we should make the lsb part considerably larger.  (How do we tune this?)
*/
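
/* In outline, the code below splits the qn-limb quotient at limb qn0: the low
   qn0 limbs are computed exactly by 2-adic (bdiv) division of the low limbs
   of N, while the high limbs are computed by a truncating divappr division of
   the high limbs of N by the high limbs of D.  The two parts overlap in limb
   qn0-1; its approximate value is saved in qml and later compared against the
   exact value to decide whether the high part must be decremented.  */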

mp_size_t
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
{
  return nn + dn;		/* FIXME this is not right */
}

void
mpn_divexact (mp_ptr qp,
              mp_srcptr np, mp_size_t nn,
              mp_srcptr dp, mp_size_t dn,
              mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t nn0, qn0;
  mp_size_t nn1, qn1;
  mp_ptr tp;
  mp_limb_t qml;
  mp_limb_t qh;
  int cnt;
  mp_ptr xdp;
  mp_limb_t di;
  mp_limb_t cy;
  gmp_pi1_t dinv;
  TMP_DECL;

  TMP_MARK;

  qn = nn - dn + 1;

  /* For small divisors and small quotients, don't use Jebelean's algorithm. */
  if (dn < DIVEXACT_JEB_THRESHOLD || qn < DIVEXACT_JEB_THRESHOLD)
    {
      tp = scratch;
      MPN_COPY (tp, np, qn);
      binvert_limb (di, dp[0]);  di = -di;
      dn = MIN (dn, qn);
      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
      TMP_FREE;
      return;
    }

  qn0 = ((nn - dn) >> 1) + 1;	/* low quotient size */

  /* If the quotient is much larger than the divisor, the bidirectional
     algorithm does not work as currently implemented.  Fall back to plain bdiv.  */
  if (qn0 > dn)
    {
      if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
	{
	  tp = scratch;
	  MPN_COPY (tp, np, qn);
	  binvert_limb (di, dp[0]);  di = -di;
	  dn = MIN (dn, qn);
	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
	}
      else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
	{
	  tp = scratch;
	  MPN_COPY (tp, np, qn);
	  binvert_limb (di, dp[0]);  di = -di;
	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
	}
      else
	{
	  mpn_mu_bdiv_q (qp, np, qn, dp, dn, scratch);
	}
      TMP_FREE;
      return;
    }

  nn0 = qn0 + qn0;

  nn1 = nn0 - 1 + ((nn-dn) & 1);
  qn1 = qn0;
  if (LIKELY (qn0 != dn))
    {
      nn1 = nn1 + 1;
      qn1 = qn1 + 1;
      if (UNLIKELY (dp[dn - 1] == 1 && qn1 != dn))
	{
	  /* If the leading divisor limb == 1, i.e. has just one bit, we have
	     to include an extra limb in order to get the needed overlap.  */
	  /* FIXME: Now with the mu_divappr_q function, we should really need
	     more overlap. That indicates one of two things: (1) The test code
	     is not good. (2) We actually overlap too much by default.  */
	  nn1 = nn1 + 1;
	  qn1 = qn1 + 1;
	}
    }

  tp = TMP_ALLOC_LIMBS (nn1 + 1);

  count_leading_zeros (cnt, dp[dn - 1]);

  /* Normalize divisor, store into tmp area.  */
  if (cnt != 0)
    {
      xdp = TMP_ALLOC_LIMBS (qn1);
      mpn_lshift (xdp, dp + dn - qn1, qn1, cnt);
    }
  else
    {
      xdp = (mp_ptr) dp + dn - qn1;
    }

  /* Shift dividend according to the divisor normalization.  */
  /* FIXME: We compute too much here for XX_divappr_q, but these functions'
     interfaces want a pointer to the imaginary least significant limb, not
     to the least significant *used* limb.  Of course, we could leave nn1-qn1
     rubbish limbs in the low part, to save some time.  */
  if (cnt != 0)
    {
      cy = mpn_lshift (tp, np + nn - nn1, nn1, cnt);
      if (cy != 0)
	{
	  tp[nn1] = cy;
	  nn1++;
	}
    }
  else
    {
      /* FIXME: This copy is not needed for mpn_mu_divappr_q, except when the
	 mpn_sub_n right before is executed.  */
      MPN_COPY (tp, np + nn - nn1, nn1);
    }

  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
  if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
    }
  else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
    {
      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
    }
  else
    {
      /* FIXME: mpn_mu_divappr_q doesn't handle qh != 0.  Work around it with a
	 conditional subtraction here.  */
      qh = mpn_cmp (tp + nn1 - qn1, xdp, qn1) >= 0;
      if (qh)
	mpn_sub_n (tp + nn1 - qn1, tp + nn1 - qn1, xdp, qn1);
      mpn_mu_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, scratch);
      qp[qn0 - 1 + nn1 - qn1] = qh;
    }
  qml = qp[qn0 - 1];

  binvert_limb (di, dp[0]);  di = -di;

  if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
    {
      MPN_COPY (tp, np, qn0);
      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
    }
  else
    {
      mpn_mu_bdiv_q (qp, np, qn0, dp, qn0, scratch);
    }

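  /* The low, exact part has just recomputed limb qn0-1.  If it exceeds the
     approximate value saved in qml, the divappr excess carried into limb qn0,
     so compensate by decrementing the high part of the quotient.  */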
  if (qml < qp[qn0 - 1])
    mpn_decr_u (qp + qn0, 1);

  TMP_FREE;
}
#endif