github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/div_q.c (about)

     1  /* mpn_div_q -- division for arbitrary size operands.
     2  
     3     Contributed to the GNU project by Torbjorn Granlund.
     4  
     5     THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
     6     SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     7     GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
     8  
     9  Copyright 2009, 2010 Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  #include "gmp.h"
    38  #include "gmp-impl.h"
    39  #include "longlong.h"
    40  
    41  
    42  /* Compute Q = N/D with truncation.
    43       N = {np,nn}
    44       D = {dp,dn}
    45       Q = {qp,nn-dn+1}
    46       T = {scratch,nn+1} is scratch space
    47     N and D are both untouched by the computation.
    48     N and T may overlap; pass the same space if N is irrelevant after the call,
    49     but note that tp needs an extra limb.
    50  
    51     Operand requirements:
    52       N >= D > 0
    53       dp[dn-1] != 0
    54       No overlap between the N, D, and Q areas.
    55  
    56     This division function does not clobber its input operands, since it is
    57     intended to support average-O(qn) division, and for that to be effective, it
    58     cannot put requirements on callers to copy a O(nn) operand.
    59  
    60     If a caller does not care about the value of {np,nn+1} after calling this
    61     function, it should pass np also for the scratch argument.  This function
    62     will then save some time and space by avoiding allocation and copying.
    63     (FIXME: Is this a good design?  We only really save any copying for
    64     already-normalised divisors, which should be rare.  It also prevents us from
    65     reasonably asking for all scratch space we need.)
    66  
    67     We write nn-dn+1 limbs for the quotient, but return void.  Why not return
    68     the most significant quotient limb?  Look at the 4 main code blocks below
    69     (consisting of an outer if-else where each arm contains an if-else). It is
    70     tricky for the first code block, since the mpn_*_div_q calls will typically
    71     generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
    72     we generate the most significant quotient limb here, before calling
    73     mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
    74     critical division case (the SB sub-case in particular) copying is not a good
    75     idea.
    76  
    77     It might make sense to split the if-else parts of the (qn + FUDGE
    78     >= dn) blocks into separate functions, since we could promise quite
    79     different things to callers in these two cases.  The 'then' case
    80     benefits from np=scratch, and it could perhaps even tolerate qp=np,
    81     saving some headache for many callers.
    82  
    83     FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
    84     operands, we do not reuse the huge scratch for adjustments.  This can be a
    85     serious waste of memory for the largest operands.
    86  */
    87  
    88  /* FUDGE determines when to try getting an approximate quotient from the upper
    89     parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
    90     for the code to be correct.  */
    91  #define FUDGE 5			/* FIXME: tune this */
    92  
    93  #define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
    94  #define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
    95  #define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
    96  #ifndef MUPI_DIVAPPR_Q_THRESHOLD
    97  #define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
    98  #endif
    99  
   100  void
   101  mpn_div_q (mp_ptr qp,
   102  	   mp_srcptr np, mp_size_t nn,
   103  	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
   104  {
   105    mp_ptr new_dp, new_np, tp, rp;
   106    mp_limb_t cy, dh, qh;
   107    mp_size_t new_nn, qn;
   108    gmp_pi1_t dinv;
   109    int cnt;
   110    TMP_DECL;
   111    TMP_MARK;
   112  
   113    ASSERT (nn >= dn);
   114    ASSERT (dn > 0);
   115    ASSERT (dp[dn - 1] != 0);
   116    ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
   117    ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
   118    ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
   119  
   120    ASSERT_ALWAYS (FUDGE >= 2);
   121  
   122    if (dn == 1)
   123      {
   124        mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
   125        return;
   126      }
   127  
   128    qn = nn - dn + 1;		/* Quotient size, high limb might be zero */
   129  
   130    if (qn + FUDGE >= dn)
   131      {
   132        /* |________________________|
   133                            |_______|  */
   134        new_np = scratch;
   135  
   136        dh = dp[dn - 1];
   137        if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
   138  	{
   139  	  count_leading_zeros (cnt, dh);
   140  
   141  	  cy = mpn_lshift (new_np, np, nn, cnt);
   142  	  new_np[nn] = cy;
   143  	  new_nn = nn + (cy != 0);
   144  
   145  	  new_dp = TMP_ALLOC_LIMBS (dn);
   146  	  mpn_lshift (new_dp, dp, dn, cnt);
   147  
   148  	  if (dn == 2)
   149  	    {
   150  	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
   151  	    }
   152  	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
   153  		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
   154  	    {
   155  	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
   156  	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
   157  	    }
   158  	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
   159  		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
   160  		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
   161  		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
   162  	    {
   163  	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
   164  	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
   165  	    }
   166  	  else
   167  	    {
   168  	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
   169  	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
   170  	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
   171  	    }
   172  	  if (cy == 0)
   173  	    qp[qn - 1] = qh;
   174  	  else if (UNLIKELY (qh != 0))
   175  	    {
   176  	      /* This happens only when the quotient is close to B^n and
   177  		 mpn_*_divappr_q returned B^n.  */
   178  	      mp_size_t i, n;
   179  	      n = new_nn - dn;
   180  	      for (i = 0; i < n; i++)
   181  		qp[i] = GMP_NUMB_MAX;
   182  	      qh = 0;		/* currently ignored */
   183  	    }
   184  	}
   185        else  /* divisor is already normalised */
   186  	{
   187  	  if (new_np != np)
   188  	    MPN_COPY (new_np, np, nn);
   189  
   190  	  if (dn == 2)
   191  	    {
   192  	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
   193  	    }
   194  	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
   195  		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
   196  	    {
   197  	      invert_pi1 (dinv, dh, dp[dn - 2]);
   198  	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
   199  	    }
   200  	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
   201  		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
   202  		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
   203  		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
   204  	    {
   205  	      invert_pi1 (dinv, dh, dp[dn - 2]);
   206  	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
   207  	    }
   208  	  else
   209  	    {
   210  	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
   211  	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
   212  	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
   213  	    }
   214  	  qp[nn - dn] = qh;
   215  	}
   216      }
   217    else
   218      {
   219        /* |________________________|
   220                  |_________________|  */
   221        tp = TMP_ALLOC_LIMBS (qn + 1);
   222  
   223        new_np = scratch;
   224        new_nn = 2 * qn + 1;
   225        if (new_np == np)
   226  	/* We need {np,nn} to remain untouched until the final adjustment, so
   227  	   we need to allocate separate space for new_np.  */
   228  	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
   229  
   230  
   231        dh = dp[dn - 1];
   232        if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
   233  	{
   234  	  count_leading_zeros (cnt, dh);
   235  
   236  	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
   237  	  new_np[new_nn] = cy;
   238  
   239  	  new_nn += (cy != 0);
   240  
   241  	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
   242  	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
   243  	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
   244  
   245  	  if (qn + 1 == 2)
   246  	    {
   247  	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
   248  	    }
   249  	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
   250  	    {
   251  	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
   252  	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
   253  	    }
   254  	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
   255  	    {
   256  	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
   257  	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
   258  	    }
   259  	  else
   260  	    {
   261  	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
   262  	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
   263  	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
   264  	    }
   265  	  if (cy == 0)
   266  	    tp[qn] = qh;
   267  	  else if (UNLIKELY (qh != 0))
   268  	    {
   269  	      /* This happens only when the quotient is close to B^n and
   270  		 mpn_*_divappr_q returned B^n.  */
   271  	      mp_size_t i, n;
   272  	      n = new_nn - (qn + 1);
   273  	      for (i = 0; i < n; i++)
   274  		tp[i] = GMP_NUMB_MAX;
   275  	      qh = 0;		/* currently ignored */
   276  	    }
   277  	}
   278        else  /* divisor is already normalised */
   279  	{
   280  	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless if MU will be used */
   281  
   282  	  new_dp = (mp_ptr) dp + dn - (qn + 1);
   283  
   284  	  if (qn == 2 - 1)
   285  	    {
   286  	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
   287  	    }
   288  	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
   289  	    {
   290  	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
   291  	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
   292  	    }
   293  	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
   294  	    {
   295  	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
   296  	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
   297  	    }
   298  	  else
   299  	    {
   300  	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
   301  	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
   302  	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
   303  	    }
   304  	  tp[qn] = qh;
   305  	}
   306  
   307        MPN_COPY (qp, tp + 1, qn);
   308        if (tp[0] <= 4)
   309          {
   310  	  mp_size_t rn;
   311  
   312            rp = TMP_ALLOC_LIMBS (dn + qn);
   313            mpn_mul (rp, dp, dn, tp + 1, qn);
   314  	  rn = dn + qn;
   315  	  rn -= rp[rn - 1] == 0;
   316  
   317            if (rn > nn || mpn_cmp (np, rp, nn) < 0)
   318              mpn_decr_u (qp, 1);
   319          }
   320      }
   321  
   322    TMP_FREE;
   323  }