github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mu_bdiv_q.c

/* mpn_mu_bdiv_q(qp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^nn,
   storing the result in {qp,nn}.  Overlap allowed between Q and N; all other
   overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/
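
/* This is Hensel (2-adic) division: B is a power of 2 and D is required to
   be odd, so D is invertible mod B^nn and the quotient written to {qp,nn}
   is the unique Q satisfying Q * D == N (mod B^nn).  */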

#include "gmp.h"
#include "gmp-impl.h"


/* N = {np,nn}
   D = {dp,dn}

   Requirements: N >= D
                 D >= 1
                 D odd
                 dn >= 2
                 nn >= 2
                 scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).

   Write quotient to Q = {qp,nn}.

   FIXME: When iterating, perhaps do the small step before loop, not after.
   FIXME: Try to avoid the scalar divisions when computing inverse size.
   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
          particular, when dn==in, tp and rp could use the same space.
   FIXME: Trim final quotient calculation to qn limbs of precision.
*/
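
/* A minimal call sketch, assuming the caller sizes the scratch area with
   mpn_mu_bdiv_q_itch; the TMP_* macros are the gmp-impl.h temporary
   allocators, but any allocator yielding that many limbs works:

     mp_ptr scratch;
     TMP_DECL;
     TMP_MARK;
     scratch = TMP_ALLOC_LIMBS (mpn_mu_bdiv_q_itch (nn, dn));
     mpn_mu_bdiv_q (qp, np, nn, dp, dn, scratch);
     TMP_FREE;
*/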
void
mpn_mu_bdiv_q (mp_ptr qp,
               mp_srcptr np, mp_size_t nn,
               mp_srcptr dp, mp_size_t dn,
               mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t in;
  int cy, c0;
  mp_size_t tn, wn;

  qn = nn;

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;

      /* |_______________________|   dividend
                        |________|   divisor  */

#define ip           scratch                    /* in */
#define rp           (scratch + in)             /* dn or rest >= binvert_itch(in) */
#define tp           (scratch + in + dn)        /* dn+in or next_size(dn) */
#define scratch_out  (scratch + in + dn + tn)   /* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;    /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;    /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
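      /* Example: qn = 10, dn = 8 gives b = 2 and in = 5, i.e. two balanced
         5-limb quotient blocks developed from a 5-limb inverse, instead of
         an 8-limb inverse producing blocks of 8 and 2.  */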

      /* Some notes on allocation:

         When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
         limbs of R die at that point.  We could save memory by letting T
         live just under R, and letting the upper part of T expand into R.
         These changes should reduce itch to perhaps 3*dn.
       */

      mpn_binvert (ip, dp, in, rp);
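      /* {ip,in} is now the 2-adic inverse of D: {dp,in} * {ip,in} == 1
         (mod B^in).  rp serves as scratch for mpn_binvert here.  */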

      cy = 0;

      MPN_COPY (rp, np, dn);
      np += dn;
      mpn_mullo_n (qp, rp, ip, in);
      qn -= in;

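      /* {qp,in} satisfies D * {qp,in} == R (mod B^in) for the current
         partial remainder R in rp.  Each pass below subtracts the high
         part of D * {qp,in} from the remainder, shifts in the next in
         dividend limbs, and develops the next in quotient limbs.  */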
      while (qn > in)
        {
          if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
            mpn_mul (tp, dp, dn, qp, in);       /* mulhi, need tp[dn+in-1...in] */
          else
            {
              tn = mpn_mulmod_bnm1_next_size (dn);
              mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
              wn = dn + in - tn;                /* number of wrapped limbs */
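              /* {tp,tn} is D * {qp,in} mod (B^tn - 1), so the top wn
                 product limbs wrapped around into the low limbs.  The
                 true low limbs equal {rp,wn} (D * {qp,in} == rp mod B^in
                 and wn <= in), so subtracting them unwraps the high limbs
                 into tp[tn..tn+wn-1].  */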
              if (wn > 0)
                {
                  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
                  mpn_decr_u (tp + wn, c0);
                }
            }

          qp += in;
          if (dn != in)
            {
              /* Subtract tp[dn-1...in] from partial remainder.  */
              cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
              if (cy == 2)
                {
                  mpn_incr_u (tp + dn, 1);
                  cy = 1;
                }
            }
          /* Subtract tp[dn+in-1...dn] from dividend.  */
          cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
          np += in;
          mpn_mullo_n (qp, rp, ip, in);
          qn -= in;
        }

      /* Generate last qn limbs.
         FIXME: It should be possible to limit precision here, since qn is
         typically somewhat smaller than dn.  No big gains expected.  */

      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, dn, qp, in);           /* mulhi, need tp[qn+in-1...in] */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
          wn = dn + in - tn;                    /* number of wrapped limbs */
          if (wn > 0)
            {
              c0 = mpn_sub_n (tp + tn, tp, rp, wn);
              mpn_decr_u (tp + wn, c0);
            }
        }

      qp += in;
      if (dn != in)
        {
          cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
          if (cy == 2)
            {
              mpn_incr_u (tp + dn, 1);
              cy = 1;
            }
        }

      mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
      mpn_mullo_n (qp, rp, ip, qn);

#undef ip
#undef rp
#undef tp
#undef scratch_out
    }
  else
    {
      /* |_______________________|   dividend
                |________________|   divisor  */

#define ip           scratch            /* in */
#define tp           (scratch + in)     /* qn+in or next_size(qn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */

      /* Compute half-sized inverse.  */
      in = qn - (qn >> 1);
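      /* in = ceil(qn/2), so qn - in <= in and a single wrap-up step
         suffices: the low in quotient limbs come straight from the
         inverse, the rest from one high product and one more mullo.  */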

      mpn_binvert (ip, dp, in, tp);

      mpn_mullo_n (qp, np, ip, in);             /* low `in' quotient limbs */

      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        mpn_mul (tp, dp, qn, qp, in);           /* mulhigh */
      else
        {
          tn = mpn_mulmod_bnm1_next_size (qn);
          mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
          wn = qn + in - tn;                    /* number of wrapped limbs */
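          /* Unwrap as in the unbalanced case, except that the true low
             product limbs equal {np,wn} here (D * {qp,in} == N mod B^in).
             The low part of the product is discarded, so only the wrapped
             carry into tp[wn..] matters; the comparison detects it and
             mpn_decr_u removes it.  */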
          if (wn > 0)
            {
              c0 = mpn_cmp (tp, np, wn) < 0;
              mpn_decr_u (tp + wn, c0);
            }
        }

      mpn_sub_n (tp, np + in, tp + in, qn - in);
      mpn_mullo_n (qp + in, tp, ip, qn - in);   /* high qn-in quotient limbs */
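      /* {tp,qn-in} above is (N - D*Q0) / B^in mod B^(qn-in), with Q0 the
         low quotient part just computed; multiplying by the inverse gives
         the remaining high quotient limbs.  */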

#undef ip
#undef tp
#undef scratch_out
    }
}

mp_size_t
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
  mp_size_t b;

  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);

  qn = nn;

  if (qn > dn)
    {
      b = (qn - 1) / dn + 1;    /* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;    /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        {
          tn = dn + in;
          itch_out = 0;
        }
      else
        {
          tn = mpn_mulmod_bnm1_next_size (dn);
          itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
        }
      itches = dn + tn + itch_out;
    }
  else
    {
      in = qn - (qn >> 1);
      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
        {
          tn = qn + in;
          itch_out = 0;
        }
      else
        {
          tn = mpn_mulmod_bnm1_next_size (qn);
          itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
        }
      itches = tn + itch_out;
    }

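  /* Layout: in limbs for the inverse, then the larger of the working areas
     used above and the binvert scratch; mpn_binvert runs before the working
     areas become live, so the two can share space.  */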
  itch_binvert = mpn_binvert_itch (in);
  return in + MAX (itches, itch_binvert);
}