github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mu_bdiv_qr.c

/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
   and N; all other overlap disallowed.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/
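
/*
   Concretely (a sketch of what the code below does): compute
   I = D^{-1} mod B^in for a block size in <= dn, then develop the
   quotient in blocks of `in' limbs as Q_i = R * I mod B^in, where R is
   the current partial remainder.  Keeping I at `in' limbs rather than
   full length shortens the Newton iteration in mpn_binvert, at a modest
   per-block cost.
*/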

#include "gmp.h"
#include "gmp-impl.h"


/* N = {np,nn}
   D = {dp,dn}

   Requirements: N >= D
		 D >= 1
		 D odd
		 dn >= 2
		 nn >= 2
		 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).

   Write quotient to Q = {qp,nn-dn}.

   FIXME: When iterating, perhaps do the small step before loop, not after.
   FIXME: Try to avoid the scalar divisions when computing inverse size.
   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
	  particular, when dn==in, tp and rp could use the same space.
*/
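
/* Illustrative call sequence (a sketch, using the TMP allocation macros
   from gmp-impl.h; qp, rp, np, dp are caller-supplied limb arrays
   meeting the requirements above):

     TMP_DECL;
     TMP_MARK;
     scratch = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (nn, dn));
     cy = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
     TMP_FREE;
*/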
mp_limb_t
mpn_mu_bdiv_qr (mp_ptr qp,
		mp_ptr rp,
		mp_srcptr np, mp_size_t nn,
		mp_srcptr dp, mp_size_t dn,
		mp_ptr scratch)
{
  mp_size_t qn;
  mp_size_t in;
  mp_limb_t cy, c0;
  mp_size_t tn, wn;

  qn = nn - dn;

  ASSERT (dn >= 2);
  ASSERT (qn >= 2);

  if (qn > dn)
    {
      mp_size_t b;

      /* |_______________________|   dividend
			|________|   divisor  */

#define ip           scratch		/* in */
#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute an inverse size that is a nice partition of the quotient.  */
      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
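
      /* Example: qn = 10, dn = 4 give b = ceil(10/4) = 3 blocks and
	 in = ceil(10/3) = 4, so the quotient below is produced as
	 blocks of 4 + 4 + 2 limbs.  */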

      /* Some notes on allocation:

	 When in = dn, R dies when mpn_mullo returns; if in < dn, the low in
	 limbs of R die at that point.  We could save memory by letting T live
	 just under R, and letting the upper part of T expand into R.  These
	 changes should reduce the itch to perhaps 3dn.
       */

      mpn_binvert (ip, dp, in, tp);

      MPN_COPY (rp, np, dn);
      np += dn;
      cy = 0;

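      /* Block loop: each pass computes the next `in' quotient limbs as
	 Q_i = R * I mod B^in, subtracts the high part of Q_i * D from
	 the partial remainder, and shifts in the next `in' dividend
	 limbs.  The low `in' limbs of Q_i * D need not be formed; they
	 agree with the low limbs of R by construction.  */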
      while (qn > in)
	{
	  mpn_mullo_n (qp, rp, ip, in);

	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
	  else
	    {
	      tn = mpn_mulmod_bnm1_next_size (dn);
	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
	      wn = dn + in - tn;		/* number of wrapped limbs */
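	      /* tp holds Q_i * D mod (B^tn - 1): the top wn limbs of
		 the true dn+in limb product wrapped around into
		 tp[0..wn).  The low limbs of Q_i * D agree with rp, so
		 subtracting rp recovers the wrapped limbs into
		 tp[tn..tn+wn); mpn_decr_u propagates the borrow.  */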
	      if (wn > 0)
		{
		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
		  mpn_decr_u (tp + wn, c0);
		}
	    }

	  qp += in;
	  qn -= in;

	  if (dn != in)
	    {
	      /* Subtract tp[dn-1...in] from partial remainder.  */
	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
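	      /* cy now holds up to two accumulated borrows, but
		 mpn_sub_nc below accepts a carry-in of at most 1; fold
		 the extra unit into tp[dn], where the next subtraction
		 absorbs it.  */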
	      if (cy == 2)
		{
		  mpn_incr_u (tp + dn, 1);
		  cy = 1;
		}
	    }
	  /* Subtract tp[dn+in-1...dn] from dividend.  */
	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
	  np += in;
	}

      /* Generate last qn limbs.  */
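      /* Here qn <= in, and the low qn limbs of the in-limb inverse
	 suffice: an inverse mod B^in is also one mod B^qn.  */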
      mpn_mullo_n (qp, rp, ip, qn);

      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	mpn_mul (tp, dp, dn, qp, qn);		/* mulhi, need tp[dn+qn-1...qn] */
      else
	{
	  tn = mpn_mulmod_bnm1_next_size (dn);
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
	  wn = dn + qn - tn;			/* number of wrapped limbs */
	  if (wn > 0)
	    {
	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
	      mpn_decr_u (tp + wn, c0);
	    }
	}

      if (dn != qn)
	{
	  cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
	  if (cy == 2)
	    {
	      mpn_incr_u (tp + dn, 1);
	      cy = 1;
	    }
	}
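      /* Subtract the top product limbs from the final dividend limbs;
	 the borrow out is the function's return value.  */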
      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);

#undef ip
#undef tp
#undef scratch_out
    }
  else
    {
      /* |_______________________|   dividend
		|________________|   divisor  */

#define ip           scratch		/* in */
#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */

      /* Compute half-sized inverse.  */
      in = qn - (qn >> 1);
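      /* in = ceil(qn/2): the quotient is developed in two chunks, of in
	 and qn-in <= in limbs, so an in-limb inverse covers both.  */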

      mpn_binvert (ip, dp, in, tp);

      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */

      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	mpn_mul (tp, dp, dn, qp, in);		/* mulhigh */
      else
	{
	  tn = mpn_mulmod_bnm1_next_size (dn);
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
	  wn = dn + in - tn;			/* number of wrapped limbs */
	  if (wn > 0)
	    {
	      c0 = mpn_sub_n (tp + tn, tp, np, wn);
	      mpn_decr_u (tp + wn, c0);
	    }
	}

      qp += in;
      qn -= in;

      cy = mpn_sub_n (rp, np + in, tp + in, dn);
      mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */

      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	mpn_mul (tp, dp, dn, qp, qn);		/* mulhigh */
      else
	{
	  tn = mpn_mulmod_bnm1_next_size (dn);
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
	  wn = dn + qn - tn;			/* number of wrapped limbs */
	  if (wn > 0)
	    {
	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
	      mpn_decr_u (tp + wn, c0);
	    }
	}

      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
      if (cy == 2)
	{
	  mpn_incr_u (tp + dn, 1);
	  cy = 1;
	}
      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);

#undef ip
#undef tp
#undef scratch_out
    }
}

mp_size_t
mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
{
  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
  mp_size_t b;

  ASSERT_ALWAYS (DC_BDIV_Q_THRESHOLD < MU_BDIV_Q_THRESHOLD);

  qn = nn - dn;

  if (qn > dn)
    {
      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
    }
  else
    {
      in = qn - (qn >> 1);
    }

  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
    {
      tn = dn + in;
      itch_out = 0;
    }
  else
    {
      tn = mpn_mulmod_bnm1_next_size (dn);
      itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
    }

  itch_binvert = mpn_binvert_itch (in);
  itches = tn + itch_out;
  return in + MAX (itches, itch_binvert);
}
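
/* Scratch accounting sketch: for nn = 14, dn = 4 we get qn = 10, b = 3,
   in = 4; below the mulmod_bnm1 threshold this gives tn = dn + in = 8
   and itch_out = 0, so the function returns
   in + MAX (8, mpn_binvert_itch (4)) limbs.  */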