github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mu_divappr_q.c

/* mpn_mu_divappr_q, mpn_preinv_mu_divappr_q.

   Compute Q = floor(N / D) + e.  N is nn limbs, D is dn limbs and must be
   normalized, and Q must be nn-dn limbs, 0 <= e <= 4.  The requirement that Q
   is nn-dn limbs (and not nn-dn+1 limbs) was put in place in order to allow us
   to let N be unmodified during the operation.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.

Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/*
   The idea of the algorithm used herein is to compute a smaller inverted value
   than used in the standard Barrett algorithm, and thus save time in the
   Newton iterations, and pay just a small price when using the inverted value
   for developing quotient bits.  This algorithm was presented at ICMS 2006.
*/
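
/* Editorial sketch of the block-wise scheme implemented below (see
   mpn_preinv_mu_divappr_q for the authoritative details):

     in    <- mpn_mu_divappr_q_choose_in (qn, dn, 0)
     I     <- approximate inverse of D's top in+1 limbs, trimmed to in limbs
     R     <- top dn limbs of N (minus D once when the high quotient limb
              qh is 1)
     repeat until all qn quotient limbs are produced:
       Q_blk <- high in limbs of R_hi * (B^in + I),  B = 2^GMP_NUMB_BITS
       R     <- R * B^in + (next in limbs of N) - Q_blk * D
       adjust: while R is too large, increment Q_blk and subtract D
*/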

/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.

 Things to work on:

  * The itch/scratch scheme perhaps isn't as good an idea as it once seemed,
    as demonstrated by the fact that the mpn_invertappr function's scratch
    needs mean that we need to keep a large allocation long after it is
    needed.  Things are worse since mpn_mul_fft does not accept any scratch
    parameter, which means we'll have a large memory hole while in
    mpn_mul_fft.  In general, a peak scratch need at the beginning of a
    function isn't well-handled by the itch/scratch scheme.
*/

#ifdef STAT
#undef STAT
#define STAT(x) x
#else
#define STAT(x)
#endif
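
/* Editorial note: with STAT defined at compile time (e.g. -DSTAT), STAT(x)
   expands to x and enables the adjustment-count statistics gathered and
   printed inside mpn_preinv_mu_divappr_q; otherwise STAT(x) expands to
   nothing.  */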

#include <stdlib.h>		/* for NULL */
#include "gmp.h"
#include "gmp-impl.h"


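/* Illustrative caller (editorial sketch, not part of GMP; assumes the TMP
   allocation macros from gmp-impl.h):

     mp_limb_t qh;
     TMP_DECL;
     TMP_MARK;
     mp_ptr scratch = TMP_ALLOC_LIMBS (mpn_mu_divappr_q_itch (nn, dn, 0));
     qh = mpn_mu_divappr_q (qp, np, nn, dp, dn, scratch);
     TMP_FREE;

   qp receives nn-dn quotient limbs; the returned qh is the quotient limb
   just above them, and the stored result may exceed the true quotient by
   at most e.  */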
mp_limb_t
mpn_mu_divappr_q (mp_ptr qp,
		  mp_srcptr np,
		  mp_size_t nn,
		  mp_srcptr dp,
		  mp_size_t dn,
		  mp_ptr scratch)
{
  mp_size_t qn, in;
  mp_limb_t cy, qh;
  mp_ptr ip, tp;

  ASSERT (dn > 1);

  qn = nn - dn;

  /* If Q is smaller than D, truncate operands. */
  if (qn + 1 < dn)
    {
      np += dn - (qn + 1);
      nn -= dn - (qn + 1);
      dp += dn - (qn + 1);
      dn = qn + 1;
    }
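
  /* Worked example (editorial): nn = 10, dn = 8 gives qn = 2, so the low
     dn - (qn + 1) = 5 limbs of both N and D are dropped, leaving a 5-limb N
     and a 3-limb D; per the header comment, the discarded limbs only move Q
     within the allowed error e.  */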

  /* Compute the inverse size.  */
  in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
  ASSERT (in <= dn);

#if 1
  /* This alternative inverse computation method gets slightly more accurate
     results.  FIXMEs: (1) temp allocation needs not analysed; (2) itch
     function not adapted; (3) mpn_invertappr scratch needs not met.  */
  ip = scratch;
  tp = scratch + in + 1;

  /* compute an approximate inverse on (in+1) limbs */
  if (dn == in)
    {
      MPN_COPY (tp + 1, dp, in);
      tp[0] = 1;
      mpn_invertappr (ip, tp, in + 1, tp + in + 1);
      MPN_COPY_INCR (ip, ip + 1, in);
    }
  else
    {
      cy = mpn_add_1 (tp, dp + dn - (in + 1), in + 1, 1);
      if (UNLIKELY (cy != 0))
	MPN_ZERO (ip, in);
      else
	{
	  mpn_invertappr (ip, tp, in + 1, tp + in + 1);
	  MPN_COPY_INCR (ip, ip + 1, in);
	}
    }
#else
  /* This older inverse computation method gets slightly worse results than the
     one above.  */
  ip = scratch;
  tp = scratch + in;

  /* Compute inverse of D to in+1 limbs, then round to 'in' limbs.  Ideally the
     inversion function should do this automatically.  */
  if (dn == in)
    {
      tp[in + 1] = 0;
      MPN_COPY (tp + in + 2, dp, in);
      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
    }
  else
    {
      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
    }
  cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
  if (UNLIKELY (cy != 0))
    MPN_ZERO (tp + 1, in);
  MPN_COPY (ip, tp + 1, in);
#endif

  qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);

  return qh;
}

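/* Variant of mpn_mu_divappr_q taking a precomputed approximate inverse
   {ip,in} of the divisor's high limbs (editorial summary).  scratch must
   provide dn limbs for the partial remainder rp, room for the product area
   tp, and mpn_mulmod_bnm1's scratch; mpn_mu_divappr_q_itch accounts for all
   of this.  */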
mp_limb_t
mpn_preinv_mu_divappr_q (mp_ptr qp,
			 mp_srcptr np,
			 mp_size_t nn,
			 mp_srcptr dp,
			 mp_size_t dn,
			 mp_srcptr ip,
			 mp_size_t in,
			 mp_ptr scratch)
{
  mp_size_t qn;
  mp_limb_t cy, cx, qh;
  mp_limb_t r;
  mp_size_t tn, wn;

#define rp           scratch
#define tp           (scratch + dn)
#define scratch_out  (scratch + dn + tn)

  qn = nn - dn;

  np += qn;
  qp += qn;

  qh = mpn_cmp (np, dp, dn) >= 0;
  if (qh != 0)
    mpn_sub_n (rp, np, dp, dn);
  else
    MPN_COPY (rp, np, dn);

  if (qn == 0)
    return qh;			/* Degenerate use.  Should we allow this? */

  while (qn > 0)
    {
      if (qn < in)
	{
	  ip += in - qn;
	  in = qn;
	}
      np -= in;
      qp -= in;

      /* Compute the next block of quotient limbs by multiplying the inverse I
	 by the upper part of the partial remainder R.  */
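      /* Editorial note: the stored inverse I = {ip,in} carries an implicit
	 most significant bit, i.e. the effective value is B^in + I where
	 B = 2^GMP_NUMB_BITS.  Hence the quotient block is the high in limbs
	 of R_hi * (B^in + I), i.e. R_hi + mulhi (R_hi, I), with
	 R_hi = {rp + dn - in, in}.  */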
      mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
      cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
      ASSERT_ALWAYS (cy == 0);

      qn -= in;
      if (qn == 0)
	break;

      /* Compute the product of the quotient block and the divisor D, to be
	 subtracted from the partial remainder combined with new limbs from the
	 dividend N.  We only really need the low dn limbs.  */

      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
      else
	{
	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
	  wn = dn + in - tn;			/* number of wrapped limbs */
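	  /* Editorial note: mpn_mulmod_bnm1 returns D * Q_blk mod (B^tn - 1),
	     so the top wn product limbs have wrapped around into the low
	     limbs.  Those top limbs must agree with the top of the partial
	     remainder, so they are subtracted back out below, with cx and cy
	     correcting the possible off-by-one.  */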
	  if (wn > 0)
	    {
	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
	      ASSERT_ALWAYS (cx >= cy);
	      mpn_incr_u (tp, cx - cy);
	    }
	}

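      /* Editorial note: rp[dn - in] is the limb at position dn of the
	 shifted remainder R * B^in, and tp[dn] is the matching product limb,
	 so r, once the borrow cy from the low limbs is absorbed below,
	 counts how far the new remainder overflows dn limbs.  */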
      r = rp[dn - in] - tp[dn];

      /* Subtract the product from the partial remainder combined with new
	 limbs from the dividend N, generating a new partial remainder R.  */
      if (dn != in)
	{
	  cy = mpn_sub_n (tp, np, tp, in);	/* get next 'in' limbs from N */
	  cy = mpn_sub_nc (tp + in, rp, tp + in, dn - in, cy);
	  MPN_COPY (rp, tp, dn);		/* FIXME: try to avoid this */
	}
      else
	{
	  cy = mpn_sub_n (rp, np, tp, in);	/* get next 'in' limbs from N */
	}

      STAT (int i; int err = 0;
	    static int errarr[5]; static int err_rec; static int tot);

      /* Check the remainder R and adjust the quotient as needed.  */
      r -= cy;
      while (r != 0)
	{
	  /* We loop 0 times with about 69% probability, 1 time with about 31%
	     probability, 2 times with about 0.6% probability, if inverse is
	     computed as recommended.  */
	  mpn_incr_u (qp, 1);
	  cy = mpn_sub_n (rp, rp, dp, dn);
	  r -= cy;
	  STAT (err++);
	}
      if (mpn_cmp (rp, dp, dn) >= 0)
	{
	  /* This is executed with about 76% probability.  */
	  mpn_incr_u (qp, 1);
	  cy = mpn_sub_n (rp, rp, dp, dn);
	  STAT (err++);
	}

      STAT (
	    tot++;
	    errarr[err]++;
	    if (err > err_rec)
	      err_rec = err;
	    if (tot % 0x10000 == 0)
	      {
		for (i = 0; i <= err_rec; i++)
		  printf ("  %d(%.1f%%)", errarr[i], 100.0*errarr[i]/tot);
		printf ("\n");
	      }
	    );
    }

  /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
     quotient.  For now, just make sure the returned quotient is >= the real
     quotient; add 3 with saturating arithmetic.  */
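  /* Editorial note: the per-block corrections above can still leave the
     accumulated quotient a little below floor(N/D); the +3 makes the result
     an upper bound, and per the header comment the total excess stays within
     0 <= e <= 4.  */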
  qn = nn - dn;
  cy = mpn_add_1 (qp, qp, qn, 3);
  if (cy != 0)
    {
      if (qh != 0)
	{
	  /* Return a quotient of just 1-bits, with qh set.  */
	  mp_size_t i;
	  for (i = 0; i < qn; i++)
	    qp[i] = GMP_NUMB_MAX;
	}
      else
	{
	  /* Propagate carry into qh.  */
	  qh = 1;
	}
    }

  return qh;
}

/* In case k=0 (automatic choice), we distinguish 3 cases:
   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
   (c) qn < dn/3:       in = qn
   In all cases we have in <= dn.
 */
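/* Worked example (editorial): qn = 10, dn = 4 falls in case (a):
   b = ceil(10/4) = 3 blocks, hence in = ceil(10/3) = 4 = dn.  With qn = 3,
   dn = 4 we are in case (b) and in = ceil(3/2) = 2.  */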
mp_size_t
mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
{
  mp_size_t in;

  if (k == 0)
    {
      mp_size_t b;
      if (qn > dn)
	{
	  /* Compute an inverse size that is a nice partition of the quotient.  */
	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
	}
      else if (3 * qn > dn)
	{
	  in = (qn - 1) / 2 + 1;	/* b = 2 */
	}
      else
	{
	  in = (qn - 1) / 1 + 1;	/* b = 1 */
	}
    }
  else
    {
      mp_size_t xn;
      xn = MIN (dn, qn);
      in = (xn - 1) / k + 1;
    }

  return in;
}

mp_size_t
mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
  mp_size_t qn, in, itch_local, itch_out, itch_invapp;

  qn = nn - dn;
  if (qn + 1 < dn)
    {
      dn = qn + 1;
    }
  in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);

  itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
  itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
  itch_invapp = mpn_invertappr_itch (in + 1) + in + 2; /* 3in + 4 */
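  /* Editorial note: the '3in + 4' figure implies mpn_invertappr_itch (in + 1)
     evaluates to 2 * (in + 1), since 2(in + 1) + (in + 2) = 3in + 4.  */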

  ASSERT (dn + itch_local + itch_out >= itch_invapp);
  return in + MAX (dn + itch_local + itch_out, itch_invapp);
}