github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/divrem_1.c

/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always, CPUs where this doesn't suit have
   tuned thresholds.  Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS).  */

#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif


/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS" which would arise in the unnorm
   code used on an already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course.  And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds.  One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop.  This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT.  But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain.  */
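/* Illustrative sketch, not part of the original file: the multiply-by-
   inverse division described above, specialized to 64-bit limbs with no
   nail bits, using the compiler's unsigned __int128 (GCC/Clang) in place
   of the umul_ppmm/add_ssaaaa primitives from longlong.h.  The demo_*
   names are hypothetical; the real code uses the invert_limb and
   udiv_qrnnd_preinv macros from gmp-impl.h.  */

#include <stdint.h>

/* dinv = floor((B^2-1)/d) - B for a normalized d (high bit set), where
   B = 2^64.  That quotient lies in [B+1, 2B-1], so the result fits in
   one limb.  */
static uint64_t
demo_invert_limb (uint64_t d)
{
  return (uint64_t) (~(unsigned __int128) 0 / d
		     - ((unsigned __int128) 1 << 64));
}

/* Divide the two-limb number (nh,nl), with nh < d, by the normalized d,
   storing the quotient limb in *qp and returning the remainder: one
   64x64->128 multiply by dinv, then at most two conditional
   adjustments.  */
static uint64_t
demo_udiv_preinv (uint64_t *qp, uint64_t nh, uint64_t nl,
		  uint64_t d, uint64_t dinv)
{
  unsigned __int128 s;
  uint64_t qh, ql, r;

  /* (qh,ql) = dinv*nh + (nh+1,nl); the +1 biases the candidate quotient
     upward so the checks below can settle it.  */
  s = (unsigned __int128) dinv * nh
      + ((((unsigned __int128) nh + 1) << 64) | nl);
  qh = (uint64_t) (s >> 64);
  ql = (uint64_t) s;

  r = nl - qh * d;	/* candidate remainder, computed mod B */
  if (r > ql)		/* qh was one too large */
    {
      qh--;
      r += d;
    }
  if (r >= d)		/* rare: qh still one too small */
    {
      qh++;
      r -= d;
    }
  *qp = qh;
  return r;
}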
mp_limb_t
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
	      mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t  n;
  mp_size_t  i;
  mp_limb_t  n1, n0;
  mp_limb_t  r = 0;

  ASSERT (qxn >= 0);
  ASSERT (un >= 0);
  ASSERT (d != 0);
  /* FIXME: What's the correct overlap rule when qxn!=0? */
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));

  n = un + qxn;
  if (n == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  qp += (n - 1);   /* Make qp point at most significant quotient limb */

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      if (un != 0)
	{
	  /* High quotient limb is 0 or 1, skip a divide step. */
	  mp_limb_t q;
	  r = up[un - 1] << GMP_NAIL_BITS;
	  q = (r >= d);
	  *qp-- = q;
	  r -= (d & -q);
	  r >>= GMP_NAIL_BITS;
	  n--;
	  un--;
	}

      if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
	{
	plain:
	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd (*qp, r, r, n0, d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r;
	}
      else
	{
	  /* Multiply-by-inverse, divisor already normalized. */
	  mp_limb_t dinv;
	  invert_limb (dinv, d);

	  for (i = un - 1; i >= 0; i--)
	    {
	      n0 = up[i] << GMP_NAIL_BITS;
	      udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r;
	}
    }
  else
    {
      /* Most significant bit of divisor == 0.  */
      int cnt;

      /* Skip a division if high < divisor (high quotient 0).  Testing here
	 before normalizing will still skip as often as possible.  */
      if (un != 0)
	{
	  n1 = up[un - 1] << GMP_NAIL_BITS;
	  if (n1 < d)
	    {
	      r = n1 >> GMP_NAIL_BITS;
	      *qp-- = 0;
	      n--;
	      if (n == 0)
		return r;
	      un--;
	    }
	}
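      /* If plain division copes with an unnormalized divisor and the size
	 is below the threshold, the non-shifting loop above does the job
	 (the goto just below).  Otherwise the divisor and running
	 remainder are normalized by a left shift of cnt, and each shifted
	 dividend limb is reconstituted on the fly from two adjacent source
	 limbs (the EXTRACT step discussed in the comment at the top of the
	 file) rather than by a separate mpn_lshift pass.  */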
      if (! UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
	goto plain;

      count_leading_zeros (cnt, d);
      d <<= cnt;
      r <<= cnt;

      if (UDIV_NEEDS_NORMALIZATION
	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
	{
	  mp_limb_t  nshift;
	  if (un != 0)
	    {
	      n1 = up[un - 1] << GMP_NAIL_BITS;
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
	      for (i = un - 2; i >= 0; i--)
		{
		  n0 = up[i] << GMP_NAIL_BITS;
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
		  udiv_qrnnd (*qp, r, r, nshift, d);
		  r >>= GMP_NAIL_BITS;
		  qp--;
		  n1 = n0;
		}
	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r >> cnt;
	}
      else
	{
	  mp_limb_t  dinv, nshift;
	  invert_limb (dinv, d);
	  if (un != 0)
	    {
	      n1 = up[un - 1] << GMP_NAIL_BITS;
	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
	      for (i = un - 2; i >= 0; i--)
		{
		  n0 = up[i] << GMP_NAIL_BITS;
		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
		  r >>= GMP_NAIL_BITS;
		  qp--;
		  n1 = n0;
		}
	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  for (i = qxn - 1; i >= 0; i--)
	    {
	      udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
	      r >>= GMP_NAIL_BITS;
	      qp--;
	    }
	  return r >> cnt;
	}
    }
}
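/* Usage sketch, not part of the original file: calling mpn_divrem_1
   through GMP's public mpn interface.  Intended as a separate test
   program built against an installed GMP (cc demo.c -lgmp); the input
   values are arbitrary.  qxn = 0 requests no extra "fraction" quotient
   limbs, so the quotient occupies un = 2 limbs.  */

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mp_limb_t up[2] = { 123456789, 42 };	/* u = 42*B + 123456789 */
  mp_limb_t qp[2];
  mp_limb_t r = mpn_divrem_1 (qp, 0, up, 2, 7);
  printf ("quotient limbs {%lu, %lu}, remainder %lu\n",
	  (unsigned long) qp[0], (unsigned long) qp[1],
	  (unsigned long) r);
  return 0;
}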