github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/mod_1.c (about)

     1  /* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
     2  
     3  Copyright 1991, 1993, 1994, 1999-2001, 2003, 2010 Free Software Foundation,
     4  Inc.
     5  
     6  This file is part of the GNU MP Library.
     7  
     8  The GNU MP Library is free software; you can redistribute it and/or modify
     9  it under the terms of either:
    10  
    11    * the GNU Lesser General Public License as published by the Free
    12      Software Foundation; either version 3 of the License, or (at your
    13      option) any later version.
    14  
    15  or
    16  
    17    * the GNU General Public License as published by the Free Software
    18      Foundation; either version 2 of the License, or (at your option) any
    19      later version.
    20  
    21  or both in parallel, as here.
    22  
    23  The GNU MP Library is distributed in the hope that it will be useful, but
    24  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  for more details.
    27  
    28  You should have received copies of the GNU General Public License and the
    29  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  see https://www.gnu.org/licenses/.  */
    31  
    32  #include "gmp.h"
    33  #include "gmp-impl.h"
    34  #include "longlong.h"
    35  
    36  #include "mpn/sparc64/sparc64.h"
    37  
    38  
    39  /*                 64-bit divisor   32-bit divisor
    40                      cycles/limb      cycles/limb
    41                       (approx)         (approx)
    42     Ultrasparc 2i:      160               120
    43  */
    44  
    45  
    46  /* 32-bit divisors are treated in special case code.  This requires 4 mulx
    47     per limb instead of 8 in the general case.
    48  
    49     For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
    50     addressing, to get the two halves of each limb read in the correct order.
    51     This is kept in an adj variable.  Doing that measures about 6 c/l faster
    52     than just writing HALF_ENDIAN_ADJ(i) in the loop.  The latter shouldn't
    53     be 6 cycles worth of work, but perhaps it doesn't schedule well (on gcc
    54     3.2.1 at least).
    55  
    56     A simple udivx/umulx loop for the 32-bit case was attempted for small
    57     sizes, but at size==2 it was only about the same speed and at size==3 was
    58     slower.  */
    59  
    60  static mp_limb_t
    61  mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
    62  {
    63    int        norm, norm_rshift;
    64    mp_limb_t  src_high_limb;
    65    mp_size_t  i;
    66  
    67    ASSERT (size_limbs >= 0);
    68    ASSERT (d_limb != 0);
    69  
    70    if (UNLIKELY (size_limbs == 0))
    71      return 0;
    72  
    73    src_high_limb = src_limbptr[size_limbs-1];
    74  
    75    /* udivx is good for size==1, and no need to bother checking limb<divisor,
    76       since if that's likely the caller should check */
    77    if (UNLIKELY (size_limbs == 1))
    78      return src_high_limb % d_limb;
    79  
    80    if (d_limb <= CNST_LIMB(0xFFFFFFFF))
    81      {
    82        unsigned   *src, n1, n0, r, dummy_q, nshift, norm_rmask;
    83        mp_size_t  size, adj;
    84        mp_limb_t  dinv_limb;
    85  
    86        size = 2 * size_limbs;    /* halfwords */
    87        src = (unsigned *) src_limbptr;
    88  
    89        /* prospective initial remainder, if < d */
    90        r = src_high_limb >> 32;
    91  
    92        /* If the length of the source is uniformly distributed, then there's
    93           a 50% chance of the high 32-bits being zero, which we can skip.  */
    94        if (r == 0)
    95          {
    96            r = (unsigned) src_high_limb;
    97            size--;
    98            ASSERT (size > 0);  /* because always even */
    99          }
   100  
   101        /* Skip a division if high < divisor.  Having the test here before
   102           normalizing will still skip as often as possible.  */
   103        if (r < d_limb)
   104          {
   105            size--;
   106            ASSERT (size > 0);  /* because size==1 handled above */
   107          }
   108        else
   109          r = 0;
   110  
   111        count_leading_zeros_32 (norm, d_limb);
   112        norm -= 32;
   113        d_limb <<= norm;
   114  
   115        norm_rshift = 32 - norm;
   116        norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
   117        i = size-1;
   118        adj = HALF_ENDIAN_ADJ (i);
   119        n1 = src [i + adj];
   120        r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
   121  
   122        invert_half_limb (dinv_limb, d_limb);
   123        adj = -adj;
   124  
   125        for (i--; i >= 0; i--)
   126          {
   127            n0 = src [i + adj];
   128            adj = -adj;
   129            nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
   130            udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
   131            n1 = n0;
   132          }
   133  
   134        /* same as loop, but without n0 */
   135        nshift = n1 << norm;
   136        udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
   137  
   138        ASSERT ((r & ((1 << norm) - 1)) == 0);
   139        return r >> norm;
   140      }
   141    else
   142      {
   143        mp_srcptr  src;
   144        mp_size_t  size;
   145        mp_limb_t  n1, n0, r, dinv, dummy_q, nshift, norm_rmask;
   146  
   147        src = src_limbptr;
   148        size = size_limbs;
   149        r = src_high_limb;  /* initial remainder */
   150  
   151        /* Skip a division if high < divisor.  Having the test here before
   152           normalizing will still skip as often as possible.  */
   153        if (r < d_limb)
   154          {
   155            size--;
   156            ASSERT (size > 0);  /* because size==1 handled above */
   157          }
   158        else
   159          r = 0;
   160  
   161        count_leading_zeros (norm, d_limb);
   162        d_limb <<= norm;
   163  
   164        norm_rshift = GMP_LIMB_BITS - norm;
   165        norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
   166  
   167        src += size;
   168        n1 = *--src;
   169        r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
   170  
   171        invert_limb (dinv, d_limb);
   172  
   173        for (i = size-2; i >= 0; i--)
   174          {
   175            n0 = *--src;
   176            nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
   177            udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
   178            n1 = n0;
   179          }
   180  
   181        /* same as loop, but without n0 */
   182        nshift = n1 << norm;
   183        udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
   184  
   185        ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);
   186        return r >> norm;
   187      }
   188  }
   189  
   190  mp_limb_t
   191  mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
   192  {
   193    ASSERT (n >= 0);
   194    ASSERT (b != 0);
   195  
   196    /* Should this be handled at all?  Rely on callers?  Note un==0 is currently
   197       required by mpz/fdiv_r_ui.c and possibly other places.  */
   198    if (n == 0)
   199      return 0;
   200  
   201    if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
   202      {
   203        if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
   204  	{
   205  	  return mpn_mod_1_anynorm (ap, n, b);
   206  	}
   207        else
   208  	{
   209  	  mp_limb_t pre[4];
   210  	  mpn_mod_1_1p_cps (pre, b);
   211  	  return mpn_mod_1_1p (ap, n, b, pre);
   212  	}
   213      }
   214    else
   215      {
   216        if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
   217  	{
   218  	  return mpn_mod_1_anynorm (ap, n, b);
   219  	}
   220        else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
   221  	{
   222  	  mp_limb_t pre[4];
   223  	  mpn_mod_1_1p_cps (pre, b);
   224  	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
   225  	}
   226        else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
   227  	{
   228  	  mp_limb_t pre[5];
   229  	  mpn_mod_1s_2p_cps (pre, b);
   230  	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
   231  	}
   232        else
   233  	{
   234  	  mp_limb_t pre[7];
   235  	  mpn_mod_1s_4p_cps (pre, b);
   236  	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
   237  	}
   238      }
   239  }