github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/divrem_1.c (about)

     1  /* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
     2  
     3  Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
     4  Inc.
     5  
     6  This file is part of the GNU MP Library.
     7  
     8  The GNU MP Library is free software; you can redistribute it and/or modify
     9  it under the terms of either:
    10  
    11    * the GNU Lesser General Public License as published by the Free
    12      Software Foundation; either version 3 of the License, or (at your
    13      option) any later version.
    14  
    15  or
    16  
    17    * the GNU General Public License as published by the Free Software
    18      Foundation; either version 2 of the License, or (at your option) any
    19      later version.
    20  
    21  or both in parallel, as here.
    22  
    23  The GNU MP Library is distributed in the hope that it will be useful, but
    24  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    25  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    26  for more details.
    27  
    28  You should have received copies of the GNU General Public License and the
    29  GNU Lesser General Public License along with the GNU MP Library.  If not,
    30  see https://www.gnu.org/licenses/.  */
    31  
    32  #include "gmp.h"
    33  #include "gmp-impl.h"
    34  #include "longlong.h"
    35  
    36  #include "mpn/sparc64/sparc64.h"
    37  
    38  
    39  /*                   64-bit divisor       32-bit divisor
    40                         cycles/limb          cycles/limb
    41                          (approx)             (approx)
    42                     integer  fraction    integer  fraction
    43     Ultrasparc 2i:    160      160          122      96
    44  */
    45  
    46  
    47  /* 32-bit divisors are treated in special case code.  This requires 4 mulx
    48     per limb instead of 8 in the general case.
    49  
    50     For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
    51     addressing, to get the two halves of each limb read in the correct order.
    52     This is kept in an adj variable.  Doing that measures about 4 c/l faster
    53     than just writing HALF_ENDIAN_ADJ(i) in the integer loop.  The latter
    54     shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
    55     (on gcc 3.2.1 at least).  The fraction loop doesn't seem affected, but we
    56     still use a variable since that ought to work out best.  */
    57  
    58  mp_limb_t
    59  mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
    60                mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
    61  {
          /* Divide {ap_limbptr, size_limbs} (extended below by xsize_limbs
             zero "fraction" limbs) by d_limb.  size+xsize quotient limbs are
             stored at qp_limbptr, integer part in the high limbs, and the
             remainder (less than d_limb) is returned.  */
    62    mp_size_t  total_size_limbs;
    63    mp_size_t  i;
    64  
    65    ASSERT (xsize_limbs >= 0);
    66    ASSERT (size_limbs >= 0);
    67    ASSERT (d_limb != 0);
    68    /* FIXME: What's the correct overlap rule when xsize!=0? */
    69    ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
    70                                    ap_limbptr, size_limbs));
    71  
    72    total_size_limbs = size_limbs + xsize_limbs;
    73    if (UNLIKELY (total_size_limbs == 0))
    74      return 0;
    75  
    76    /* udivx is good for total_size==1, and no need to bother checking
    77       limb<divisor, since if that's likely the caller should check */
    78    if (UNLIKELY (total_size_limbs == 1))
    79      {
    80        mp_limb_t  a, q;
    81        a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
    82        q = a / d_limb;
    83        qp_limbptr[0] = q;
    84        return a - q*d_limb;
    85      }
    86  
          /* 32-bit divisor special case: treat the operands as 2*size 32-bit
             half limbs, so only 4 mulx per 64-bit limb are needed instead of
             8 (see the comment at the top of the file).  */
    87    if (d_limb <= CNST_LIMB(0xFFFFFFFF))
    88      {
    89        mp_size_t  size, xsize, total_size, adj;
    90        unsigned   *qp, n1, n0, q, r, nshift, norm_rmask;
    91        mp_limb_t  dinv_limb;
    92        const unsigned *ap;
    93        int        norm, norm_rshift;
    94  
              /* sizes are counted in 32-bit halves from here on */
    95        size = 2 * size_limbs;
    96        xsize = 2 * xsize_limbs;
    97        total_size = size + xsize;
    98  
    99        ap = (unsigned *) ap_limbptr;
   100        qp = (unsigned *) qp_limbptr;
   101  
   102        qp += xsize;
   103        r = 0;        /* initial remainder */
   104  
   105        if (LIKELY (size != 0))
   106          {
   107            n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
   108  
   109            /* If the length of the source is uniformly distributed, then
   110               there's a 50% chance of the high 32-bits being zero, which we
   111               can skip.  */
   112            if (n1 == 0)
   113              {
   114                n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
   115                total_size--;
   116                size--;
   117                ASSERT (size > 0);  /* because always even */
   118                qp[size + HALF_ENDIAN_ADJ(1)] = 0;
   119              }
   120  
   121            /* Skip a division if high < divisor (high quotient 0).  Testing
   122               here before normalizing will still skip as often as
   123               possible.  */
   124            if (n1 < d_limb)
   125              {
   126                r = n1;
   127                size--;
   128                qp[size + HALF_ENDIAN_ADJ(size)] = 0;
   129                total_size--;
   130                if (total_size == 0)
   131                  return r;
   132              }
   133          }
   134  
              /* Normalize so the high bit of the 32-bit d_limb value is set.
                 NOTE(review): the -32 suggests count_leading_zeros_32 counts
                 zeros within the full 64-bit limb — confirm against the
                 definition in sparc64.h.  */
   135        count_leading_zeros_32 (norm, d_limb);
   136        norm -= 32;
   137        d_limb <<= norm;
   138        r <<= norm;
   139  
              /* When norm==0, norm_rshift would be 32, which is an undefined
                 shift amount for a 32-bit operand; norm_rmask zeroes that
                 term instead of shifting.  */
   140        norm_rshift = 32 - norm;
   141        norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
   142  
   143        invert_half_limb (dinv_limb, d_limb);
   144  
              /* Integer part, dividing from the high half limb downwards.
                 adj alternates sign each step so the half limbs are read and
                 written in the right order on big endian hosts (see the
                 HALF_ENDIAN_ADJ comment above).  */
   145        if (LIKELY (size != 0))
   146          {
   147            i = size - 1;
   148            adj = HALF_ENDIAN_ADJ (i);
   149            n1 = ap[i + adj];
   150            adj = -adj;
   151            r |= ((n1 >> norm_rshift) & norm_rmask);
   152            for ( ; i > 0; i--)
   153              {
   154                n0 = ap[i-1 + adj];
   155                adj = -adj;
   156                nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
   157                udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
   158                qp[i + adj] = q;
   159                n1 = n0;
   160              }
   161            nshift = n1 << norm;
   162            udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
   163            qp[0 + HALF_ENDIAN_ADJ(0)] = q;
   164          }
              /* Fraction part: develop xsize further quotient half limbs
                 from zero input halves.  */
   165        qp -= xsize;
   166        adj = HALF_ENDIAN_ADJ (0);
   167        for (i = xsize-1; i >= 0; i--)
   168          {
   169            udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
   170            adj = -adj;
   171            qp[i + adj] = q;
   172          }
   173  
              /* un-normalize the remainder */
   174        return r >> norm;
   175      }
   176    else
   177      {
              /* General case: 64-bit divisor, full-limb pre-inverted
                 division (8 mulx per limb).  */
   178        mp_srcptr  ap;
   179        mp_ptr     qp;
   180        mp_size_t  size, xsize, total_size;
   181        mp_limb_t  d, n1, n0, q, r, dinv, nshift, norm_rmask;
   182        int        norm, norm_rshift;
   183  
   184        ap = ap_limbptr;
   185        qp = qp_limbptr;
   186        size = size_limbs;
   187        xsize = xsize_limbs;
   188        total_size = total_size_limbs;
   189        d = d_limb;
   190  
   191        qp += total_size;   /* above high limb */
   192        r = 0;              /* initial remainder */
   193  
   194        if (LIKELY (size != 0))
   195          {
   196            /* Skip a division if high < divisor (high quotient 0).  Testing
   197               here before normalizing will still skip as often as
   198               possible.  */
   199            n1 = ap[size-1];
   200            if (n1 < d)
   201              {
   202                r = n1;
   203                *--qp = 0;
   204                total_size--;
   205                if (total_size == 0)
   206                  return r;
   207                size--;
   208              }
   209          }
   210  
              /* Normalize so the high bit of d is set, as the pre-inverted
                 division below requires.  */
   211        count_leading_zeros (norm, d);
   212        d <<= norm;
   213        r <<= norm;
   214  
              /* When norm==0, norm_rshift would be GMP_LIMB_BITS, an
                 undefined shift amount; norm_rmask zeroes that term
                 instead.  */
   215        norm_rshift = GMP_LIMB_BITS - norm;
   216        norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
   217  
   218        invert_limb (dinv, d);
   219  
              /* Integer part: quotient limbs developed from the high end,
                 stored downwards with *--qp.  */
   220        if (LIKELY (size != 0))
   221          {
   222            n1 = ap[size-1];
   223            r |= ((n1 >> norm_rshift) & norm_rmask);
   224            for (i = size-2; i >= 0; i--)
   225              {
   226                n0 = ap[i];
   227                nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
   228                udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
   229                *--qp = q;
   230                n1 = n0;
   231              }
   232            nshift = n1 << norm;
   233            udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
   234            *--qp = q;
   235          }
              /* Fraction part: xsize quotient limbs from zero input limbs.  */
   236        for (i = 0; i < xsize; i++)
   237          {
   238            udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
   239            *--qp = q;
   240          }
              /* un-normalize the remainder */
   241        return r >> norm;
   242      }
   243  }