github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/sparc64/mode1o.c (about)

     1  /* UltraSPARC 64 mpn_modexact_1c_odd -- mpn by limb exact style remainder.
     2  
     3     THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
     4     CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
     5     FUTURE GNU MP RELEASES.
     6  
     7  Copyright 2000-2003 Free Software Foundation, Inc.
     8  
     9  This file is part of the GNU MP Library.
    10  
    11  The GNU MP Library is free software; you can redistribute it and/or modify
    12  it under the terms of either:
    13  
    14    * the GNU Lesser General Public License as published by the Free
    15      Software Foundation; either version 3 of the License, or (at your
    16      option) any later version.
    17  
    18  or
    19  
    20    * the GNU General Public License as published by the Free Software
    21      Foundation; either version 2 of the License, or (at your option) any
    22      later version.
    23  
    24  or both in parallel, as here.
    25  
    26  The GNU MP Library is distributed in the hope that it will be useful, but
    27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    29  for more details.
    30  
    31  You should have received copies of the GNU General Public License and the
    32  GNU Lesser General Public License along with the GNU MP Library.  If not,
    33  see https://www.gnu.org/licenses/.  */
    34  
    35  #include "gmp.h"
    36  #include "gmp-impl.h"
    37  #include "longlong.h"
    38  
    39  #include "mpn/sparc64/sparc64.h"
    40  
    41  
    42  /*                 64-bit divisor   32-bit divisor
    43                      cycles/limb      cycles/limb
    44                       (approx)         (approx)
    45     Ultrasparc 2i:       ?                ?
    46  */
    47  
    48  
    49  /* This implementation reduces the number of multiplies done, knowing that
    50     on ultrasparc 1 and 2 the mulx instruction stalls the whole chip.
    51  
    52     The key idea is to use the fact that the low limb of q*d equals l, this
    53     being the whole purpose of the q calculated.  It means there's no need to
    54     calculate the lowest 32x32->64 part of the q*d, instead it can be
    55     inferred from l and the other three 32x32->64 parts.  See sparc64.h for
    56     details.
    57  
    58     When d is 32-bits, the same applies, but in this case there's only one
    59     other 32x32->64 part (ie. HIGH(q)*d).
    60  
    61     The net effect is that for 64-bit divisor each limb is 4 mulx, or for
    62     32-bit divisor each is 2 mulx.
    63  
    64     Enhancements:
    65  
    66     No doubt this could be done in assembler, if that helped the scheduling,
    67     or perhaps guaranteed good code irrespective of the compiler.
    68  
    69     Alternatives:
    70  
    71     It might be possibly to use floating point.  The loop is dominated by
    72     multiply latency, so not sure if floats would improve that.  One
    73     possibility would be to take two limbs at a time, with a 128 bit inverse,
    74     if there's enough registers, which could effectively use float throughput
    75     to reduce total latency across two limbs.  */
    76  
    77  #define ASSERT_RETVAL(r)                \
    78    ASSERT (orig_c < d ? r < d : r <= d)
    79  
    80  mp_limb_t
    81  mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)
    82  {
    83    mp_limb_t  c = orig_c;
    84    mp_limb_t  s, l, q, h, inverse;
    85  
    86    ASSERT (size >= 1);
    87    ASSERT (d & 1);
    88    ASSERT_MPN (src, size);
    89    ASSERT_LIMB (d);
    90    ASSERT_LIMB (c);
    91  
    92    /* udivx is faster than 10 or 12 mulx's for one limb via an inverse */
    93    if (size == 1)
    94      {
    95        s = src[0];
    96        if (s > c)
    97  	{
    98  	  l = s-c;
    99  	  h = l % d;
   100  	  if (h != 0)
   101  	    h = d - h;
   102  	}
   103        else
   104  	{
   105  	  l = c-s;
   106  	  h = l % d;
   107  	}
   108        return h;
   109      }
   110  
   111    binvert_limb (inverse, d);
   112  
   113    if (d <= 0xFFFFFFFF)
   114      {
   115        s = *src++;
   116        size--;
   117        do
   118          {
   119            SUBC_LIMB (c, l, s, c);
   120            s = *src++;
   121            q = l * inverse;
   122            umul_ppmm_half_lowequal (h, q, d, l);
   123            c += h;
   124            size--;
   125          }
   126        while (size != 0);
   127  
   128        if (s <= d)
   129          {
   130            /* With high s <= d the final step can be a subtract and addback.
   131               If c==0 then the addback will restore to l>=0.  If c==d then
   132               will get l==d if s==0, but that's ok per the function
   133               definition.  */
   134  
   135            l = c - s;
   136            l += (l > c ? d : 0);
   137  
   138            ASSERT_RETVAL (l);
   139            return l;
   140          }
   141        else
   142          {
   143            /* Can't skip a divide, just do the loop code once more. */
   144            SUBC_LIMB (c, l, s, c);
   145            q = l * inverse;
   146            umul_ppmm_half_lowequal (h, q, d, l);
   147            c += h;
   148  
   149            ASSERT_RETVAL (c);
   150            return c;
   151          }
   152      }
   153    else
   154      {
   155        mp_limb_t  dl = LOW32 (d);
   156        mp_limb_t  dh = HIGH32 (d);
   157        long i;
   158  
   159        s = *src++;
   160        size--;
   161        do
   162          {
   163            SUBC_LIMB (c, l, s, c);
   164            s = *src++;
   165            q = l * inverse;
   166            umul_ppmm_lowequal (h, q, d, dh, dl, l);
   167            c += h;
   168            size--;
   169          }
   170        while (size != 0);
   171  
   172        if (s <= d)
   173          {
   174            /* With high s <= d the final step can be a subtract and addback.
   175               If c==0 then the addback will restore to l>=0.  If c==d then
   176               will get l==d if s==0, but that's ok per the function
   177               definition.  */
   178  
   179            l = c - s;
   180            l += (l > c ? d : 0);
   181  
   182            ASSERT_RETVAL (l);
   183            return l;
   184          }
   185        else
   186          {
   187            /* Can't skip a divide, just do the loop code once more. */
   188            SUBC_LIMB (c, l, s, c);
   189            q = l * inverse;
   190            umul_ppmm_lowequal (h, q, d, dh, dl, l);
   191            c += h;
   192  
   193            ASSERT_RETVAL (c);
   194            return c;
   195          }
   196      }
   197  }