github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mod_1_4.c (about)

     1  /* mpn_mod_1s_4p (ap, n, b, cps)
     2     Divide (ap,n) by b.  Return the single-limb remainder.
     3     Requires that b < B / 4.
     4  
     5     Contributed to the GNU project by Torbjorn Granlund.
     6     Based on a suggestion by Peter L. Montgomery.
     7  
     8     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     9     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    10     GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
    11  
    12  Copyright 2008-2010 Free Software Foundation, Inc.
    13  
    14  This file is part of the GNU MP Library.
    15  
    16  The GNU MP Library is free software; you can redistribute it and/or modify
    17  it under the terms of either:
    18  
    19    * the GNU Lesser General Public License as published by the Free
    20      Software Foundation; either version 3 of the License, or (at your
    21      option) any later version.
    22  
    23  or
    24  
    25    * the GNU General Public License as published by the Free Software
    26      Foundation; either version 2 of the License, or (at your option) any
    27      later version.
    28  
    29  or both in parallel, as here.
    30  
    31  The GNU MP Library is distributed in the hope that it will be useful, but
    32  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    33  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    34  for more details.
    35  
    36  You should have received copies of the GNU General Public License and the
    37  GNU Lesser General Public License along with the GNU MP Library.  If not,
    38  see https://www.gnu.org/licenses/.  */
    39  
    40  #include "gmp.h"
    41  #include "gmp-impl.h"
    42  #include "longlong.h"
    43  
    44  void
    45  mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
    46  {
    47    mp_limb_t bi;
    48    mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
    49    int cnt;
    50  
    51    ASSERT (b <= (~(mp_limb_t) 0) / 4);
    52  
    53    count_leading_zeros (cnt, b);
    54  
    55    b <<= cnt;
    56    invert_limb (bi, b);
    57  
    58    cps[0] = bi;
    59    cps[1] = cnt;
    60  
    61    B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
    62    ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
    63    cps[2] = B1modb >> cnt;
    64  
    65    udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
    66    cps[3] = B2modb >> cnt;
    67  
    68    udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
    69    cps[4] = B3modb >> cnt;
    70  
    71    udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
    72    cps[5] = B4modb >> cnt;
    73  
    74    udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
    75    cps[6] = B5modb >> cnt;
    76  
    77  #if WANT_ASSERT
    78    {
    79      int i;
    80      b = cps[2];
    81      for (i = 3; i <= 6; i++)
    82        {
    83  	b += cps[i];
    84  	ASSERT (b >= cps[i]);
    85        }
    86    }
    87  #endif
    88  }
    89  
    90  mp_limb_t
    91  mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
    92  {
    93    mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
    94    mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
    95    mp_size_t i;
    96    int cnt;
    97  
    98    ASSERT (n >= 1);
    99  
   100    B1modb = cps[2];
   101    B2modb = cps[3];
   102    B3modb = cps[4];
   103    B4modb = cps[5];
   104    B5modb = cps[6];
   105  
   106    switch (n & 3)
   107      {
   108      case 0:
   109        umul_ppmm (ph, pl, ap[n - 3], B1modb);
   110        add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
   111        umul_ppmm (ch, cl, ap[n - 2], B2modb);
   112        add_ssaaaa (ph, pl, ph, pl, ch, cl);
   113        umul_ppmm (rh, rl, ap[n - 1], B3modb);
   114        add_ssaaaa (rh, rl, rh, rl, ph, pl);
   115        n -= 4;
   116        break;
   117      case 1:
   118        rh = 0;
   119        rl = ap[n - 1];
   120        n -= 1;
   121        break;
   122      case 2:
   123        rh = ap[n - 1];
   124        rl = ap[n - 2];
   125        n -= 2;
   126        break;
   127      case 3:
   128        umul_ppmm (ph, pl, ap[n - 2], B1modb);
   129        add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
   130        umul_ppmm (rh, rl, ap[n - 1], B2modb);
   131        add_ssaaaa (rh, rl, rh, rl, ph, pl);
   132        n -= 3;
   133        break;
   134      }
   135  
   136    for (i = n - 4; i >= 0; i -= 4)
   137      {
   138        /* rr = ap[i]				< B
   139  	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
   140  	    + ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
   141  	    + ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
   142  	    + LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
   143  	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
   144        */
   145        umul_ppmm (ph, pl, ap[i + 1], B1modb);
   146        add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
   147  
   148        umul_ppmm (ch, cl, ap[i + 2], B2modb);
   149        add_ssaaaa (ph, pl, ph, pl, ch, cl);
   150  
   151        umul_ppmm (ch, cl, ap[i + 3], B3modb);
   152        add_ssaaaa (ph, pl, ph, pl, ch, cl);
   153  
   154        umul_ppmm (ch, cl, rl, B4modb);
   155        add_ssaaaa (ph, pl, ph, pl, ch, cl);
   156  
   157        umul_ppmm (rh, rl, rh, B5modb);
   158        add_ssaaaa (rh, rl, rh, rl, ph, pl);
   159      }
   160  
   161    umul_ppmm (rh, cl, rh, B1modb);
   162    add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
   163  
   164    cnt = cps[1];
   165    bi = cps[0];
   166  
   167    r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
   168    udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
   169  
   170    return r >> cnt;
   171  }