github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/hgcd2.c (about)

     1  /* hgcd2.c
     2  
     3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     6  
     7  Copyright 1996, 1998, 2000-2004, 2008, 2012 Free Software Foundation, Inc.
     8  
     9  This file is part of the GNU MP Library.
    10  
    11  The GNU MP Library is free software; you can redistribute it and/or modify
    12  it under the terms of either:
    13  
    14    * the GNU Lesser General Public License as published by the Free
    15      Software Foundation; either version 3 of the License, or (at your
    16      option) any later version.
    17  
    18  or
    19  
    20    * the GNU General Public License as published by the Free Software
    21      Foundation; either version 2 of the License, or (at your option) any
    22      later version.
    23  
    24  or both in parallel, as here.
    25  
    26  The GNU MP Library is distributed in the hope that it will be useful, but
    27  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    28  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    29  for more details.
    30  
    31  You should have received copies of the GNU General Public License and the
    32  GNU Lesser General Public License along with the GNU MP Library.  If not,
    33  see https://www.gnu.org/licenses/.  */
    34  
    35  #include "gmp.h"
    36  #include "gmp-impl.h"
    37  #include "longlong.h"
    38  
    39  #if GMP_NAIL_BITS == 0
    40  
    41  /* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
    42     the remainder. */
    43  
    44  /* Single-limb division optimized for small quotients. */
    45  static inline mp_limb_t
    46  div1 (mp_ptr rp,
    47        mp_limb_t n0,
    48        mp_limb_t d0)
    49  {
    50    mp_limb_t q = 0;
    51  
    52    if ((mp_limb_signed_t) n0 < 0)
    53      {
    54        int cnt;
    55        for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
    56  	{
    57  	  d0 = d0 << 1;
    58  	}
    59  
    60        q = 0;
    61        while (cnt)
    62  	{
    63  	  q <<= 1;
    64  	  if (n0 >= d0)
    65  	    {
    66  	      n0 = n0 - d0;
    67  	      q |= 1;
    68  	    }
    69  	  d0 = d0 >> 1;
    70  	  cnt--;
    71  	}
    72      }
    73    else
    74      {
    75        int cnt;
    76        for (cnt = 0; n0 >= d0; cnt++)
    77  	{
    78  	  d0 = d0 << 1;
    79  	}
    80  
    81        q = 0;
    82        while (cnt)
    83  	{
    84  	  d0 = d0 >> 1;
    85  	  q <<= 1;
    86  	  if (n0 >= d0)
    87  	    {
    88  	      n0 = n0 - d0;
    89  	      q |= 1;
    90  	    }
    91  	  cnt--;
    92  	}
    93      }
    94    *rp = n0;
    95    return q;
    96  }
    97  
    98  /* Two-limb division optimized for small quotients.  */
    99  static inline mp_limb_t
   100  div2 (mp_ptr rp,
   101        mp_limb_t nh, mp_limb_t nl,
   102        mp_limb_t dh, mp_limb_t dl)
   103  {
   104    mp_limb_t q = 0;
   105  
   106    if ((mp_limb_signed_t) nh < 0)
   107      {
   108        int cnt;
   109        for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
   110  	{
   111  	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
   112  	  dl = dl << 1;
   113  	}
   114  
   115        while (cnt)
   116  	{
   117  	  q <<= 1;
   118  	  if (nh > dh || (nh == dh && nl >= dl))
   119  	    {
   120  	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
   121  	      q |= 1;
   122  	    }
   123  	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
   124  	  dh = dh >> 1;
   125  	  cnt--;
   126  	}
   127      }
   128    else
   129      {
   130        int cnt;
   131        for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
   132  	{
   133  	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
   134  	  dl = dl << 1;
   135  	}
   136  
   137        while (cnt)
   138  	{
   139  	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
   140  	  dh = dh >> 1;
   141  	  q <<= 1;
   142  	  if (nh > dh || (nh == dh && nl >= dl))
   143  	    {
   144  	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
   145  	      q |= 1;
   146  	    }
   147  	  cnt--;
   148  	}
   149      }
   150  
   151    rp[0] = nl;
   152    rp[1] = nh;
   153  
   154    return q;
   155  }
   156  
   157  #if 0
   158  /* This div2 uses less branches, but it seems to nevertheless be
   159     slightly slower than the above code. */
   160  static inline mp_limb_t
   161  div2 (mp_ptr rp,
   162        mp_limb_t nh, mp_limb_t nl,
   163        mp_limb_t dh, mp_limb_t dl)
   164  {
   165    mp_limb_t q = 0;
   166    int ncnt;
   167    int dcnt;
   168  
   169    count_leading_zeros (ncnt, nh);
   170    count_leading_zeros (dcnt, dh);
   171    dcnt -= ncnt;
   172  
   173    dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
   174    dl <<= dcnt;
   175  
   176    do
   177      {
   178        mp_limb_t bit;
   179        q <<= 1;
   180        if (UNLIKELY (nh == dh))
   181  	bit = (nl >= dl);
   182        else
   183  	bit = (nh > dh);
   184  
   185        q |= bit;
   186  
   187        sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);
   188  
   189        dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
   190        dh = dh >> 1;
   191      }
   192    while (dcnt--);
   193  
   194    rp[0] = nl;
   195    rp[1] = nh;
   196  
   197    return q;
   198  }
   199  #endif
   200  
   201  #else /* GMP_NAIL_BITS != 0 */
   202  /* Check all functions for nail support. */
   203  /* hgcd2 should be defined to take inputs including nail bits, and
   204     produce a matrix with elements also including nail bits. This is
   205     necessary, for the matrix elements to be useful with mpn_mul_1,
   206     mpn_addmul_1 and friends. */
   207  #error Not implemented
   208  #endif /* GMP_NAIL_BITS != 0 */
   209  
   210  /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
   211     matrix M. Returns 1 if we make progress, i.e. can perform at least
   212     one subtraction. Otherwise returns zero. */
   213  
   214  /* FIXME: Possible optimizations:
   215  
   216     The div2 function starts with checking the most significant bit of
   217     the numerator. We can maintained normalized operands here, call
   218     hgcd with normalized operands only, which should make the code
   219     simpler and possibly faster.
   220  
   221     Experiment with table lookups on the most significant bits.
   222  
   223     This function is also a candidate for assembler implementation.
   224  */
   225  int
   226  mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
   227  	   struct hgcd_matrix1 *M)
   228  {
   229    mp_limb_t u00, u01, u10, u11;
   230  
   231    if (ah < 2 || bh < 2)
   232      return 0;
   233  
   234    if (ah > bh || (ah == bh && al > bl))
   235      {
   236        sub_ddmmss (ah, al, ah, al, bh, bl);
   237        if (ah < 2)
   238  	return 0;
   239  
   240        u00 = u01 = u11 = 1;
   241        u10 = 0;
   242      }
   243    else
   244      {
   245        sub_ddmmss (bh, bl, bh, bl, ah, al);
   246        if (bh < 2)
   247  	return 0;
   248  
   249        u00 = u10 = u11 = 1;
   250        u01 = 0;
   251      }
   252  
   253    if (ah < bh)
   254      goto subtract_a;
   255  
   256    for (;;)
   257      {
   258        ASSERT (ah >= bh);
   259        if (ah == bh)
   260  	goto done;
   261  
   262        if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
   263  	{
   264  	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
   265  	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
   266  
   267  	  break;
   268  	}
   269  
   270        /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
   271  	 1), affecting the second column of M. */
   272        ASSERT (ah > bh);
   273        sub_ddmmss (ah, al, ah, al, bh, bl);
   274  
   275        if (ah < 2)
   276  	goto done;
   277  
   278        if (ah <= bh)
   279  	{
   280  	  /* Use q = 1 */
   281  	  u01 += u00;
   282  	  u11 += u10;
   283  	}
   284        else
   285  	{
   286  	  mp_limb_t r[2];
   287  	  mp_limb_t q = div2 (r, ah, al, bh, bl);
   288  	  al = r[0]; ah = r[1];
   289  	  if (ah < 2)
   290  	    {
   291  	      /* A is too small, but q is correct. */
   292  	      u01 += q * u00;
   293  	      u11 += q * u10;
   294  	      goto done;
   295  	    }
   296  	  q++;
   297  	  u01 += q * u00;
   298  	  u11 += q * u10;
   299  	}
   300      subtract_a:
   301        ASSERT (bh >= ah);
   302        if (ah == bh)
   303  	goto done;
   304  
   305        if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
   306  	{
   307  	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
   308  	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
   309  
   310  	  goto subtract_a1;
   311  	}
   312  
   313        /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
   314  	 1), affecting the first column of M. */
   315        sub_ddmmss (bh, bl, bh, bl, ah, al);
   316  
   317        if (bh < 2)
   318  	goto done;
   319  
   320        if (bh <= ah)
   321  	{
   322  	  /* Use q = 1 */
   323  	  u00 += u01;
   324  	  u10 += u11;
   325  	}
   326        else
   327  	{
   328  	  mp_limb_t r[2];
   329  	  mp_limb_t q = div2 (r, bh, bl, ah, al);
   330  	  bl = r[0]; bh = r[1];
   331  	  if (bh < 2)
   332  	    {
   333  	      /* B is too small, but q is correct. */
   334  	      u00 += q * u01;
   335  	      u10 += q * u11;
   336  	      goto done;
   337  	    }
   338  	  q++;
   339  	  u00 += q * u01;
   340  	  u10 += q * u11;
   341  	}
   342      }
   343  
   344    /* NOTE: Since we discard the least significant half limb, we don't
   345       get a truly maximal M (corresponding to |a - b| <
   346       2^{GMP_LIMB_BITS +1}). */
   347    /* Single precision loop */
   348    for (;;)
   349      {
   350        ASSERT (ah >= bh);
   351  
   352        ah -= bh;
   353        if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
   354  	break;
   355  
   356        if (ah <= bh)
   357  	{
   358  	  /* Use q = 1 */
   359  	  u01 += u00;
   360  	  u11 += u10;
   361  	}
   362        else
   363  	{
   364  	  mp_limb_t r;
   365  	  mp_limb_t q = div1 (&r, ah, bh);
   366  	  ah = r;
   367  	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
   368  	    {
   369  	      /* A is too small, but q is correct. */
   370  	      u01 += q * u00;
   371  	      u11 += q * u10;
   372  	      break;
   373  	    }
   374  	  q++;
   375  	  u01 += q * u00;
   376  	  u11 += q * u10;
   377  	}
   378      subtract_a1:
   379        ASSERT (bh >= ah);
   380  
   381        bh -= ah;
   382        if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
   383  	break;
   384  
   385        if (bh <= ah)
   386  	{
   387  	  /* Use q = 1 */
   388  	  u00 += u01;
   389  	  u10 += u11;
   390  	}
   391        else
   392  	{
   393  	  mp_limb_t r;
   394  	  mp_limb_t q = div1 (&r, bh, ah);
   395  	  bh = r;
   396  	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
   397  	    {
   398  	      /* B is too small, but q is correct. */
   399  	      u00 += q * u01;
   400  	      u10 += q * u11;
   401  	      break;
   402  	    }
   403  	  q++;
   404  	  u00 += q * u01;
   405  	  u10 += q * u11;
   406  	}
   407      }
   408  
   409   done:
   410    M->u[0][0] = u00; M->u[0][1] = u01;
   411    M->u[1][0] = u10; M->u[1][1] = u11;
   412  
   413    return 1;
   414  }
   415  
   416  /* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
   417   * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
   418  mp_size_t
   419  mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
   420  			     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
   421  {
   422    mp_limb_t ah, bh;
   423  
   424    /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
   425  
   426       r  = u00 * a
   427       r += u10 * b
   428       b *= u11
   429       b += u01 * a
   430    */
   431  
   432  #if HAVE_NATIVE_mpn_addaddmul_1msb0
   433    ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
   434    bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
   435  #else
   436    ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
   437    ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
   438  
   439    bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
   440    bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
   441  #endif
   442    rp[n] = ah;
   443    bp[n] = bh;
   444  
   445    n += (ah | bh) > 0;
   446    return n;
   447  }