github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/jacobi.c

github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/jacobi.c (about)

     1  /* jacobi.c
     2  
     3     THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
     4     SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
     5     GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
     6  
     7  Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
     8  Inc.
     9  
    10  This file is part of the GNU MP Library.
    11  
    12  The GNU MP Library is free software; you can redistribute it and/or modify
    13  it under the terms of either:
    14  
    15    * the GNU Lesser General Public License as published by the Free
    16      Software Foundation; either version 3 of the License, or (at your
    17      option) any later version.
    18  
    19  or
    20  
    21    * the GNU General Public License as published by the Free Software
    22      Foundation; either version 2 of the License, or (at your option) any
    23      later version.
    24  
    25  or both in parallel, as here.
    26  
    27  The GNU MP Library is distributed in the hope that it will be useful, but
    28  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    29  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    30  for more details.
    31  
    32  You should have received copies of the GNU General Public License and the
    33  GNU Lesser General Public License along with the GNU MP Library.  If not,
    34  see https://www.gnu.org/licenses/.  */
    35  
    36  #include "gmp.h"
    37  #include "gmp-impl.h"
    38  #include "longlong.h"
    39  
    40  #ifndef JACOBI_DC_THRESHOLD
    41  #define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
    42  #endif
    43  
    44  /* Schönhage's rules:
    45   *
    46   * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
    47   *
    48   * If r1 is odd, then
    49   *
    50   *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
    51   *
    52   * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
    53   *
    54   * If r1 is even, r2 must be odd. We have
    55   *
    56   *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
    57   *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
    58   *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
    59   *
    60   * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
    61   * q1 times gives
    62   *
    63   *   (r1 | r0) = (r1 | r2) = (r3 | r2)
    64   *
    65   * On the other hand, if r1 = 2 (mod 4), the sign factor is
    66   * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
    67   *
    68   *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
    69   *   = q1 (r0-1)/2 + q1 (q1-1)/2
    70   *
    71   * and we can summarize the even case as
    72   *
    73   *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
    74   *
    75   * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
    76   *
    77   * What about termination? The remainder sequence ends with (0|1) = 1
    78   * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
    79   * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
    80   * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
    81   *
    82   * Examples: (11|15) = - (15|11) = - (4|11)
    83   *            (4|11) =    (4| 3) =   (1| 3)
    84   *            (1| 3) = (3|1) = (0|1) = 1
    85   *
    86   *             (2|7) = (2|1) = (0|1) = 1
    87   *
    88   * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
    89   *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
    90   *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
    91   *
    92   */
    93  
    94  /* In principle, the state consists of four variables: e (one bit), a,
    95     b (two bits each), d (one bit). Collected factors are (-1)^e. a and
    96     b are the least significant bits of the current remainders. d
    97     (denominator) is 0 if we're currently subtracting multiplies of a
    98     from b, and 1 if we're subtracting b from a.
    99  
   100     e is stored in the least significant bit, while a, b and d are
   101     coded as only 13 distinct values in bits 1-4, according to the
   102     following table. For rows not mentioning d, the value is either
   103     implied, or it doesn't matter. */
   104  
   105  #if WANT_ASSERT
   106  static const struct
   107  {
   108    unsigned char a;
   109    unsigned char b;
   110  } decode_table[13] = {
   111    /*  0 */ { 0, 1 },
   112    /*  1 */ { 0, 3 },
   113    /*  2 */ { 1, 1 },
   114    /*  3 */ { 1, 3 },
   115    /*  4 */ { 2, 1 },
   116    /*  5 */ { 2, 3 },
   117    /*  6 */ { 3, 1 },
   118    /*  7 */ { 3, 3 }, /* d = 1 */
   119    /*  8 */ { 1, 0 },
   120    /*  9 */ { 1, 2 },
   121    /* 10 */ { 3, 0 },
   122    /* 11 */ { 3, 2 },
   123    /* 12 */ { 3, 3 }, /* d = 0 */
   124  };
   125  #define JACOBI_A(bits) (decode_table[(bits)>>1].a)
   126  #define JACOBI_B(bits) (decode_table[(bits)>>1].b)
   127  #endif /* WANT_ASSERT */
   128  
   129  const unsigned char jacobi_table[208] = {
   130  #include "jacobitab.h"
   131  };
   132  
   133  #define BITS_FAIL 31
   134  
   135  static void
   136  jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
   137  	     mp_srcptr qp, mp_size_t qn, int d)
   138  {
   139    unsigned *bitsp = (unsigned *) p;
   140  
   141    if (gp)
   142      {
   143        ASSERT (gn > 0);
   144        if (gn != 1 || gp[0] != 1)
   145  	{
   146  	  *bitsp = BITS_FAIL;
   147  	  return;
   148  	}
   149      }
   150  
   151    if (qp)
   152      {
   153        ASSERT (qn > 0);
   154        ASSERT (d >= 0);
   155        *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
   156      }
   157  }
   158  
   159  #define CHOOSE_P(n) (2*(n) / 3)
   160  
   161  int
   162  mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
   163  {
   164    mp_size_t scratch;
   165    mp_size_t matrix_scratch;
   166    mp_ptr tp;
   167  
   168    TMP_DECL;
   169  
   170    ASSERT (n > 0);
   171    ASSERT ( (ap[n-1] | bp[n-1]) > 0);
   172    ASSERT ( (bp[0] | ap[0]) & 1);
   173  
   174    /* FIXME: Check for small sizes first, before setting up temporary
   175       storage etc. */
   176    scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
   177  
   178    if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
   179      {
   180        mp_size_t hgcd_scratch;
   181        mp_size_t update_scratch;
   182        mp_size_t p = CHOOSE_P (n);
   183        mp_size_t dc_scratch;
   184  
   185        matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
   186        hgcd_scratch = mpn_hgcd_itch (n - p);
   187        update_scratch = p + n - 1;
   188  
   189        dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
   190        if (dc_scratch > scratch)
   191  	scratch = dc_scratch;
   192      }
   193  
   194    TMP_MARK;
   195    tp = TMP_ALLOC_LIMBS(scratch);
   196  
   197    while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
   198      {
   199        struct hgcd_matrix M;
   200        mp_size_t p = 2*n/3;
   201        mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
   202        mp_size_t nn;
   203        mpn_hgcd_matrix_init (&M, n - p, tp);
   204  
   205        nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
   206  			    tp + matrix_scratch);
   207        if (nn > 0)
   208  	{
   209  	  ASSERT (M.n <= (n - p - 1)/2);
   210  	  ASSERT (M.n + p <= (p + n - 1) / 2);
   211  	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
   212  	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
   213  	}
   214        else
   215  	{
   216  	  /* Temporary storage n */
   217  	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
   218  	  if (!n)
   219  	    {
   220  	      TMP_FREE;
   221  	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
   222  	    }
   223  	}
   224      }
   225  
   226    while (n > 2)
   227      {
   228        struct hgcd_matrix1 M;
   229        mp_limb_t ah, al, bh, bl;
   230        mp_limb_t mask;
   231  
   232        mask = ap[n-1] | bp[n-1];
   233        ASSERT (mask > 0);
   234  
   235        if (mask & GMP_NUMB_HIGHBIT)
   236  	{
   237  	  ah = ap[n-1]; al = ap[n-2];
   238  	  bh = bp[n-1]; bl = bp[n-2];
   239  	}
   240        else
   241  	{
   242  	  int shift;
   243  
   244  	  count_leading_zeros (shift, mask);
   245  	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
   246  	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
   247  	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
   248  	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
   249  	}
   250  
   251        /* Try an mpn_nhgcd2 step */
   252        if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
   253  	{
   254  	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
   255  	  MP_PTR_SWAP (ap, tp);
   256  	}
   257        else
   258  	{
   259  	  /* mpn_hgcd2 has failed. Then either one of a or b is very
   260  	     small, or the difference is very small. Perform one
   261  	     subtraction followed by one division. */
   262  	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
   263  	  if (!n)
   264  	    {
   265  	      TMP_FREE;
   266  	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
   267  	    }
   268  	}
   269      }
   270  
   271    if (bits >= 16)
   272      MP_PTR_SWAP (ap, bp);
   273  
   274    ASSERT (bp[0] & 1);
   275  
   276    if (n == 1)
   277      {
   278        mp_limb_t al, bl;
   279        al = ap[0];
   280        bl = bp[0];
   281  
   282        TMP_FREE;
   283        if (bl == 1)
   284  	return 1 - 2*(bits & 1);
   285        else
   286  	return mpn_jacobi_base (al, bl, bits << 1);
   287      }
   288  
   289    else
   290      {
   291        int res = mpn_jacobi_2 (ap, bp, bits & 1);
   292        TMP_FREE;
   293        return res;
   294      }
   295  }