github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/gcdext.c

/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
   the size is returned (if inputs are non-normalized, result may be
   non-normalized too). Temporary space needed is M->n + n.
 */
static mp_size_t
hgcd_mul_matrix_vector (struct hgcd_matrix *M,
			mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t ah, bh;

  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as

     t  = u00 * a
     r  = u10 * b
     r += t;

     t  = u11 * b
     b  = u01 * a
     b += t;
  */

  if (M->n >= n)
    {
      mpn_mul (tp, M->p[0][0], M->n, ap, n);
      mpn_mul (rp, M->p[1][0], M->n, bp, n);
    }
  else
    {
      mpn_mul (tp, ap, n, M->p[0][0], M->n);
      mpn_mul (rp, bp, n, M->p[1][0], M->n);
    }

  ah = mpn_add_n (rp, rp, tp, n + M->n);

  if (M->n >= n)
    {
      mpn_mul (tp, M->p[1][1], M->n, bp, n);
      mpn_mul (bp, M->p[0][1], M->n, ap, n);
    }
  else
    {
      mpn_mul (tp, bp, n, M->p[1][1], M->n);
      mpn_mul (bp, ap, n, M->p[0][1], M->n);
    }
  bh = mpn_add_n (bp, bp, tp, n + M->n);

  n += M->n;
  if ( (ah | bh) > 0)
    {
      rp[n] = ah;
      bp[n] = bh;
      n++;
    }
  else
    {
      /* Normalize */
      while ( (rp[n-1] | bp[n-1]) == 0)
	n--;
    }

  return n;
}
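
/* Worked example (illustrative only, single-limb values): with
   M = [[u00, u01], [u10, u11]] = [[2, 1], [3, 2]] (determinant 1, as
   for hgcd matrices), a = 5 and b = 7, the update above computes

     r = u00 a + u10 b = 2*5 + 3*7 = 31
     b = u01 a + u11 b = 1*5 + 2*7 = 19

   i.e. (r; b) = (a; b) M, with the result one limb longer than
   n + M->n only when one of the final additions carries out.  */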

#define COMPUTE_V_ITCH(n) (2*(n))

/* Computes |v| = |(g - u a)| / b, where u may be positive or
   negative, and v is of the opposite sign. max(a, b) is of size n, u and
   v at most size n, and v must have space for n+1 limbs. */
static mp_size_t
compute_v (mp_ptr vp,
	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
	   mp_srcptr gp, mp_size_t gn,
	   mp_srcptr up, mp_size_t usize,
	   mp_ptr tp)
{
  mp_size_t size;
  mp_size_t an;
  mp_size_t bn;
  mp_size_t vn;

  ASSERT (n > 0);
  ASSERT (gn > 0);
  ASSERT (usize != 0);

  size = ABS (usize);
  ASSERT (size <= n);
  ASSERT (up[size-1] > 0);

  an = n;
  MPN_NORMALIZE (ap, an);
  ASSERT (gn <= an);

  if (an >= size)
    mpn_mul (tp, ap, an, up, size);
  else
    mpn_mul (tp, up, size, ap, an);

  size += an;

  if (usize > 0)
    {
      /* |v| = -v = (u a - g) / b */

      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
      MPN_NORMALIZE (tp, size);
      if (size == 0)
	return 0;
    }
  else
    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
	 (g + |u| a) always fits in (|usize| + an) limbs. */

      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
      size -= (tp[size - 1] == 0);
    }

  /* Now divide t / b. There must be no remainder */
  bn = n;
  MPN_NORMALIZE (bp, bn);
  ASSERT (size >= bn);

  vn = size + 1 - bn;
  ASSERT (vn <= n + 1);

  mpn_divexact (vp, tp, size, bp, bn);
  vn -= (vp[vn-1] == 0);

  return vn;
}
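
/* Worked example (illustrative only): a = 12, b = 8, g = gcd = 4.
   The extended gcd gives 4 = 1*12 + (-1)*8, so u = 1 (usize > 0) and
   the first branch above computes

     |v| = (u a - g) / b = (1*12 - 4) / 8 = 1

   and since v has the opposite sign of u, v = -1.  The division by b
   is exact by construction, which is why mpn_divexact applies.  */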

/* Temporary storage:

   Initial division: Quotient of at most an - n + 1 <= an limbs.

   Storage for u0 and u1: 2(n+1).

   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)

   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.

   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.

   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.

   For the lehmer call after the loop, let T denote
   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
   sufficient for both operations.

*/
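
/* Sanity check of the bound above (illustrative): gcdext_lehmer uses
   3T limbs for the copies of u, a and b plus 4T+3 limbs of scratch,
   7T+3 in total, while compute_v needs T + (T+1) + 2T = 4T+1 limbs,
   so 7T + 3 covers both phases.  */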

/* Optimal choice of p seems difficult. In each iteration the division
 * of work between hgcd and the updates of u0 and u1 depends on the
 * current size of the u. It may be desirable to use a different
 * choice of p in each iteration. Also the input size seems to matter;
 * choosing p = n / 3 in the first iteration seems to improve
 * performance slightly for input size just above the threshold, but
 * degrade performance for larger inputs. */
#define CHOOSE_P_1(n) ((n) / 2)
#define CHOOSE_P_2(n) ((n) / 3)
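
/* For example (illustrative): with n = 900 limbs, CHOOSE_P_1 gives
   p = 450, so the first mpn_hgcd call works on the high n - p = 450
   limbs of a and b; in the loop, CHOOSE_P_2 gives p = 300, so each
   subsequent call works on the high 2n/3 limbs.  */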

mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t talloc;
  mp_size_t scratch;
  mp_size_t matrix_scratch;
  mp_size_t ualloc = n + 1;

  struct gcdext_ctx ctx;
  mp_size_t un;
  mp_ptr u0;
  mp_ptr u1;

  mp_ptr tp;

  TMP_DECL;

  ASSERT (an >= n);
  ASSERT (n > 0);
  ASSERT (bp[n-1] > 0);

  TMP_MARK;

  /* FIXME: Check for small sizes first, before setting up temporary
     storage etc. */
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);

  /* For initial division */
  scratch = an - n + 1;
  if (scratch > talloc)
    talloc = scratch;

  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      /* For hgcd loop. */
      mp_size_t hgcd_scratch;
      mp_size_t update_scratch;
      mp_size_t p1 = CHOOSE_P_1 (n);
      mp_size_t p2 = CHOOSE_P_2 (n);
      mp_size_t min_p = MIN(p1, p2);
      mp_size_t max_p = MAX(p1, p2);
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
      update_scratch = max_p + n - 1;

      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
      if (scratch > talloc)
	talloc = scratch;

      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
	 copies of a and b. */
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
	+ 3*GCDEXT_DC_THRESHOLD;

      if (scratch > talloc)
	talloc = scratch;

      /* Cofactors u0 and u1 */
      talloc += 2*(n+1);
    }

  tp = TMP_ALLOC_LIMBS(talloc);

  if (an > n)
    {
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);

      if (mpn_zero_p (ap, n))
	{
	  MPN_COPY (gp, bp, n);
	  *usizep = 0;
	  TMP_FREE;
	  return n;
	}
    }

  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }

  MPN_ZERO (tp, 2*ualloc);
  u0 = tp; tp += ualloc;
  u1 = tp; tp += ualloc;

  ctx.gp = gp;
  ctx.up = up;
  ctx.usize = usizep;

  {
    /* For the first hgcd call, there are no u updates, and it makes
       some sense to use a different choice for p. */

    /* FIXME: We could trim use of temporary storage, since u0 and u1
       are not used yet. For the hgcd call, we could swap in the u0
       and u1 pointers for the relevant matrix elements. */

    struct hgcd_matrix M;
    mp_size_t p = CHOOSE_P_1 (n);
    mp_size_t nn;

    mpn_hgcd_matrix_init (&M, n - p, tp);
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
    if (nn > 0)
      {
	ASSERT (M.n <= (n - p - 1)/2);
	ASSERT (M.n + p <= (p + n - 1) / 2);

	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);

	MPN_COPY (u0, M.p[1][0], M.n);
	MPN_COPY (u1, M.p[1][1], M.n);
	un = M.n;
	while ( (u0[un-1] | u1[un-1]) == 0)
	  un--;
      }
    else
      {
	/* mpn_hgcd has failed. Then either one of a or b is very
	   small, or the difference is very small. Perform one
	   subtraction followed by one division. */
	u1[0] = 1;

	ctx.u0 = u0;
	ctx.u1 = u1;
	ctx.tp = tp + n; /* ualloc */
	ctx.un = 1;

	/* Temporary storage n */
	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	if (n == 0)
	  {
	    TMP_FREE;
	    return ctx.gn;
	  }

	un = ctx.un;
	ASSERT (un < ualloc);
      }
  }

  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
    {
      struct hgcd_matrix M;
      mp_size_t p = CHOOSE_P_2 (n);
      mp_size_t nn;

      mpn_hgcd_matrix_init (&M, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
      if (nn > 0)
	{
	  mp_ptr t0;

	  t0 = tp + matrix_scratch;
	  ASSERT (M.n <= (n - p - 1)/2);
	  ASSERT (M.n + p <= (p + n - 1) / 2);

	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);

	  /* By the same analysis as for mpn_hgcd_matrix_mul */
	  ASSERT (M.n + un <= ualloc);

	  /* FIXME: This copying could be avoided by some swapping of
	   * pointers. May need more temporary storage, though. */
	  MPN_COPY (t0, u0, un);

	  /* Temporary storage ualloc */
	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);

	  ASSERT (un < ualloc);
	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
	}
      else
	{
	  /* mpn_hgcd has failed. Then either one of a or b is very
	     small, or the difference is very small. Perform one
	     subtraction followed by one division. */
	  ctx.u0 = u0;
	  ctx.u1 = u1;
	  ctx.tp = tp + n; /* ualloc */
	  ctx.un = un;

	  /* Temporary storage n */
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
	  if (n == 0)
	    {
	      TMP_FREE;
	      return ctx.gn;
	    }

	  un = ctx.un;
	  ASSERT (un < ualloc);
	}
    }
  /* We have A = ... a + ... b
	     B =  u0 a +  u1 b

	     a = u1  A + ... B
	     b = -u0 A + ... B

     with bounds

       |u0|, |u1| <= B / min(a, b)

     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
     in which case the only reduction done so far is a = A - k B for
     some k.

     Compute g = u a + v b = (u u1 - v u0) A + (...) B
     Here, u, v are bounded by

       |u| <= b,
       |v| <= a
  */
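
  /* Worked example (illustrative only): A = 13, B = 5. Suppose the
     reductions so far give a = 3, b = 2, with B = u0 a + u1 b for
     u0 = u1 = 1. The final Lehmer phase finds g = 1 = u a + v b with
     u = 1, v = -1, so the cofactor of A is
     u u1 - v u0 = 1*1 - (-1)*1 = 2; indeed 1 = 2*13 - 5*5.  */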

  ASSERT ( (ap[n-1] | bp[n-1]) > 0);

  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
    {
      /* Must return the smallest cofactor, +u1 or -u0 */
      int c;

      MPN_COPY (gp, ap, n);

      MPN_CMP (c, u0, u1, un);
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
	 this case we choose the cofactor + 1, corresponding to G = A
	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
      if (c < 0)
	{
	  MPN_NORMALIZE (u0, un);
	  MPN_COPY (up, u0, un);
	  *usizep = -un;
	}
      else
	{
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
	  MPN_COPY (up, u1, un);
	  *usizep = un;
	}

      TMP_FREE;
      return n;
    }
  else if (UNLIKELY (u0[0] == 0) && un == 1)
    {
      mp_size_t gn;
      ASSERT (u1[0] == 1);

      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);

      TMP_FREE;
      return gn;
    }
  else
    {
      mp_size_t u0n;
      mp_size_t u1n;
      mp_size_t lehmer_un;
      mp_size_t lehmer_vn;
      mp_size_t gn;

      mp_ptr lehmer_up;
      mp_ptr lehmer_vp;
      int negate;

      lehmer_up = tp; tp += n;

      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
      MPN_COPY (tp, ap, n);
      MPN_COPY (tp + n, bp, n);
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);

      u0n = un;
      MPN_NORMALIZE (u0, u0n);
      ASSERT (u0n > 0);

      if (lehmer_un == 0)
	{
	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
	  MPN_COPY (up, u0, u0n);
	  *usizep = -u0n;

	  TMP_FREE;
	  return gn;
	}

      lehmer_vp = tp;
      /* Compute v = (g - u a) / b */
      lehmer_vn = compute_v (lehmer_vp,
			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);

      if (lehmer_un > 0)
	negate = 0;
      else
	{
	  lehmer_un = -lehmer_un;
	  negate = 1;
	}

      u1n = un;
      MPN_NORMALIZE (u1, u1n);
      ASSERT (u1n > 0);

      ASSERT (lehmer_un + u1n <= ualloc);
      ASSERT (lehmer_vn + u0n <= ualloc);

      /* We may still have v == 0 */

      /* Compute u u1 */
      if (lehmer_un <= u1n)
	/* Should be the common case */
	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
      else
	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);

      un = u1n + lehmer_un;
      un -= (up[un - 1] == 0);

      if (lehmer_vn > 0)
	{
	  mp_limb_t cy;

	  /* Overwrites old u1 value */
	  if (lehmer_vn <= u0n)
	    /* Should be the common case */
	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
	  else
	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);

	  u1n = u0n + lehmer_vn;
	  u1n -= (u1[u1n - 1] == 0);

	  if (u1n <= un)
	    {
	      cy = mpn_add (up, up, un, u1, u1n);
	    }
	  else
	    {
	      cy = mpn_add (up, u1, u1n, up, un);
	      un = u1n;
	    }
	  up[un] = cy;
	  un += (cy != 0);

	  ASSERT (un < ualloc);
	}
      *usizep = negate ? -un : un;

      TMP_FREE;
      return gn;
    }
}
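
/* Usage sketch (illustrative, not part of GMP): calling mpn_gcdext
   through the public mpn interface.  The operand areas are destroyed
   and each needs one extra writable limb past the end, the cofactor
   area needs vn + 1 limbs, and the operands must be non-zero with
   un >= vn and the most significant limb of {vp, vn} non-zero.
   Guarded with #if 0 so this file still compiles unchanged.  */
#if 0
#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  /* Single-limb example: g = gcd (240, 46) = 2 = (-9)*240 + 47*46,
     so the cofactor of the first operand is s = -9. */
  mp_limb_t ap[2] = { 240, 0 };	/* one scratch limb past the operand */
  mp_limb_t bp[2] = { 46, 0 };
  mp_limb_t gp[1];
  mp_limb_t sp[2];		/* cofactor: vn + 1 limbs */
  mp_size_t sn;

  mp_size_t gn = mpn_gcdext (gp, sp, &sn, ap, 1, bp, 1);

  /* sn carries the cofactor's sign; |sn| is its size in limbs. */
  printf ("g = %lu (%ld limbs), s = %s%lu\n",
	  (unsigned long) gp[0], (long) gn,
	  sn < 0 ? "-" : "", (unsigned long) sp[0]);
  return 0;
}
#endif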