github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpz/lucnum_ui.c (about)

     1  /* mpz_lucnum_ui -- calculate Lucas number.
     2  
     3  Copyright 2001, 2003, 2005, 2011, 2012 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  #include <stdio.h>
    32  #include "gmp.h"
    33  #include "gmp-impl.h"
    34  
    35  
    36  /* change this to "#define TRACE(x) x" for diagnostics */
    37  #define TRACE(x)
    38  
    39  
    40  /* Notes:
    41  
    42     For the +4 in L[2k+1] when k is even, all L[4m+3] == 4, 5 or 7 mod 8, so
    43     there can't be an overflow applying +4 to just the low limb (since that
    44     would leave 0, 1, 2 or 3 mod 8).
    45  
    46     For the -4 in L[2k+1] when k is even, it seems (no proof) that
    47     L[3*2^(b-2)-3] == -4 mod 2^b, so for instance with a 32-bit limb
    48     L[0xBFFFFFFD] == 0xFFFFFFFC mod 2^32, and this implies a borrow from the
    49     low limb.  Obviously L[0xBFFFFFFD] is a huge number, but it's at least
    50     conceivable to calculate it, so it probably should be handled.
    51  
    52     For the -2 in L[2k] with k even, it seems (no proof) L[2^(b-1)] == -1 mod
    53     2^b, so for instance in 32-bits L[0x80000000] has a low limb of
    54     0xFFFFFFFF so there would have been a borrow.  Again L[0x80000000] is
    55     obviously huge, but probably should be made to work.  */
    56  
    57  void
    58  mpz_lucnum_ui (mpz_ptr ln, unsigned long n)
    59  {
    60    mp_size_t  lalloc, xalloc, lsize, xsize;
    61    mp_ptr     lp, xp;
    62    mp_limb_t  c;
    63    int        zeros;
    64    TMP_DECL;
    65  
    66    TRACE (printf ("mpn_lucnum_ui n=%lu\n", n));
    67  
    68    if (n <= FIB_TABLE_LUCNUM_LIMIT)
    69      {
    70        /* L[n] = F[n] + 2F[n-1] */
    71        PTR(ln)[0] = FIB_TABLE(n) + 2 * FIB_TABLE ((int) n - 1);
    72        SIZ(ln) = 1;
    73        return;
    74      }
    75  
    76    /* +1 since L[n]=F[n]+2F[n-1] might be 1 limb bigger than F[n], further +1
    77       since square or mul used below might need an extra limb over the true
    78       size */
    79    lalloc = MPN_FIB2_SIZE (n) + 2;
    80    lp = MPZ_REALLOC (ln, lalloc);
    81  
    82    TMP_MARK;
    83    xalloc = lalloc;
    84    xp = TMP_ALLOC_LIMBS (xalloc);
    85  
    86    /* Strip trailing zeros from n, until either an odd number is reached
    87       where the L[2k+1] formula can be used, or until n fits within the
    88       FIB_TABLE data.  The table is preferred of course.  */
    89    zeros = 0;
    90    for (;;)
    91      {
    92        if (n & 1)
    93  	{
    94  	  /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
    95  
    96  	  mp_size_t  yalloc, ysize;
    97  	  mp_ptr     yp;
    98  
    99  	  TRACE (printf ("  initial odd n=%lu\n", n));
   100  
   101  	  yalloc = MPN_FIB2_SIZE (n/2);
   102  	  yp = TMP_ALLOC_LIMBS (yalloc);
   103  	  ASSERT (xalloc >= yalloc);
   104  
   105  	  xsize = mpn_fib2_ui (xp, yp, n/2);
   106  
   107  	  /* possible high zero on F[k-1] */
   108  	  ysize = xsize;
   109  	  ysize -= (yp[ysize-1] == 0);
   110  	  ASSERT (yp[ysize-1] != 0);
   111  
   112  	  /* xp = 2*F[k] + F[k-1] */
   113  #if HAVE_NATIVE_mpn_addlsh1_n
   114  	  c = mpn_addlsh1_n (xp, yp, xp, xsize);
   115  #else
   116  	  c = mpn_lshift (xp, xp, xsize, 1);
   117  	  c += mpn_add_n (xp, xp, yp, xsize);
   118  #endif
   119  	  ASSERT (xalloc >= xsize+1);
   120  	  xp[xsize] = c;
   121  	  xsize += (c != 0);
   122  	  ASSERT (xp[xsize-1] != 0);
   123  
   124  	  ASSERT (lalloc >= xsize + ysize);
   125  	  c = mpn_mul (lp, xp, xsize, yp, ysize);
   126  	  lsize = xsize + ysize;
   127  	  lsize -= (c == 0);
   128  
   129  	  /* lp = 5*lp */
   130  #if HAVE_NATIVE_mpn_addlsh2_n
   131  	  c = mpn_addlsh2_n (lp, lp, lp, lsize);
   132  #else
   133  	  /* FIXME: Is this faster than mpn_mul_1 ? */
   134  	  c = mpn_lshift (xp, lp, lsize, 2);
   135  	  c += mpn_add_n (lp, lp, xp, lsize);
   136  #endif
   137  	  ASSERT (lalloc >= lsize+1);
   138  	  lp[lsize] = c;
   139  	  lsize += (c != 0);
   140  
   141  	  /* lp = lp - 4*(-1)^k */
   142  	  if (n & 2)
   143  	    {
   144  	      /* no overflow, see comments above */
   145  	      ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
   146  	      lp[0] += 4;
   147  	    }
   148  	  else
   149  	    {
   150  	      /* won't go negative */
   151  	      MPN_DECR_U (lp, lsize, CNST_LIMB(4));
   152  	    }
   153  
   154  	  TRACE (mpn_trace ("  l",lp, lsize));
   155  	  break;
   156  	}
   157  
   158        MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
   159        zeros++;
   160        n /= 2;
   161  
   162        if (n <= FIB_TABLE_LUCNUM_LIMIT)
   163  	{
   164  	  /* L[n] = F[n] + 2F[n-1] */
   165  	  lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
   166  	  lsize = 1;
   167  
   168  	  TRACE (printf ("  initial small n=%lu\n", n);
   169  		 mpn_trace ("  l",lp, lsize));
   170  	  break;
   171  	}
   172      }
   173  
   174    for ( ; zeros != 0; zeros--)
   175      {
   176        /* L[2k] = L[k]^2 + 2*(-1)^k */
   177  
   178        TRACE (printf ("  zeros=%d\n", zeros));
   179  
   180        ASSERT (xalloc >= 2*lsize);
   181        mpn_sqr (xp, lp, lsize);
   182        lsize *= 2;
   183        lsize -= (xp[lsize-1] == 0);
   184  
   185        /* First time around the loop k==n determines (-1)^k, after that k is
   186  	 always even and we set n=0 to indicate that.  */
   187        if (n & 1)
   188  	{
   189  	  /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
   190  	  ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
   191  	  xp[0] += 2;
   192  	  n = 0;
   193  	}
   194        else
   195  	{
   196  	  /* won't go negative */
   197  	  MPN_DECR_U (xp, lsize, CNST_LIMB(2));
   198  	}
   199  
   200        MP_PTR_SWAP (xp, lp);
   201        ASSERT (lp[lsize-1] != 0);
   202      }
   203  
   204    /* should end up in the right spot after all the xp/lp swaps */
   205    ASSERT (lp == PTR(ln));
   206    SIZ(ln) = lsize;
   207  
   208    TMP_FREE;
   209  }