github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/sqr_basecase.c (about)

     1  /* mpn_sqr_basecase -- Internal routine to square a natural number
     2     of length n.
     3  
     4     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     5     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
     6  
     7  
     8  Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
     9  Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  #include "gmp.h"
    38  #include "gmp-impl.h"
    39  #include "longlong.h"
    40  
    41  
    42  #if HAVE_NATIVE_mpn_sqr_diagonal
    43  #define MPN_SQR_DIAGONAL(rp, up, n)					\
    44    mpn_sqr_diagonal (rp, up, n)
    45  #else
    46  #define MPN_SQR_DIAGONAL(rp, up, n)					\
    47    do {									\
    48      mp_size_t _i;							\
    49      for (_i = 0; _i < (n); _i++)					\
    50        {									\
    51  	mp_limb_t ul, lpl;						\
    52  	ul = (up)[_i];							\
    53  	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
    54  	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
    55        }									\
    56    } while (0)
    57  #endif
    58  
    59  #if HAVE_NATIVE_mpn_sqr_diag_addlsh1
    60  #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
    61    mpn_sqr_diag_addlsh1 (rp, tp, up, n)
    62  #else
    63  #if HAVE_NATIVE_mpn_addlsh1_n
    64  #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
    65    do {									\
    66      mp_limb_t cy;							\
    67      MPN_SQR_DIAGONAL (rp, up, n);					\
    68      cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    69      rp[2 * n - 1] += cy;						\
    70    } while (0)
    71  #else
    72  #define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
    73    do {									\
    74      mp_limb_t cy;							\
    75      MPN_SQR_DIAGONAL (rp, up, n);					\
    76      cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
    77      cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
    78      rp[2 * n - 1] += cy;						\
    79    } while (0)
    80  #endif
    81  #endif
    82  
    83  
    84  #undef READY_WITH_mpn_sqr_basecase
    85  
    86  
    87  #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
    88  void
    89  mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
    90  {
    91    mp_size_t i;
    92    mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
    93    mp_ptr tp = tarr;
    94    mp_limb_t cy;
    95  
    96    /* must fit 2*n limbs in tarr */
    97    ASSERT (n <= SQR_TOOM2_THRESHOLD);
    98  
    99    if ((n & 1) != 0)
   100      {
   101        if (n == 1)
   102  	{
   103  	  mp_limb_t ul, lpl;
   104  	  ul = up[0];
   105  	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
   106  	  rp[0] = lpl >> GMP_NAIL_BITS;
   107  	  return;
   108  	}
   109  
   110        MPN_ZERO (tp, n);
   111  
   112        for (i = 0; i <= n - 2; i += 2)
   113  	{
   114  	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
   115  	  tp[n + i] = cy;
   116  	}
   117      }
   118    else
   119      {
   120        if (n == 2)
   121  	{
   122  #if HAVE_NATIVE_mpn_mul_2
   123  	  rp[3] = mpn_mul_2 (rp, up, 2, up);
   124  #else
   125  	  rp[0] = 0;
   126  	  rp[1] = 0;
   127  	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
   128  #endif
   129  	  return;
   130  	}
   131  
   132        MPN_ZERO (tp, n);
   133  
   134        for (i = 0; i <= n - 4; i += 2)
   135  	{
   136  	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
   137  	  tp[n + i] = cy;
   138  	}
   139        cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
   140        tp[2 * n - 3] = cy;
   141      }
   142  
   143    MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
   144  }
   145  #define READY_WITH_mpn_sqr_basecase
   146  #endif
   147  
   148  
   149  #if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2
   150  
   151  /* mpn_sqr_basecase using plain mpn_addmul_2.
   152  
   153     This is tricky, since we have to let mpn_addmul_2 make some undesirable
   154     multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
   155     This forces us to conditionally add or subtract the mpn_sqr_diagonal
   156     results.  Examples of the product we form:
   157  
   158     n = 4              n = 5		n = 6
   159     u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
   160     u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
   161  					u4 * u5
   162     add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
   163     sub: u1	      sub: u1 u3	sub: u1 u3
   164  */
   165  
   166  void
   167  mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
   168  {
   169    mp_size_t i;
   170    mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
   171    mp_ptr tp = tarr;
   172    mp_limb_t cy;
   173  
   174    /* must fit 2*n limbs in tarr */
   175    ASSERT (n <= SQR_TOOM2_THRESHOLD);
   176  
   177    if ((n & 1) != 0)
   178      {
   179        mp_limb_t x0, x1;
   180  
   181        if (n == 1)
   182  	{
   183  	  mp_limb_t ul, lpl;
   184  	  ul = up[0];
   185  	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
   186  	  rp[0] = lpl >> GMP_NAIL_BITS;
   187  	  return;
   188  	}
   189  
   190        /* The code below doesn't like unnormalized operands.  Since such
   191  	 operands are unusual, handle them with a dumb recursion.  */
   192        if (up[n - 1] == 0)
   193  	{
   194  	  rp[2 * n - 2] = 0;
   195  	  rp[2 * n - 1] = 0;
   196  	  mpn_sqr_basecase (rp, up, n - 1);
   197  	  return;
   198  	}
   199  
   200        MPN_ZERO (tp, n);
   201  
   202        for (i = 0; i <= n - 2; i += 2)
   203  	{
   204  	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
   205  	  tp[n + i] = cy;
   206  	}
   207  
   208        MPN_SQR_DIAGONAL (rp, up, n);
   209  
   210        for (i = 2;; i += 4)
   211  	{
   212  	  x0 = rp[i + 0];
   213  	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
   214  	  x1 = rp[i + 1];
   215  	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
   216  	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
   217  	  if (i + 4 >= 2 * n)
   218  	    break;
   219  	  mpn_incr_u (rp + i + 4, cy);
   220  	}
   221      }
   222    else
   223      {
   224        mp_limb_t x0, x1;
   225  
   226        if (n == 2)
   227  	{
   228  #if HAVE_NATIVE_mpn_mul_2
   229  	  rp[3] = mpn_mul_2 (rp, up, 2, up);
   230  #else
   231  	  rp[0] = 0;
   232  	  rp[1] = 0;
   233  	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
   234  #endif
   235  	  return;
   236  	}
   237  
   238        /* The code below doesn't like unnormalized operands.  Since such
   239  	 operands are unusual, handle them with a dumb recursion.  */
   240        if (up[n - 1] == 0)
   241  	{
   242  	  rp[2 * n - 2] = 0;
   243  	  rp[2 * n - 1] = 0;
   244  	  mpn_sqr_basecase (rp, up, n - 1);
   245  	  return;
   246  	}
   247  
   248        MPN_ZERO (tp, n);
   249  
   250        for (i = 0; i <= n - 4; i += 2)
   251  	{
   252  	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
   253  	  tp[n + i] = cy;
   254  	}
   255        cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
   256        tp[2 * n - 3] = cy;
   257  
   258        MPN_SQR_DIAGONAL (rp, up, n);
   259  
   260        for (i = 2;; i += 4)
   261  	{
   262  	  x0 = rp[i + 0];
   263  	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
   264  	  x1 = rp[i + 1];
   265  	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
   266  	  if (i + 6 >= 2 * n)
   267  	    break;
   268  	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
   269  	  mpn_incr_u (rp + i + 4, cy);
   270  	}
   271        mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
   272      }
   273  
   274  #if HAVE_NATIVE_mpn_addlsh1_n
   275    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
   276  #else
   277    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
   278    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
   279  #endif
   280    rp[2 * n - 1] += cy;
   281  }
   282  #define READY_WITH_mpn_sqr_basecase
   283  #endif
   284  
   285  
   286  #if ! defined (READY_WITH_mpn_sqr_basecase)
   287  
   288  /* Default mpn_sqr_basecase using mpn_addmul_1.  */
   289  
   290  void
   291  mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
   292  {
   293    mp_size_t i;
   294  
   295    ASSERT (n >= 1);
   296    ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
   297  
   298    {
   299      mp_limb_t ul, lpl;
   300      ul = up[0];
   301      umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
   302      rp[0] = lpl >> GMP_NAIL_BITS;
   303    }
   304    if (n > 1)
   305      {
   306        mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
   307        mp_ptr tp = tarr;
   308        mp_limb_t cy;
   309  
   310        /* must fit 2*n limbs in tarr */
   311        ASSERT (n <= SQR_TOOM2_THRESHOLD);
   312  
   313        cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
   314        tp[n - 1] = cy;
   315        for (i = 2; i < n; i++)
   316  	{
   317  	  mp_limb_t cy;
   318  	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
   319  	  tp[n + i - 2] = cy;
   320  	}
   321  
   322        MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
   323      }
   324  }
   325  #endif