github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/sqrlo_basecase.c (about)

     1  /* mpn_sqrlo_basecase -- Internal routine to square a natural number
     2     of length n.
     3  
     4     THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
     5     SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
     6  
     7  
     8  Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015
     9  Free Software Foundation, Inc.
    10  
    11  This file is part of the GNU MP Library.
    12  
    13  The GNU MP Library is free software; you can redistribute it and/or modify
    14  it under the terms of either:
    15  
    16    * the GNU Lesser General Public License as published by the Free
    17      Software Foundation; either version 3 of the License, or (at your
    18      option) any later version.
    19  
    20  or
    21  
    22    * the GNU General Public License as published by the Free Software
    23      Foundation; either version 2 of the License, or (at your option) any
    24      later version.
    25  
    26  or both in parallel, as here.
    27  
    28  The GNU MP Library is distributed in the hope that it will be useful, but
    29  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    30  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    31  for more details.
    32  
    33  You should have received copies of the GNU General Public License and the
    34  GNU Lesser General Public License along with the GNU MP Library.  If not,
    35  see https://www.gnu.org/licenses/.  */
    36  
    37  #include "gmp.h"
    38  #include "gmp-impl.h"
    39  #include "longlong.h"
    40  
    41  #ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
    42  #if HAVE_NATIVE_mpn_addmul_1
    43  #define SQRLO_SHORTCUT_MULTIPLICATIONS 0
    44  #else
    45  #define SQRLO_SHORTCUT_MULTIPLICATIONS 1
    46  #endif
    47  #endif
    48  
    49  #if HAVE_NATIVE_mpn_sqr_diagonal
    50  #define MPN_SQR_DIAGONAL(rp, up, n)					\
    51    mpn_sqr_diagonal (rp, up, n)
    52  #else
    53  #define MPN_SQR_DIAGONAL(rp, up, n)					\
    54    do {									\
    55      mp_size_t _i;							\
    56      for (_i = 0; _i < (n); _i++)					\
    57        {									\
    58  	mp_limb_t ul, lpl;						\
    59  	ul = (up)[_i];							\
    60  	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
    61  	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
    62        }									\
    63    } while (0)
    64  #endif
    65  
    66  #define MPN_SQRLO_DIAGONAL(rp, up, n)					\
    67    do {									\
    68      mp_size_t nhalf;							\
    69      nhalf = (n) >> 1;							\
    70      MPN_SQR_DIAGONAL ((rp), (up), nhalf);				\
    71      if (((n) & 1) != 0)							\
    72        {									\
    73  	mp_limb_t op;							\
    74  	op = (up)[nhalf];						\
    75  	(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK;			\
    76        }									\
    77    } while (0)
    78  
    79  #if HAVE_NATIVE_mpn_addlsh1_n_ip1
    80  #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
    81    do {									\
    82      MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
    83      mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1);			\
    84    } while (0)
    85  #else
    86  #define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n)				\
    87    do {									\
    88      MPN_SQRLO_DIAGONAL((rp), (up), (n));				\
    89      mpn_lshift ((tp), (tp), (n) - 1, 1);				\
    90      mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1);			\
    91    } while (0)
    92  #endif
    93  
    94  /* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
    95  #define SQRLO_BASECASE_ALLOC						\
    96    (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
    97  
    98  /* Default mpn_sqrlo_basecase using mpn_addmul_1.  */
    99  #ifndef SQRLO_SPECIAL_CASES
   100  #define SQRLO_SPECIAL_CASES 2
   101  #endif
   102  void
   103  mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
   104  {
   105    mp_limb_t ul;
   106  
   107    ASSERT (n >= 1);
   108    ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
   109  
   110    ul = up[0];
   111  
   112    if (n <= SQRLO_SPECIAL_CASES)
   113      {
   114  #if SQRLO_SPECIAL_CASES == 1
   115        rp[0] = (ul * ul) & GMP_NUMB_MASK;
   116  #else
   117        if (n == 1)
   118  	rp[0] = (ul * ul) & GMP_NUMB_MASK;
   119        else
   120  	{
   121  	  mp_limb_t hi, lo, ul1;
   122  	  umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
   123  	  rp[0] = lo >> GMP_NAIL_BITS;
   124  	  ul1 = up[1];
   125  #if SQRLO_SPECIAL_CASES == 2
   126  	  rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
   127  #else
   128  	  if (n == 2)
   129  	    rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
   130  	  else
   131  	    {
   132  	      mp_limb_t hi1;
   133  #if GMP_NAIL_BITS != 0
   134  	      ul <<= 1;
   135  #endif
   136  	      umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
   137  	      hi1 += ul * up[2];
   138  #if GMP_NAIL_BITS == 0
   139  	      hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
   140  	      add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
   141  #else
   142  	      hi += lo >> GMP_NAIL_BITS;
   143  	      rp[1] = hi & GMP_NUMB_MASK;
   144  	      rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
   145  #endif
   146  	    }
   147  #endif
   148  	}
   149  #endif
   150      }
   151    else
   152      {
   153        mp_limb_t tp[SQRLO_BASECASE_ALLOC];
   154        mp_size_t i;
   155  
   156        /* must fit n-1 limbs in tp */
   157        ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
   158  
   159        --n;
   160  #if SQRLO_SHORTCUT_MULTIPLICATIONS
   161        {
   162  	mp_limb_t cy;
   163  
   164  	cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
   165  	for (i = 1; 2 * i + 1 < n; ++i)
   166  	  {
   167  	    ul = up[i];
   168  	    cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
   169  	  }
   170  	tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
   171        }
   172  #else
   173        mpn_mul_1 (tp, up + 1, n, ul);
   174        for (i = 1; 2 * i < n; ++i)
   175  	mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
   176  #endif
   177  
   178        MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
   179      }
   180  }
   181  #undef SQRLO_SPECIAL_CASES