github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/perfsqr.c (about)

     1  /* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
     2     zero otherwise.
     3  
     4  Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
     5  Foundation, Inc.
     6  
     7  This file is part of the GNU MP Library.
     8  
     9  The GNU MP Library is free software; you can redistribute it and/or modify
    10  it under the terms of either:
    11  
    12    * the GNU Lesser General Public License as published by the Free
    13      Software Foundation; either version 3 of the License, or (at your
    14      option) any later version.
    15  
    16  or
    17  
    18    * the GNU General Public License as published by the Free Software
    19      Foundation; either version 2 of the License, or (at your option) any
    20      later version.
    21  
    22  or both in parallel, as here.
    23  
    24  The GNU MP Library is distributed in the hope that it will be useful, but
    25  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    26  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    27  for more details.
    28  
    29  You should have received copies of the GNU General Public License and the
    30  GNU Lesser General Public License along with the GNU MP Library.  If not,
    31  see https://www.gnu.org/licenses/.  */
    32  
    33  #include <stdio.h> /* for NULL */
    34  #include "gmp.h"
    35  #include "gmp-impl.h"
    36  #include "longlong.h"
    37  
    38  #include "perfsqr.h"
    39  
    40  
    41  /* change this to "#define TRACE(x) x" for diagnostics */
    42  #define TRACE(x)
    43  
    44  
    45  
    46  /* PERFSQR_MOD_* detects non-squares using residue tests.
    47  
    48     A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h.  It takes
    49     {up,usize} modulo a selected modulus to get a remainder r.  For 32-bit or
    50     64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
    51     or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
    52     used.  PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
    53     case too.
    54  
    55     PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
    56     PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
    57     data indicating residues and non-residues modulo those divisors.  The
    58     table data is in 1 or 2 limbs worth of bits respectively, per the size of
    59     each d.
    60  
    61     A "modexact" style remainder is taken to reduce r modulo d.
    62     PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
    63     the table data.  Notice there's just one multiplication by a constant
    64     "inv", for each d.
    65  
    66     The modexact doesn't produce a true r%d remainder, instead idx satisfies
    67     "-(idx<<PERFSQR_MOD_BITS) == r mod d".  Because d is odd, this factor
    68     -2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
    69     accounted for by having the table data suitably permuted.
    70  
    71     The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
    72     In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
    73     each divisor d meaning the modexact multiply can take place entirely
    74     within one limb, giving the compiler the chance to optimize it, in a way
    75     that say umul_ppmm would not give.
    76  
    77     There's no need for the divisors d to be prime, in fact gen-psqr.c makes
    78     a deliberate effort to combine factors so as to reduce the number of
    79     separate tests done on r.  But such combining is limited to d <=
    80     2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
    81  
    82     Alternatives:
    83  
    84     It'd be possible to use bigger divisors d, and more than 2 limbs of table
    85     data, but this doesn't look like it would be of much help to the prime
    86     factors in the usual moduli 2^24-1 or 2^48-1.
    87  
    88     The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
    89     just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
    90     factors.  2^32-1 and 2^64-1 would be equally easy to calculate, but have
    91     fewer prime factors.
    92  
    93     The nails case usually ends up using mpn_mod_1, which is a lot slower
    94     than mpn_mod_34lsub1.  Perhaps other such special moduli could be found
    95     for the nails case.  Two-term things like 2^30-2^15-1 might be
    96     candidates.  Or at worst some on-the-fly de-nailing would allow the plain
    97     2^24-1 to be used.  Currently nails are too preliminary to be worried
    98     about.
    99  
   100  */
   101  
   102  #define PERFSQR_MOD_MASK       ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
   103  
   104  #define MOD34_BITS  (GMP_NUMB_BITS / 4 * 3)
   105  #define MOD34_MASK  ((CNST_LIMB(1) << MOD34_BITS) - 1)
   106  
   107  #define PERFSQR_MOD_34(r, up, usize)				\
   108    do {								\
   109      (r) = mpn_mod_34lsub1 (up, usize);				\
   110      (r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS);		\
   111    } while (0)
   112  
   113  /* FIXME: The %= here isn't good, and might destroy any savings from keeping
   114     the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
   115     Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
   116     and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
   117     normal case, so lets not worry too much about mod_1.  */
   118  #define PERFSQR_MOD_PP(r, up, usize)					\
   119    do {									\
   120      if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD))	\
   121        {									\
   122  	(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,		\
   123  				PERFSQR_PP_INVERTED);			\
   124  	(r) %= PERFSQR_PP;						\
   125        }									\
   126      else								\
   127        {									\
   128  	(r) = mpn_mod_1 (up, usize, PERFSQR_PP);			\
   129        }									\
   130    } while (0)
   131  
   132  #define PERFSQR_MOD_IDX(idx, r, d, inv)				\
   133    do {								\
   134      mp_limb_t  q;						\
   135      ASSERT ((r) <= PERFSQR_MOD_MASK);				\
   136      ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1);		\
   137      ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK);		\
   138  								\
   139      q = ((r) * (inv)) & PERFSQR_MOD_MASK;			\
   140      ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK));		\
   141      (idx) = (q * (d)) >> PERFSQR_MOD_BITS;			\
   142    } while (0)
   143  
   144  #define PERFSQR_MOD_1(r, d, inv, mask)				\
   145    do {								\
   146      unsigned   idx;						\
   147      ASSERT ((d) <= GMP_LIMB_BITS);				\
   148      PERFSQR_MOD_IDX(idx, r, d, inv);				\
   149      TRACE (printf ("  PERFSQR_MOD_1 d=%u r=%lu idx=%u\n",	\
   150  		   d, r%d, idx));				\
   151      if ((((mask) >> idx) & 1) == 0)				\
   152        {								\
   153  	TRACE (printf ("  non-square\n"));			\
   154  	return 0;						\
   155        }								\
   156    } while (0)
   157  
   158  /* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
   159     sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
   160  #define PERFSQR_MOD_2(r, d, inv, mhi, mlo)			\
   161    do {								\
   162      mp_limb_t  m;						\
   163      unsigned   idx;						\
   164      ASSERT ((d) <= 2*GMP_LIMB_BITS);				\
   165  								\
   166      PERFSQR_MOD_IDX (idx, r, d, inv);				\
   167      TRACE (printf ("  PERFSQR_MOD_2 d=%u r=%lu idx=%u\n",	\
   168  		   d, r%d, idx));				\
   169      m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi));	\
   170      idx %= GMP_LIMB_BITS;					\
   171      if (((m >> idx) & 1) == 0)					\
   172        {								\
   173  	TRACE (printf ("  non-square\n"));			\
   174  	return 0;						\
   175        }								\
   176    } while (0)
   177  
   178  
   179  int
   180  mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
   181  {
   182    ASSERT (usize >= 1);
   183  
   184    TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
   185  
   186    /* The first test excludes 212/256 (82.8%) of the perfect square candidates
   187       in O(1) time.  */
   188    {
   189      unsigned  idx = up[0] % 0x100;
   190      if (((sq_res_0x100[idx / GMP_LIMB_BITS]
   191  	  >> (idx % GMP_LIMB_BITS)) & 1) == 0)
   192        return 0;
   193    }
   194  
   195  #if 0
   196    /* Check that we have even multiplicity of 2, and then check that the rest is
   197       a possible perfect square.  Leave disabled until we can determine this
   198       really is an improvement.  It it is, it could completely replace the
   199       simple probe above, since this should throw out more non-squares, but at
   200       the expense of somewhat more cycles.  */
   201    {
   202      mp_limb_t lo;
   203      int cnt;
   204      lo = up[0];
   205      while (lo == 0)
   206        up++, lo = up[0], usize--;
   207      count_trailing_zeros (cnt, lo);
   208      if ((cnt & 1) != 0)
   209        return 0;			/* return of not even multiplicity of 2 */
   210      lo >>= cnt;			/* shift down to align lowest non-zero bit */
   211      lo >>= 1;			/* shift away lowest non-zero bit */
   212      if ((lo & 3) != 0)
   213        return 0;
   214    }
   215  #endif
   216  
   217  
   218    /* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
   219       according to their residues modulo small primes (or powers of
   220       primes).  See perfsqr.h.  */
   221    PERFSQR_MOD_TEST (up, usize);
   222  
   223  
   224    /* For the third and last test, we finally compute the square root,
   225       to make sure we've really got a perfect square.  */
   226    {
   227      mp_ptr root_ptr;
   228      int res;
   229      TMP_DECL;
   230  
   231      TMP_MARK;
   232      root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
   233  
   234      /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
   235      res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
   236      TMP_FREE;
   237  
   238      return res;
   239    }
   240  }