github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/gen-psqr.c

github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/gen-psqr.c (about)

     1  /* Generate perfect square testing data.
     2  
     3  Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  #include <stdio.h>
    32  #include <stdlib.h>
    33  
    34  #include "bootstrap.c"
    35  
    36  
    37  /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
    38     (plus a PERFSQR_PP modulus), and generate tables indicating quadratic
    39     residues and non-residues modulo small factors of that modulus.
    40  
    41     For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used.  That
    42     function exists specifically because 2^24-1 and 2^48-1 have nice sets of
    43     prime factors.  For other limb sizes it's considered, but if it doesn't
    44     have good factors then mpn_mod_1 will be used instead.
    45  
    46     When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a
    47     selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb,
    48     with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <=
    49     GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its
    50     calculation within a single limb.
    51  
    52     In either case primes can be combined to make divisors.  The table data
    53     then effectively indicates remainders which are quadratic residues mod
    54     all the primes.  This sort of combining reduces the number of steps
    55     needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time.
    56     Nothing is gained or lost in terms of detections, the same total fraction
    57     of non-residues will be identified.
    58  
    59     Nothing particularly sophisticated is attempted for combining factors to
    60     make divisors.  This is probably a kind of knapsack problem so it'd be
    61     too hard to attempt anything completely general.  For the usual 32 and 64
    62     bit limbs we get a good enough result just pairing the biggest and
    63     smallest which fit together, repeatedly.
    64  
    65     Another aim is to get powerful combinations, ie. divisors which identify
    66     biggest fraction of non-residues, and have those run first.  Again for
    67     the usual 32 and 64 bits it seems good enough just to pair for big
    68     divisors then sort according to the resulting fraction of non-residues
    69     identified.
    70  
    71     Also in this program, a table sq_res_0x100 of residues modulo 256 is
    72     generated.  This simply fills bits into limbs of the appropriate
    73     build-time GMP_LIMB_BITS each.
    74  
    75  */
    76  
    77  
    78  /* Normally we aren't using const in gen*.c programs, so as not to have to
    79     bother figuring out if it works, but using it with f_cmp_divisor and
    80     f_cmp_fraction avoids warnings from the qsort calls. */
    81  
    82  /* Same tests as gmp.h. */
    83  #if  defined (__STDC__)                                 \
    84    || defined (__cplusplus)                              \
    85    || defined (_AIX)                                     \
    86    || defined (__DECC)                                   \
    87    || (defined (__mips) && defined (_SYSTYPE_SVR4))      \
    88    || defined (_MSC_VER)                                 \
    89    || defined (_WIN32)
    90  #define HAVE_CONST        1
    91  #endif
    92  
    93  #if ! HAVE_CONST
    94  #define const
    95  #endif
    96  
    97  
    98  mpz_t  *sq_res_0x100;          /* table of limbs */
    99  int    nsq_res_0x100;          /* elements in sq_res_0x100 array */
   100  int    sq_res_0x100_num;       /* squares in sq_res_0x100 */
   101  double sq_res_0x100_fraction;  /* sq_res_0x100_num / 256 */
   102  
   103  int     mod34_bits;        /* 3*GMP_NUMB_BITS/4 */
   104  int     mod_bits;          /* bits from PERFSQR_MOD_34 or MOD_PP */
   105  int     max_divisor;       /* all divisors <= max_divisor */
   106  int     max_divisor_bits;  /* ceil(log2(max_divisor)) */
   107  double  total_fraction;    /* of squares */
   108  mpz_t   pp;                /* product of primes, or 0 if mod_34lsub1 used */
   109  mpz_t   pp_norm;           /* pp shifted so NUMB high bit set */
   110  mpz_t   pp_inverted;       /* invert_limb style inverse */
   111  mpz_t   mod_mask;          /* 2^mod_bits-1 */
   112  char    mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */
   113  
   114  /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */
   115  struct rawfactor_t {
   116    int     divisor;
   117    int     multiplicity;
   118  };
   119  struct rawfactor_t  *rawfactor;
   120  int                 nrawfactor;
   121  
   122  /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */
   123  struct factor_t {
   124    int     divisor;
   125    mpz_t   inverse;   /* 1/divisor mod 2^mod_bits */
   126    mpz_t   mask;      /* indicating squares mod divisor */
   127    double  fraction;  /* squares/total */
   128  };
   129  struct factor_t  *factor;
   130  int              nfactor;       /* entries in use in factor array */
   131  int              factor_alloc;  /* entries allocated to factor array */
   132  
   133  
   134  int
   135  f_cmp_divisor (const void *parg, const void *qarg)
   136  {
   137    const struct factor_t *p, *q;
   138    p = (const struct factor_t *) parg;
   139    q = (const struct factor_t *) qarg;
   140    if (p->divisor > q->divisor)
   141      return 1;
   142    else if (p->divisor < q->divisor)
   143      return -1;
   144    else
   145      return 0;
   146  }
   147  
   148  int
   149  f_cmp_fraction (const void *parg, const void *qarg)
   150  {
   151    const struct factor_t *p, *q;
   152    p = (const struct factor_t *) parg;
   153    q = (const struct factor_t *) qarg;
   154    if (p->fraction > q->fraction)
   155      return 1;
   156    else if (p->fraction < q->fraction)
   157      return -1;
   158    else
   159      return 0;
   160  }
   161  
   162  /* Remove array[idx] by copying the remainder down, and adjust narray
   163     accordingly.  */
   164  #define COLLAPSE_ELEMENT(array, idx, narray)                    \
   165    do {                                                          \
   166      memmove (&(array)[idx],					\
   167  	     &(array)[idx+1],					\
   168  	     ((narray)-((idx)+1)) * sizeof (array[0]));		\
   169      (narray)--;                                                 \
   170    } while (0)
   171  
   172  
   173  /* return n*2^p mod m */
   174  int
   175  mul_2exp_mod (int n, int p, int m)
   176  {
   177    while (--p >= 0)
   178      n = (2 * n) % m;
   179    return n;
   180  }
   181  
   182  /* return -n mod m */
   183  int
   184  neg_mod (int n, int m)
   185  {
   186    assert (n >= 0 && n < m);
   187    return (n == 0 ? 0 : m-n);
   188  }
   189  
   190  /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if
   191     "-(idx<<mod_bits)" can be a square modulo m.  */
   192  void
   193  square_mask (mpz_t mask, int m)
   194  {
   195    int    p, i, r, idx;
   196  
   197    p = mul_2exp_mod (1, mod_bits, m);
   198    p = neg_mod (p, m);
   199  
   200    mpz_set_ui (mask, 0L);
   201    for (i = 0; i < m; i++)
   202      {
   203        r = (i * i) % m;
   204        idx = (r * p) % m;
   205        mpz_setbit (mask, (unsigned long) idx);
   206      }
   207  }
   208  
   209  void
   210  generate_sq_res_0x100 (int limb_bits)
   211  {
   212    int  i, res;
   213  
   214    nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
   215    sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
   216  
   217    for (i = 0; i < nsq_res_0x100; i++)
   218      mpz_init_set_ui (sq_res_0x100[i], 0L);
   219  
   220    for (i = 0; i < 0x100; i++)
   221      {
   222        res = (i * i) % 0x100;
   223        mpz_setbit (sq_res_0x100[res / limb_bits],
   224                    (unsigned long) (res % limb_bits));
   225      }
   226  
   227    sq_res_0x100_num = 0;
   228    for (i = 0; i < nsq_res_0x100; i++)
   229      sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
   230    sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
   231  }
   232  
   233  void
   234  generate_mod (int limb_bits, int nail_bits)
   235  {
   236    int    numb_bits = limb_bits - nail_bits;
   237    int    i, divisor;
   238  
   239    mpz_init_set_ui (pp, 0L);
   240    mpz_init_set_ui (pp_norm, 0L);
   241    mpz_init_set_ui (pp_inverted, 0L);
   242  
   243    /* no more than limb_bits many factors in a one limb modulus (and of
   244       course in reality nothing like that many) */
   245    factor_alloc = limb_bits;
   246    factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
   247    rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor));
   248  
   249    if (numb_bits % 4 != 0)
   250      {
   251        strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
   252        goto use_pp;
   253      }
   254  
   255    max_divisor = 2*limb_bits;
   256    max_divisor_bits = log2_ceil (max_divisor);
   257  
   258    if (numb_bits / 4 < max_divisor_bits)
   259      {
   260        /* Wind back to one limb worth of max_divisor, if that will let us use
   261           mpn_mod_34lsub1.  */
   262        max_divisor = limb_bits;
   263        max_divisor_bits = log2_ceil (max_divisor);
   264  
   265        if (numb_bits / 4 < max_divisor_bits)
   266          {
   267            strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
   268            goto use_pp;
   269          }
   270      }
   271  
   272    {
   273      /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */
   274      mpz_t  m, q, r;
   275      int    multiplicity;
   276  
   277      mod34_bits = (numb_bits / 4) * 3;
   278  
   279      /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at
   280         the mod34_bits mark, adding the two halves for a remainder of at most
   281         mod34_bits+1 many bits */
   282      mod_bits = mod34_bits + 1;
   283  
   284      mpz_init_set_ui (m, 1L);
   285      mpz_mul_2exp (m, m, mod34_bits);
   286      mpz_sub_ui (m, m, 1L);
   287  
   288      mpz_init (q);
   289      mpz_init (r);
   290  
   291      for (i = 3; i <= max_divisor; i+=2)
   292        {
   293          if (! isprime (i))
   294            continue;
   295  
   296          mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
   297          if (mpz_sgn (r) != 0)
   298            continue;
   299  
   300          /* if a repeated prime is found it's used as an i^n in one factor */
   301          divisor = 1;
   302          multiplicity = 0;
   303          do
   304            {
   305              if (divisor > max_divisor / i)
   306                break;
   307              multiplicity++;
   308              mpz_set (m, q);
   309              mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
   310            }
   311          while (mpz_sgn (r) == 0);
   312  
   313          assert (nrawfactor < factor_alloc);
   314          rawfactor[nrawfactor].divisor = i;
   315          rawfactor[nrawfactor].multiplicity = multiplicity;
   316          nrawfactor++;
   317        }
   318  
   319      mpz_clear (m);
   320      mpz_clear (q);
   321      mpz_clear (r);
   322    }
   323  
   324    if (nrawfactor <= 2)
   325      {
   326        mpz_t  new_pp;
   327  
   328        sprintf (mod34_excuse, "only %d small factor%s",
   329                 nrawfactor, nrawfactor == 1 ? "" : "s");
   330  
   331      use_pp:
   332        /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code
   333           tried with just one */
   334        max_divisor = 2*limb_bits;
   335        max_divisor_bits = log2_ceil (max_divisor);
   336  
   337        mpz_init (new_pp);
   338        nrawfactor = 0;
   339        mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
   340  
   341        /* one copy of each small prime */
   342        mpz_set_ui (pp, 1L);
   343        for (i = 3; i <= max_divisor; i+=2)
   344          {
   345            if (! isprime (i))
   346              continue;
   347  
   348            mpz_mul_ui (new_pp, pp, (unsigned long) i);
   349            if (mpz_sizeinbase (new_pp, 2) > mod_bits)
   350              break;
   351            mpz_set (pp, new_pp);
   352  
   353            assert (nrawfactor < factor_alloc);
   354            rawfactor[nrawfactor].divisor = i;
   355            rawfactor[nrawfactor].multiplicity = 1;
   356            nrawfactor++;
   357          }
   358  
   359        /* Plus an extra copy of one or more of the primes selected, if that
   360           still fits in max_divisor and the total in mod_bits.  Usually only
   361           3 or 5 will be candidates */
   362        for (i = nrawfactor-1; i >= 0; i--)
   363          {
   364            if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
   365              continue;
   366            mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
   367            if (mpz_sizeinbase (new_pp, 2) > mod_bits)
   368              continue;
   369            mpz_set (pp, new_pp);
   370  
   371            rawfactor[i].multiplicity++;
   372          }
   373  
   374        mod_bits = mpz_sizeinbase (pp, 2);
   375  
   376        mpz_set (pp_norm, pp);
   377        while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
   378          mpz_add (pp_norm, pp_norm, pp_norm);
   379  
   380        mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
   381  
   382        mpz_clear (new_pp);
   383      }
   384  
   385    /* start the factor array */
   386    for (i = 0; i < nrawfactor; i++)
   387      {
   388        int  j;
   389        assert (nfactor < factor_alloc);
   390        factor[nfactor].divisor = 1;
   391        for (j = 0; j < rawfactor[i].multiplicity; j++)
   392          factor[nfactor].divisor *= rawfactor[i].divisor;
   393        nfactor++;
   394      }
   395  
   396   combine:
   397    /* Combine entries in the factor array.  Combine the smallest entry with
   398       the biggest one that will fit with it (ie. under max_divisor), then
   399       repeat that with the new smallest entry. */
   400    qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
   401    for (i = nfactor-1; i >= 1; i--)
   402      {
   403        if (factor[i].divisor <= max_divisor / factor[0].divisor)
   404          {
   405            factor[0].divisor *= factor[i].divisor;
   406            COLLAPSE_ELEMENT (factor, i, nfactor);
   407            goto combine;
   408          }
   409      }
   410  
   411    total_fraction = 1.0;
   412    for (i = 0; i < nfactor; i++)
   413      {
   414        mpz_init (factor[i].inverse);
   415        mpz_invert_ui_2exp (factor[i].inverse,
   416                            (unsigned long) factor[i].divisor,
   417                            (unsigned long) mod_bits);
   418  
   419        mpz_init (factor[i].mask);
   420        square_mask (factor[i].mask, factor[i].divisor);
   421  
   422        /* fraction of possible squares */
   423        factor[i].fraction = (double) mpz_popcount (factor[i].mask)
   424          / factor[i].divisor;
   425  
   426        /* total fraction of possible squares */
   427        total_fraction *= factor[i].fraction;
   428      }
   429  
   430    /* best tests first (ie. smallest fraction) */
   431    qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
   432  }
   433  
   434  void
   435  print (int limb_bits, int nail_bits)
   436  {
   437    int    i;
   438    mpz_t  mhi, mlo;
   439  
   440    printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
   441    printf ("\n");
   442  
   443    printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
   444            limb_bits, nail_bits);
   445    printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
   446            limb_bits, nail_bits);
   447    printf ("#endif\n");
   448    printf ("\n");
   449  
   450    printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
   451    printf ("   This test identifies %.2f%% as non-squares (%d/256). */\n",
   452            (1.0 - sq_res_0x100_fraction) * 100.0,
   453            0x100 - sq_res_0x100_num);
   454    printf ("static const mp_limb_t\n");
   455    printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
   456    for (i = 0; i < nsq_res_0x100; i++)
   457      {
   458        printf ("  CNST_LIMB(0x");
   459        mpz_out_str (stdout, 16, sq_res_0x100[i]);
   460        printf ("),\n");
   461      }
   462    printf ("};\n");
   463    printf ("\n");
   464  
   465    if (mpz_sgn (pp) != 0)
   466      {
   467        printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
   468        printf ("/* PERFSQR_PP = ");
   469      }
   470    else
   471      printf ("/* 2^%d-1 = ", mod34_bits);
   472    for (i = 0; i < nrawfactor; i++)
   473      {
   474        if (i != 0)
   475          printf (" * ");
   476        printf ("%d", rawfactor[i].divisor);
   477        if (rawfactor[i].multiplicity != 1)
   478          printf ("^%d", rawfactor[i].multiplicity);
   479      }
   480    printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
   481  
   482    printf ("#define PERFSQR_MOD_BITS  %d\n", mod_bits);
   483    if (mpz_sgn (pp) != 0)
   484      {
   485        printf ("#define PERFSQR_PP            CNST_LIMB(0x");
   486        mpz_out_str (stdout, 16, pp);
   487        printf (")\n");
   488        printf ("#define PERFSQR_PP_NORM       CNST_LIMB(0x");
   489        mpz_out_str (stdout, 16, pp_norm);
   490        printf (")\n");
   491        printf ("#define PERFSQR_PP_INVERTED   CNST_LIMB(0x");
   492        mpz_out_str (stdout, 16, pp_inverted);
   493        printf (")\n");
   494      }
   495    printf ("\n");
   496  
   497    mpz_init (mhi);
   498    mpz_init (mlo);
   499  
   500    printf ("/* This test identifies %.2f%% as non-squares. */\n",
   501            (1.0 - total_fraction) * 100.0);
   502    printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
   503    printf ("  do {                              \\\n");
   504    printf ("    mp_limb_t  r;                   \\\n");
   505    if (mpz_sgn (pp) != 0)
   506      printf ("    PERFSQR_MOD_PP (r, up, usize);  \\\n");
   507    else
   508      printf ("    PERFSQR_MOD_34 (r, up, usize);  \\\n");
   509  
   510    for (i = 0; i < nfactor; i++)
   511      {
   512        printf ("                                    \\\n");
   513        printf ("    /* %5.2f%% */                    \\\n",
   514                (1.0 - factor[i].fraction) * 100.0);
   515  
   516        printf ("    PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
   517                factor[i].divisor <= limb_bits ? 1 : 2,
   518                factor[i].divisor);
   519        mpz_out_str (stdout, 16, factor[i].inverse);
   520        printf ("), \\\n");
   521        printf ("                   CNST_LIMB(0x");
   522  
   523        if ( factor[i].divisor <= limb_bits)
   524          {
   525            mpz_out_str (stdout, 16, factor[i].mask);
   526          }
   527        else
   528          {
   529            mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
   530            mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
   531            mpz_out_str (stdout, 16, mhi);
   532            printf ("), CNST_LIMB(0x");
   533            mpz_out_str (stdout, 16, mlo);
   534          }
   535        printf (")); \\\n");
   536      }
   537  
   538    printf ("  } while (0)\n");
   539    printf ("\n");
   540  
   541    printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
   542            (1.0 - (total_fraction * 44.0/256.0)) * 100.0);
   543    printf ("\n");
   544  
   545    printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
   546    printf ("#define PERFSQR_DIVISORS  { 256,");
   547    for (i = 0; i < nfactor; i++)
   548        printf (" %d,", factor[i].divisor);
   549    printf (" }\n");
   550  
   551  
   552    mpz_clear (mhi);
   553    mpz_clear (mlo);
   554  }
   555  
   556  int
   557  main (int argc, char *argv[])
   558  {
   559    int  limb_bits, nail_bits;
   560  
   561    if (argc != 3)
   562      {
   563        fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
   564        exit (1);
   565      }
   566  
   567    limb_bits = atoi (argv[1]);
   568    nail_bits = atoi (argv[2]);
   569  
   570    if (limb_bits <= 0
   571        || nail_bits < 0
   572        || nail_bits >= limb_bits)
   573      {
   574        fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
   575                 limb_bits, nail_bits);
   576        exit (1);
   577      }
   578  
   579    generate_sq_res_0x100 (limb_bits);
   580    generate_mod (limb_bits, nail_bits);
   581  
   582    print (limb_bits, nail_bits);
   583  
   584    return 0;
   585  }