github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/gen-psqr.c (about) 1 /* Generate perfect square testing data. 2 3 Copyright 2002-2004, 2012, 2014 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20 or both in parallel, as here. 21 22 The GNU MP Library is distributed in the hope that it will be useful, but 23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25 for more details. 26 27 You should have received copies of the GNU General Public License and the 28 GNU Lesser General Public License along with the GNU MP Library. If not, 29 see https://www.gnu.org/licenses/. */ 30 31 #include <stdio.h> 32 #include <stdlib.h> 33 34 #include "bootstrap.c" 35 36 37 /* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1 38 (plus a PERFSQR_PP modulus), and generate tables indicating quadratic 39 residues and non-residues modulo small factors of that modulus. 40 41 For the usual 32 or 64 bit cases mpn_mod_34lsub1 gets used. That 42 function exists specifically because 2^24-1 and 2^48-1 have nice sets of 43 prime factors. For other limb sizes it's considered, but if it doesn't 44 have good factors then mpn_mod_1 will be used instead. 45 46 When mpn_mod_1 is used, the modulus PERFSQR_PP is created from a 47 selection of small primes, chosen to fill PERFSQR_MOD_BITS of a limb, 48 with that bit count chosen so (2*GMP_LIMB_BITS)*2^PERFSQR_MOD_BITS <= 49 GMP_LIMB_MAX, allowing PERFSQR_MOD_IDX in mpn/generic/perfsqr.c to do its 50 calculation within a single limb. 51 52 In either case primes can be combined to make divisors. The table data 53 then effectively indicates remainders which are quadratic residues mod 54 all the primes. This sort of combining reduces the number of steps 55 needed after mpn_mod_34lsub1 or mpn_mod_1, saving code size and time. 56 Nothing is gained or lost in terms of detections, the same total fraction 57 of non-residues will be identified. 58 59 Nothing particularly sophisticated is attempted for combining factors to 60 make divisors. This is probably a kind of knapsack problem so it'd be 61 too hard to attempt anything completely general. For the usual 32 and 64 62 bit limbs we get a good enough result just pairing the biggest and 63 smallest which fit together, repeatedly. 64 65 Another aim is to get powerful combinations, ie. divisors which identify 66 biggest fraction of non-residues, and have those run first. Again for 67 the usual 32 and 64 bits it seems good enough just to pair for big 68 divisors then sort according to the resulting fraction of non-residues 69 identified. 70 71 Also in this program, a table sq_res_0x100 of residues modulo 256 is 72 generated. This simply fills bits into limbs of the appropriate 73 build-time GMP_LIMB_BITS each. 74 75 */ 76 77 78 /* Normally we aren't using const in gen*.c programs, so as not to have to 79 bother figuring out if it works, but using it with f_cmp_divisor and 80 f_cmp_fraction avoids warnings from the qsort calls. */ 81 82 /* Same tests as gmp.h. */ 83 #if defined (__STDC__) \ 84 || defined (__cplusplus) \ 85 || defined (_AIX) \ 86 || defined (__DECC) \ 87 || (defined (__mips) && defined (_SYSTYPE_SVR4)) \ 88 || defined (_MSC_VER) \ 89 || defined (_WIN32) 90 #define HAVE_CONST 1 91 #endif 92 93 #if ! HAVE_CONST 94 #define const 95 #endif 96 97 98 mpz_t *sq_res_0x100; /* table of limbs */ 99 int nsq_res_0x100; /* elements in sq_res_0x100 array */ 100 int sq_res_0x100_num; /* squares in sq_res_0x100 */ 101 double sq_res_0x100_fraction; /* sq_res_0x100_num / 256 */ 102 103 int mod34_bits; /* 3*GMP_NUMB_BITS/4 */ 104 int mod_bits; /* bits from PERFSQR_MOD_34 or MOD_PP */ 105 int max_divisor; /* all divisors <= max_divisor */ 106 int max_divisor_bits; /* ceil(log2(max_divisor)) */ 107 double total_fraction; /* of squares */ 108 mpz_t pp; /* product of primes, or 0 if mod_34lsub1 used */ 109 mpz_t pp_norm; /* pp shifted so NUMB high bit set */ 110 mpz_t pp_inverted; /* invert_limb style inverse */ 111 mpz_t mod_mask; /* 2^mod_bits-1 */ 112 char mod34_excuse[128]; /* why mod_34lsub1 not used (if it's not) */ 113 114 /* raw list of divisors of 2^mod34_bits-1 or pp, just to show in a comment */ 115 struct rawfactor_t { 116 int divisor; 117 int multiplicity; 118 }; 119 struct rawfactor_t *rawfactor; 120 int nrawfactor; 121 122 /* factors of 2^mod34_bits-1 or pp and associated data, after combining etc */ 123 struct factor_t { 124 int divisor; 125 mpz_t inverse; /* 1/divisor mod 2^mod_bits */ 126 mpz_t mask; /* indicating squares mod divisor */ 127 double fraction; /* squares/total */ 128 }; 129 struct factor_t *factor; 130 int nfactor; /* entries in use in factor array */ 131 int factor_alloc; /* entries allocated to factor array */ 132 133 134 int 135 f_cmp_divisor (const void *parg, const void *qarg) 136 { 137 const struct factor_t *p, *q; 138 p = (const struct factor_t *) parg; 139 q = (const struct factor_t *) qarg; 140 if (p->divisor > q->divisor) 141 return 1; 142 else if (p->divisor < q->divisor) 143 return -1; 144 else 145 return 0; 146 } 147 148 int 149 f_cmp_fraction (const void *parg, const void *qarg) 150 { 151 const struct factor_t *p, *q; 152 p = (const struct factor_t *) parg; 153 q = (const struct factor_t *) qarg; 154 if (p->fraction > q->fraction) 155 return 1; 156 else if (p->fraction < q->fraction) 157 return -1; 158 else 159 return 0; 160 } 161 162 /* Remove array[idx] by copying the remainder down, and adjust narray 163 accordingly. */ 164 #define COLLAPSE_ELEMENT(array, idx, narray) \ 165 do { \ 166 memmove (&(array)[idx], \ 167 &(array)[idx+1], \ 168 ((narray)-((idx)+1)) * sizeof (array[0])); \ 169 (narray)--; \ 170 } while (0) 171 172 173 /* return n*2^p mod m */ 174 int 175 mul_2exp_mod (int n, int p, int m) 176 { 177 while (--p >= 0) 178 n = (2 * n) % m; 179 return n; 180 } 181 182 /* return -n mod m */ 183 int 184 neg_mod (int n, int m) 185 { 186 assert (n >= 0 && n < m); 187 return (n == 0 ? 0 : m-n); 188 } 189 190 /* Set "mask" to a value such that "mask & (1<<idx)" is non-zero if 191 "-(idx<<mod_bits)" can be a square modulo m. */ 192 void 193 square_mask (mpz_t mask, int m) 194 { 195 int p, i, r, idx; 196 197 p = mul_2exp_mod (1, mod_bits, m); 198 p = neg_mod (p, m); 199 200 mpz_set_ui (mask, 0L); 201 for (i = 0; i < m; i++) 202 { 203 r = (i * i) % m; 204 idx = (r * p) % m; 205 mpz_setbit (mask, (unsigned long) idx); 206 } 207 } 208 209 void 210 generate_sq_res_0x100 (int limb_bits) 211 { 212 int i, res; 213 214 nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits; 215 sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100)); 216 217 for (i = 0; i < nsq_res_0x100; i++) 218 mpz_init_set_ui (sq_res_0x100[i], 0L); 219 220 for (i = 0; i < 0x100; i++) 221 { 222 res = (i * i) % 0x100; 223 mpz_setbit (sq_res_0x100[res / limb_bits], 224 (unsigned long) (res % limb_bits)); 225 } 226 227 sq_res_0x100_num = 0; 228 for (i = 0; i < nsq_res_0x100; i++) 229 sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]); 230 sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0; 231 } 232 233 void 234 generate_mod (int limb_bits, int nail_bits) 235 { 236 int numb_bits = limb_bits - nail_bits; 237 int i, divisor; 238 239 mpz_init_set_ui (pp, 0L); 240 mpz_init_set_ui (pp_norm, 0L); 241 mpz_init_set_ui (pp_inverted, 0L); 242 243 /* no more than limb_bits many factors in a one limb modulus (and of 244 course in reality nothing like that many) */ 245 factor_alloc = limb_bits; 246 factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor)); 247 rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor)); 248 249 if (numb_bits % 4 != 0) 250 { 251 strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0"); 252 goto use_pp; 253 } 254 255 max_divisor = 2*limb_bits; 256 max_divisor_bits = log2_ceil (max_divisor); 257 258 if (numb_bits / 4 < max_divisor_bits) 259 { 260 /* Wind back to one limb worth of max_divisor, if that will let us use 261 mpn_mod_34lsub1. */ 262 max_divisor = limb_bits; 263 max_divisor_bits = log2_ceil (max_divisor); 264 265 if (numb_bits / 4 < max_divisor_bits) 266 { 267 strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small"); 268 goto use_pp; 269 } 270 } 271 272 { 273 /* Can use mpn_mod_34lsub1, find small factors of 2^mod34_bits-1. */ 274 mpz_t m, q, r; 275 int multiplicity; 276 277 mod34_bits = (numb_bits / 4) * 3; 278 279 /* mpn_mod_34lsub1 returns a full limb value, PERFSQR_MOD_34 folds it at 280 the mod34_bits mark, adding the two halves for a remainder of at most 281 mod34_bits+1 many bits */ 282 mod_bits = mod34_bits + 1; 283 284 mpz_init_set_ui (m, 1L); 285 mpz_mul_2exp (m, m, mod34_bits); 286 mpz_sub_ui (m, m, 1L); 287 288 mpz_init (q); 289 mpz_init (r); 290 291 for (i = 3; i <= max_divisor; i+=2) 292 { 293 if (! isprime (i)) 294 continue; 295 296 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); 297 if (mpz_sgn (r) != 0) 298 continue; 299 300 /* if a repeated prime is found it's used as an i^n in one factor */ 301 divisor = 1; 302 multiplicity = 0; 303 do 304 { 305 if (divisor > max_divisor / i) 306 break; 307 multiplicity++; 308 mpz_set (m, q); 309 mpz_tdiv_qr_ui (q, r, m, (unsigned long) i); 310 } 311 while (mpz_sgn (r) == 0); 312 313 assert (nrawfactor < factor_alloc); 314 rawfactor[nrawfactor].divisor = i; 315 rawfactor[nrawfactor].multiplicity = multiplicity; 316 nrawfactor++; 317 } 318 319 mpz_clear (m); 320 mpz_clear (q); 321 mpz_clear (r); 322 } 323 324 if (nrawfactor <= 2) 325 { 326 mpz_t new_pp; 327 328 sprintf (mod34_excuse, "only %d small factor%s", 329 nrawfactor, nrawfactor == 1 ? "" : "s"); 330 331 use_pp: 332 /* reset to two limbs of max_divisor, in case the mpn_mod_34lsub1 code 333 tried with just one */ 334 max_divisor = 2*limb_bits; 335 max_divisor_bits = log2_ceil (max_divisor); 336 337 mpz_init (new_pp); 338 nrawfactor = 0; 339 mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits); 340 341 /* one copy of each small prime */ 342 mpz_set_ui (pp, 1L); 343 for (i = 3; i <= max_divisor; i+=2) 344 { 345 if (! isprime (i)) 346 continue; 347 348 mpz_mul_ui (new_pp, pp, (unsigned long) i); 349 if (mpz_sizeinbase (new_pp, 2) > mod_bits) 350 break; 351 mpz_set (pp, new_pp); 352 353 assert (nrawfactor < factor_alloc); 354 rawfactor[nrawfactor].divisor = i; 355 rawfactor[nrawfactor].multiplicity = 1; 356 nrawfactor++; 357 } 358 359 /* Plus an extra copy of one or more of the primes selected, if that 360 still fits in max_divisor and the total in mod_bits. Usually only 361 3 or 5 will be candidates */ 362 for (i = nrawfactor-1; i >= 0; i--) 363 { 364 if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor) 365 continue; 366 mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor); 367 if (mpz_sizeinbase (new_pp, 2) > mod_bits) 368 continue; 369 mpz_set (pp, new_pp); 370 371 rawfactor[i].multiplicity++; 372 } 373 374 mod_bits = mpz_sizeinbase (pp, 2); 375 376 mpz_set (pp_norm, pp); 377 while (mpz_sizeinbase (pp_norm, 2) < numb_bits) 378 mpz_add (pp_norm, pp_norm, pp_norm); 379 380 mpz_preinv_invert (pp_inverted, pp_norm, numb_bits); 381 382 mpz_clear (new_pp); 383 } 384 385 /* start the factor array */ 386 for (i = 0; i < nrawfactor; i++) 387 { 388 int j; 389 assert (nfactor < factor_alloc); 390 factor[nfactor].divisor = 1; 391 for (j = 0; j < rawfactor[i].multiplicity; j++) 392 factor[nfactor].divisor *= rawfactor[i].divisor; 393 nfactor++; 394 } 395 396 combine: 397 /* Combine entries in the factor array. Combine the smallest entry with 398 the biggest one that will fit with it (ie. under max_divisor), then 399 repeat that with the new smallest entry. */ 400 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor); 401 for (i = nfactor-1; i >= 1; i--) 402 { 403 if (factor[i].divisor <= max_divisor / factor[0].divisor) 404 { 405 factor[0].divisor *= factor[i].divisor; 406 COLLAPSE_ELEMENT (factor, i, nfactor); 407 goto combine; 408 } 409 } 410 411 total_fraction = 1.0; 412 for (i = 0; i < nfactor; i++) 413 { 414 mpz_init (factor[i].inverse); 415 mpz_invert_ui_2exp (factor[i].inverse, 416 (unsigned long) factor[i].divisor, 417 (unsigned long) mod_bits); 418 419 mpz_init (factor[i].mask); 420 square_mask (factor[i].mask, factor[i].divisor); 421 422 /* fraction of possible squares */ 423 factor[i].fraction = (double) mpz_popcount (factor[i].mask) 424 / factor[i].divisor; 425 426 /* total fraction of possible squares */ 427 total_fraction *= factor[i].fraction; 428 } 429 430 /* best tests first (ie. smallest fraction) */ 431 qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction); 432 } 433 434 void 435 print (int limb_bits, int nail_bits) 436 { 437 int i; 438 mpz_t mhi, mlo; 439 440 printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n"); 441 printf ("\n"); 442 443 printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n", 444 limb_bits, nail_bits); 445 printf ("Error, error, this data is for %d bit limb and %d bit nail\n", 446 limb_bits, nail_bits); 447 printf ("#endif\n"); 448 printf ("\n"); 449 450 printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n"); 451 printf (" This test identifies %.2f%% as non-squares (%d/256). */\n", 452 (1.0 - sq_res_0x100_fraction) * 100.0, 453 0x100 - sq_res_0x100_num); 454 printf ("static const mp_limb_t\n"); 455 printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100); 456 for (i = 0; i < nsq_res_0x100; i++) 457 { 458 printf (" CNST_LIMB(0x"); 459 mpz_out_str (stdout, 16, sq_res_0x100[i]); 460 printf ("),\n"); 461 } 462 printf ("};\n"); 463 printf ("\n"); 464 465 if (mpz_sgn (pp) != 0) 466 { 467 printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse); 468 printf ("/* PERFSQR_PP = "); 469 } 470 else 471 printf ("/* 2^%d-1 = ", mod34_bits); 472 for (i = 0; i < nrawfactor; i++) 473 { 474 if (i != 0) 475 printf (" * "); 476 printf ("%d", rawfactor[i].divisor); 477 if (rawfactor[i].multiplicity != 1) 478 printf ("^%d", rawfactor[i].multiplicity); 479 } 480 printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : ""); 481 482 printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits); 483 if (mpz_sgn (pp) != 0) 484 { 485 printf ("#define PERFSQR_PP CNST_LIMB(0x"); 486 mpz_out_str (stdout, 16, pp); 487 printf (")\n"); 488 printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x"); 489 mpz_out_str (stdout, 16, pp_norm); 490 printf (")\n"); 491 printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x"); 492 mpz_out_str (stdout, 16, pp_inverted); 493 printf (")\n"); 494 } 495 printf ("\n"); 496 497 mpz_init (mhi); 498 mpz_init (mlo); 499 500 printf ("/* This test identifies %.2f%% as non-squares. */\n", 501 (1.0 - total_fraction) * 100.0); 502 printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n"); 503 printf (" do { \\\n"); 504 printf (" mp_limb_t r; \\\n"); 505 if (mpz_sgn (pp) != 0) 506 printf (" PERFSQR_MOD_PP (r, up, usize); \\\n"); 507 else 508 printf (" PERFSQR_MOD_34 (r, up, usize); \\\n"); 509 510 for (i = 0; i < nfactor; i++) 511 { 512 printf (" \\\n"); 513 printf (" /* %5.2f%% */ \\\n", 514 (1.0 - factor[i].fraction) * 100.0); 515 516 printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x", 517 factor[i].divisor <= limb_bits ? 1 : 2, 518 factor[i].divisor); 519 mpz_out_str (stdout, 16, factor[i].inverse); 520 printf ("), \\\n"); 521 printf (" CNST_LIMB(0x"); 522 523 if ( factor[i].divisor <= limb_bits) 524 { 525 mpz_out_str (stdout, 16, factor[i].mask); 526 } 527 else 528 { 529 mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits); 530 mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits); 531 mpz_out_str (stdout, 16, mhi); 532 printf ("), CNST_LIMB(0x"); 533 mpz_out_str (stdout, 16, mlo); 534 } 535 printf (")); \\\n"); 536 } 537 538 printf (" } while (0)\n"); 539 printf ("\n"); 540 541 printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n", 542 (1.0 - (total_fraction * 44.0/256.0)) * 100.0); 543 printf ("\n"); 544 545 printf ("/* helper for tests/mpz/t-perfsqr.c */\n"); 546 printf ("#define PERFSQR_DIVISORS { 256,"); 547 for (i = 0; i < nfactor; i++) 548 printf (" %d,", factor[i].divisor); 549 printf (" }\n"); 550 551 552 mpz_clear (mhi); 553 mpz_clear (mlo); 554 } 555 556 int 557 main (int argc, char *argv[]) 558 { 559 int limb_bits, nail_bits; 560 561 if (argc != 3) 562 { 563 fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n"); 564 exit (1); 565 } 566 567 limb_bits = atoi (argv[1]); 568 nail_bits = atoi (argv[2]); 569 570 if (limb_bits <= 0 571 || nail_bits < 0 572 || nail_bits >= limb_bits) 573 { 574 fprintf (stderr, "Invalid limb/nail bits: %d %d\n", 575 limb_bits, nail_bits); 576 exit (1); 577 } 578 579 generate_sq_res_0x100 (limb_bits); 580 generate_mod (limb_bits, nail_bits); 581 582 print (limb_bits, nail_bits); 583 584 return 0; 585 }