github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/tests/devel/try.c (about) 1 /* Run some tests on various mpn routines. 2 3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO 4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP. 5 6 Copyright 2000-2006, 2008, 2009, 2011, 2012 Free Software Foundation, Inc. 7 8 This file is part of the GNU MP Library test suite. 9 10 The GNU MP Library test suite is free software; you can redistribute it 11 and/or modify it under the terms of the GNU General Public License as 12 published by the Free Software Foundation; either version 3 of the License, 13 or (at your option) any later version. 14 15 The GNU MP Library test suite is distributed in the hope that it will be 16 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 18 Public License for more details. 19 20 You should have received a copy of the GNU General Public License along with 21 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ 22 23 24 /* Usage: try [options] <function>... 25 26 For example, "./try mpn_add_n" to run tests of that function. 27 28 Combinations of alignments and overlaps are tested, with redzones above 29 or below the destinations, and with the sources write-protected. 30 31 The number of tests performed becomes ridiculously large with all the 32 combinations, and for that reason this can't be a part of a "make check", 33 it's meant only for development. The code isn't very pretty either. 34 35 During development it can help to disable the redzones, since seeing the 36 rest of the destination written can show where the wrong part is, or if 37 the dst pointers are off by 1 or whatever. The magic DEADVAL initial 38 fill (see below) will show locations never written. 39 40 The -s option can be used to test only certain size operands, which is 41 useful if some new code doesn't yet support say sizes less than the 42 unrolling, or whatever. 43 44 When a problem occurs it'll of course be necessary to run the program 45 under gdb to find out quite where, how and why it's going wrong. Disable 46 the spinner with the -W option when doing this, or single stepping won't 47 work. Using the "-1" option to run with simple data can be useful. 48 49 New functions to test can be added in try_array[]. If a new TYPE is 50 required then add it to the existing constants, set up its parameters in 51 param_init(), and add it to the call() function. Extra parameter fields 52 can be added if necessary, or further interpretations given to existing 53 fields. 54 55 56 Portability: 57 58 This program is not designed for use on Cray vector systems under Unicos, 59 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems 60 don't really have pages or mprotect. We could arrange to run the tests 61 without the redzones, but we haven't bothered currently. 62 63 64 Enhancements: 65 66 umul_ppmm support is not very good, lots of source data is generated 67 whereas only two limbs are needed. 68 69 Make a little scheme for interpreting the "SIZE" selections uniformly. 70 71 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2 72 source limbs. Possibly increase the default repetitions in that case. 73 74 Automatically detect gdb and disable the spinner (use -W for now). 75 76 Make a way to re-run a failing case in the debugger. Have an option to 77 snapshot each test case before it's run so the data is available if a 78 segv occurs. (This should be more reliable than the current print_all() 79 in the signal handler.) 80 81 When alignment means a dst isn't hard against the redzone, check the 82 space in between remains unchanged. 83 84 When a source overlaps a destination, don't run both s[i].high 0 and 1, 85 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i]. 86 87 When partial overlaps aren't done, don't loop over source alignments 88 during overlaps. 89 90 Try to make the looping code a bit less horrible. Right now it's pretty 91 hard to see what iterations are actually done. 92 93 Perhaps specific setups and loops for each style of function under test 94 would be clearer than a parameterized general loop. There's lots of 95 stuff common to all functions, but the exceptions get messy. 96 97 When there's no overlap, run with both src>dst and src<dst. A subtle 98 calling-conventions violation occurred in a P6 copy which depended on the 99 relative location of src and dst. 100 101 multiplier_N is more or less a third source region for the addmul_N 102 routines, and could be done with the redzoned region scheme. 103 104 */ 105 106 107 /* always do assertion checking */ 108 #define WANT_ASSERT 1 109 110 #include "config.h" 111 112 #include <errno.h> 113 #include <limits.h> 114 #include <signal.h> 115 #include <stdio.h> 116 #include <stdlib.h> 117 #include <string.h> 118 #include <time.h> 119 120 #if HAVE_UNISTD_H 121 #include <unistd.h> 122 #endif 123 124 #if HAVE_SYS_MMAN_H 125 #include <sys/mman.h> 126 #endif 127 128 #include "gmp.h" 129 #include "gmp-impl.h" 130 #include "longlong.h" 131 #include "tests.h" 132 133 134 #if !HAVE_DECL_OPTARG 135 extern char *optarg; 136 extern int optind, opterr; 137 #endif 138 139 #if ! HAVE_DECL_SYS_NERR 140 extern int sys_nerr; 141 #endif 142 143 #if ! HAVE_DECL_SYS_ERRLIST 144 extern char *sys_errlist[]; 145 #endif 146 147 #if ! HAVE_STRERROR 148 char * 149 strerror (int n) 150 { 151 if (n < 0 || n >= sys_nerr) 152 return "errno out of range"; 153 else 154 return sys_errlist[n]; 155 } 156 #endif 157 158 /* Rumour has it some systems lack a define of PROT_NONE. */ 159 #ifndef PROT_NONE 160 #define PROT_NONE 0 161 #endif 162 163 /* Dummy defines for when mprotect doesn't exist. */ 164 #ifndef PROT_READ 165 #define PROT_READ 0 166 #endif 167 #ifndef PROT_WRITE 168 #define PROT_WRITE 0 169 #endif 170 171 /* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have 172 _SC_PAGE_SIZE instead. */ 173 #if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE) 174 #define _SC_PAGESIZE _SC_PAGE_SIZE 175 #endif 176 177 178 #ifdef EXTRA_PROTOS 179 EXTRA_PROTOS 180 #endif 181 #ifdef EXTRA_PROTOS2 182 EXTRA_PROTOS2 183 #endif 184 185 186 #define DEFAULT_REPETITIONS 10 187 188 int option_repetitions = DEFAULT_REPETITIONS; 189 int option_spinner = 1; 190 int option_redzones = 1; 191 int option_firstsize = 0; 192 int option_lastsize = 500; 193 int option_firstsize2 = 0; 194 195 #define ALIGNMENTS 4 196 #define OVERLAPS 4 197 #define CARRY_RANDOMS 5 198 #define MULTIPLIER_RANDOMS 5 199 #define DIVISOR_RANDOMS 5 200 #define FRACTION_COUNT 4 201 202 int option_print = 0; 203 204 #define DATA_TRAND 0 205 #define DATA_ZEROS 1 206 #define DATA_SEQ 2 207 #define DATA_FFS 3 208 #define DATA_2FD 4 209 int option_data = DATA_TRAND; 210 211 212 mp_size_t pagesize; 213 #define PAGESIZE_LIMBS (pagesize / GMP_LIMB_BYTES) 214 215 /* must be a multiple of the page size */ 216 #define REDZONE_BYTES (pagesize * 16) 217 #define REDZONE_LIMBS (REDZONE_BYTES / GMP_LIMB_BYTES) 218 219 220 #define MAX3(x,y,z) (MAX (x, MAX (y, z))) 221 222 #if GMP_LIMB_BITS == 32 223 #define DEADVAL CNST_LIMB(0xDEADBEEF) 224 #else 225 #define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE) 226 #endif 227 228 229 struct region_t { 230 mp_ptr ptr; 231 mp_size_t size; 232 }; 233 234 235 #define TRAP_NOWHERE 0 236 #define TRAP_REF 1 237 #define TRAP_FUN 2 238 #define TRAP_SETUPS 3 239 int trap_location = TRAP_NOWHERE; 240 241 242 #define NUM_SOURCES 5 243 #define NUM_DESTS 2 244 245 struct source_t { 246 struct region_t region; 247 int high; 248 mp_size_t align; 249 mp_ptr p; 250 }; 251 252 struct source_t s[NUM_SOURCES]; 253 254 struct dest_t { 255 int high; 256 mp_size_t align; 257 mp_size_t size; 258 }; 259 260 struct dest_t d[NUM_DESTS]; 261 262 struct source_each_t { 263 mp_ptr p; 264 }; 265 266 struct dest_each_t { 267 struct region_t region; 268 mp_ptr p; 269 }; 270 271 mp_size_t size; 272 mp_size_t size2; 273 unsigned long shift; 274 mp_limb_t carry; 275 mp_limb_t divisor; 276 mp_limb_t multiplier; 277 mp_limb_t multiplier_N[8]; 278 279 struct each_t { 280 const char *name; 281 struct dest_each_t d[NUM_DESTS]; 282 struct source_each_t s[NUM_SOURCES]; 283 mp_limb_t retval; 284 }; 285 286 struct each_t ref = { "Ref" }; 287 struct each_t fun = { "Fun" }; 288 289 #define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size) 290 291 void validate_fail (void); 292 293 294 #if HAVE_TRY_NEW_C 295 #include "try-new.c" 296 #endif 297 298 299 typedef mp_limb_t (*tryfun_t) (ANYARGS); 300 301 struct try_t { 302 char retval; 303 304 char src[NUM_SOURCES]; 305 char dst[NUM_DESTS]; 306 307 #define SIZE_YES 1 308 #define SIZE_ALLOW_ZERO 2 309 #define SIZE_1 3 /* 1 limb */ 310 #define SIZE_2 4 /* 2 limbs */ 311 #define SIZE_3 5 /* 3 limbs */ 312 #define SIZE_4 6 /* 4 limbs */ 313 #define SIZE_6 7 /* 6 limbs */ 314 #define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */ 315 #define SIZE_SIZE2 9 316 #define SIZE_PLUS_1 10 317 #define SIZE_SUM 11 318 #define SIZE_DIFF 12 319 #define SIZE_DIFF_PLUS_1 13 320 #define SIZE_DIFF_PLUS_3 14 321 #define SIZE_RETVAL 15 322 #define SIZE_CEIL_HALF 16 323 #define SIZE_GET_STR 17 324 #define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */ 325 #define SIZE_ODD 19 326 char size; 327 char size2; 328 char dst_size[NUM_DESTS]; 329 330 /* multiplier_N size in limbs */ 331 mp_size_t msize; 332 333 char dst_bytes[NUM_DESTS]; 334 335 char dst0_from_src1; 336 337 #define CARRY_BIT 1 /* single bit 0 or 1 */ 338 #define CARRY_3 2 /* 0, 1, 2 */ 339 #define CARRY_4 3 /* 0 to 3 */ 340 #define CARRY_LIMB 4 /* any limb value */ 341 #define CARRY_DIVISOR 5 /* carry<divisor */ 342 char carry; 343 344 /* a fudge to tell the output when to print negatives */ 345 char carry_sign; 346 347 char multiplier; 348 char shift; 349 350 #define DIVISOR_LIMB 1 351 #define DIVISOR_NORM 2 352 #define DIVISOR_ODD 3 353 char divisor; 354 355 #define DATA_NON_ZERO 1 356 #define DATA_GCD 2 357 #define DATA_SRC0_ODD 3 358 #define DATA_SRC0_HIGHBIT 4 359 #define DATA_SRC1_ODD 5 360 #define DATA_SRC1_ODD_PRIME 6 361 #define DATA_SRC1_HIGHBIT 7 362 #define DATA_MULTIPLE_DIVISOR 8 363 #define DATA_UDIV_QRNND 9 364 #define DATA_DIV_QR_1 10 365 char data; 366 367 /* Default is allow full overlap. */ 368 #define OVERLAP_NONE 1 369 #define OVERLAP_LOW_TO_HIGH 2 370 #define OVERLAP_HIGH_TO_LOW 3 371 #define OVERLAP_NOT_SRCS 4 372 #define OVERLAP_NOT_SRC2 8 373 #define OVERLAP_NOT_DST2 16 374 char overlap; 375 376 tryfun_t reference; 377 const char *reference_name; 378 379 void (*validate) (void); 380 const char *validate_name; 381 }; 382 383 struct try_t *tr; 384 385 386 void 387 validate_mod_34lsub1 (void) 388 { 389 #define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1) 390 391 mp_srcptr ptr = s[0].p; 392 int error = 0; 393 mp_limb_t got, got_mod, want, want_mod; 394 395 ASSERT (size >= 1); 396 397 got = fun.retval; 398 got_mod = got % CNST_34LSUB1; 399 400 want = refmpn_mod_34lsub1 (ptr, size); 401 want_mod = want % CNST_34LSUB1; 402 403 if (got_mod != want_mod) 404 { 405 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got); 406 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want); 407 error = 1; 408 } 409 410 if (error) 411 validate_fail (); 412 } 413 414 void 415 validate_divexact_1 (void) 416 { 417 mp_srcptr src = s[0].p; 418 mp_srcptr dst = fun.d[0].p; 419 int error = 0; 420 421 ASSERT (size >= 1); 422 423 { 424 mp_ptr tp = refmpn_malloc_limbs (size); 425 mp_limb_t rem; 426 427 rem = refmpn_divrem_1 (tp, 0, src, size, divisor); 428 if (rem != 0) 429 { 430 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem); 431 error = 1; 432 } 433 if (! refmpn_equal_anynail (tp, dst, size)) 434 { 435 printf ("Quotient a/d wrong\n"); 436 mpn_trace ("fun ", dst, size); 437 mpn_trace ("want", tp, size); 438 error = 1; 439 } 440 free (tp); 441 } 442 443 if (error) 444 validate_fail (); 445 } 446 447 void 448 validate_bdiv_q_1 449 (void) 450 { 451 mp_srcptr src = s[0].p; 452 mp_srcptr dst = fun.d[0].p; 453 int error = 0; 454 455 ASSERT (size >= 1); 456 457 { 458 mp_ptr tp = refmpn_malloc_limbs (size + 1); 459 460 refmpn_mul_1 (tp, dst, size, divisor); 461 /* Set ignored low bits */ 462 tp[0] |= (src[0] & LOW_ZEROS_MASK (divisor)); 463 if (! refmpn_equal_anynail (tp, src, size)) 464 { 465 printf ("Bdiv wrong: res * divisor != src (mod B^size)\n"); 466 mpn_trace ("res ", dst, size); 467 mpn_trace ("src ", src, size); 468 error = 1; 469 } 470 free (tp); 471 } 472 473 if (error) 474 validate_fail (); 475 } 476 477 478 void 479 validate_modexact_1c_odd (void) 480 { 481 mp_srcptr ptr = s[0].p; 482 mp_limb_t r = fun.retval; 483 int error = 0; 484 485 ASSERT (size >= 1); 486 ASSERT (divisor & 1); 487 488 if ((r & GMP_NAIL_MASK) != 0) 489 printf ("r has non-zero nail\n"); 490 491 if (carry < divisor) 492 { 493 if (! (r < divisor)) 494 { 495 printf ("Don't have r < divisor\n"); 496 error = 1; 497 } 498 } 499 else /* carry >= divisor */ 500 { 501 if (! (r <= divisor)) 502 { 503 printf ("Don't have r <= divisor\n"); 504 error = 1; 505 } 506 } 507 508 { 509 mp_limb_t c = carry % divisor; 510 mp_ptr tp = refmpn_malloc_limbs (size+1); 511 mp_size_t k; 512 513 for (k = size-1; k <= size; k++) 514 { 515 /* set {tp,size+1} to r*b^k + a - c */ 516 refmpn_copyi (tp, ptr, size); 517 tp[size] = 0; 518 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r)); 519 if (refmpn_sub_1 (tp, tp, size+1, c)) 520 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor)); 521 522 if (refmpn_mod_1 (tp, size+1, divisor) == 0) 523 goto good_remainder; 524 } 525 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n"); 526 error = 1; 527 528 good_remainder: 529 free (tp); 530 } 531 532 if (error) 533 validate_fail (); 534 } 535 536 void 537 validate_modexact_1_odd (void) 538 { 539 carry = 0; 540 validate_modexact_1c_odd (); 541 } 542 543 void 544 validate_div_qr_1_pi1 (void) 545 { 546 mp_srcptr up = ref.s[0].p; 547 mp_size_t un = size; 548 mp_size_t uh = ref.s[1].p[0]; 549 mp_srcptr qp = fun.d[0].p; 550 mp_limb_t r = fun.retval; 551 mp_limb_t cy; 552 int cmp; 553 mp_ptr tp; 554 if (r >= divisor) 555 { 556 gmp_printf ("Bad remainder %Md, d = %Md\n", r, divisor); 557 validate_fail (); 558 } 559 tp = refmpn_malloc_limbs (un); 560 cy = refmpn_mul_1 (tp, qp, un, divisor); 561 cy += refmpn_add_1 (tp, tp, un, r); 562 if (cy != uh || refmpn_cmp (tp, up, un) != 0) 563 { 564 gmp_printf ("Incorrect result, size %ld.\n" 565 "d = %Mx, u = %Mx, %Nx\n" 566 "got: r = %Mx, q = %Nx\n" 567 "q d + r = %Mx, %Nx", 568 (long) un, 569 divisor, uh, up, un, 570 r, qp, un, 571 cy, tp, un); 572 validate_fail (); 573 } 574 free (tp); 575 } 576 577 578 void 579 validate_sqrtrem (void) 580 { 581 mp_srcptr orig_ptr = s[0].p; 582 mp_size_t orig_size = size; 583 mp_size_t root_size = (size+1)/2; 584 mp_srcptr root_ptr = fun.d[0].p; 585 mp_size_t rem_size = fun.retval; 586 mp_srcptr rem_ptr = fun.d[1].p; 587 mp_size_t prod_size = 2*root_size; 588 mp_ptr p; 589 int error = 0; 590 591 if (rem_size < 0 || rem_size > size) 592 { 593 printf ("Bad remainder size retval %ld\n", (long) rem_size); 594 validate_fail (); 595 } 596 597 p = refmpn_malloc_limbs (prod_size); 598 599 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1); 600 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0) 601 { 602 printf ("Remainder bigger than 2*root\n"); 603 error = 1; 604 } 605 606 refmpn_sqr (p, root_ptr, root_size); 607 if (rem_size != 0) 608 refmpn_add (p, p, prod_size, rem_ptr, rem_size); 609 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0) 610 { 611 printf ("root^2+rem != original\n"); 612 mpn_trace ("prod", p, prod_size); 613 error = 1; 614 } 615 free (p); 616 617 if (error) 618 validate_fail (); 619 } 620 621 void 622 validate_sqrt (void) 623 { 624 mp_srcptr orig_ptr = s[0].p; 625 mp_size_t orig_size = size; 626 mp_size_t root_size = (size+1)/2; 627 mp_srcptr root_ptr = fun.d[0].p; 628 int perf_pow = (fun.retval == 0); 629 mp_size_t prod_size = 2*root_size; 630 mp_ptr p; 631 int error = 0; 632 633 p = refmpn_malloc_limbs (prod_size); 634 635 refmpn_sqr (p, root_ptr, root_size); 636 MPN_NORMALIZE (p, prod_size); 637 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != - !perf_pow) 638 { 639 printf ("root^2 bigger than original, or wrong return value.\n"); 640 mpn_trace ("prod...", p, prod_size); 641 error = 1; 642 } 643 644 refmpn_sub (p, orig_ptr,orig_size, p,prod_size); 645 MPN_NORMALIZE (p, prod_size); 646 if (prod_size >= root_size && 647 refmpn_sub (p, p,prod_size, root_ptr, root_size) == 0 && 648 refmpn_cmp_twosizes (p, prod_size, root_ptr, root_size) > 0) 649 { 650 printf ("(root+1)^2 smaller than original.\n"); 651 mpn_trace ("prod", p, prod_size); 652 error = 1; 653 } 654 free (p); 655 656 if (error) 657 validate_fail (); 658 } 659 660 661 /* These types are indexes into the param[] array and are arbitrary so long 662 as they're all distinct and within the size of param[]. Renumber 663 whenever necessary or desired. */ 664 665 enum { 666 TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC, 667 668 TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N, 669 TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N, 670 671 TYPE_MUL_1, TYPE_MUL_1C, 672 673 TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6, 674 675 TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C, 676 677 TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6, 678 TYPE_ADDMUL_7, TYPE_ADDMUL_8, 679 680 TYPE_ADDSUB_N, TYPE_ADDSUB_NC, 681 682 TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC, 683 684 TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM, 685 686 TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N, 687 TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1, 688 TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2, 689 TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N, 690 TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1, 691 TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N, 692 TYPE_RSH1ADD_N, TYPE_RSH1SUB_N, 693 694 TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC, 695 TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC, 696 TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC, 697 698 TYPE_ADDCND_N, TYPE_SUBCND_N, 699 700 TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1, 701 TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1, 702 TYPE_DIV_QR_1N_PI1, 703 TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R, 704 705 TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C, 706 TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD, 707 708 TYPE_INVERT, TYPE_BINVERT, 709 710 TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER, 711 TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER, 712 TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE, 713 714 TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N, 715 TYPE_XOR_N, TYPE_XNOR_N, 716 717 TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R, 718 TYPE_MULLO_N, TYPE_SQRLO, TYPE_MULMID_MN, TYPE_MULMID_N, 719 720 TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR, 721 722 TYPE_SQRTREM, TYPE_SQRT, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST, 723 724 TYPE_EXTRA 725 }; 726 727 struct try_t param[TYPE_EXTRA]; 728 729 730 void 731 param_init (void) 732 { 733 struct try_t *p; 734 735 #define COPY(index) memcpy (p, ¶m[index], sizeof (*p)) 736 737 #define REFERENCE(fun) \ 738 p->reference = (tryfun_t) fun; \ 739 p->reference_name = #fun 740 #define VALIDATE(fun) \ 741 p->validate = fun; \ 742 p->validate_name = #fun 743 744 745 p = ¶m[TYPE_ADD_N]; 746 p->retval = 1; 747 p->dst[0] = 1; 748 p->src[0] = 1; 749 p->src[1] = 1; 750 REFERENCE (refmpn_add_n); 751 752 p = ¶m[TYPE_ADD_NC]; 753 COPY (TYPE_ADD_N); 754 p->carry = CARRY_BIT; 755 REFERENCE (refmpn_add_nc); 756 757 p = ¶m[TYPE_SUB_N]; 758 COPY (TYPE_ADD_N); 759 REFERENCE (refmpn_sub_n); 760 761 p = ¶m[TYPE_SUB_NC]; 762 COPY (TYPE_ADD_NC); 763 REFERENCE (refmpn_sub_nc); 764 765 p = ¶m[TYPE_ADD]; 766 COPY (TYPE_ADD_N); 767 p->size = SIZE_ALLOW_ZERO; 768 p->size2 = 1; 769 REFERENCE (refmpn_add); 770 771 p = ¶m[TYPE_SUB]; 772 COPY (TYPE_ADD); 773 REFERENCE (refmpn_sub); 774 775 776 p = ¶m[TYPE_ADD_ERR1_N]; 777 p->retval = 1; 778 p->dst[0] = 1; 779 p->dst[1] = 1; 780 p->src[0] = 1; 781 p->src[1] = 1; 782 p->src[2] = 1; 783 p->dst_size[1] = SIZE_2; 784 p->carry = CARRY_BIT; 785 p->overlap = OVERLAP_NOT_DST2; 786 REFERENCE (refmpn_add_err1_n); 787 788 p = ¶m[TYPE_SUB_ERR1_N]; 789 COPY (TYPE_ADD_ERR1_N); 790 REFERENCE (refmpn_sub_err1_n); 791 792 p = ¶m[TYPE_ADD_ERR2_N]; 793 COPY (TYPE_ADD_ERR1_N); 794 p->src[3] = 1; 795 p->dst_size[1] = SIZE_4; 796 REFERENCE (refmpn_add_err2_n); 797 798 p = ¶m[TYPE_SUB_ERR2_N]; 799 COPY (TYPE_ADD_ERR2_N); 800 REFERENCE (refmpn_sub_err2_n); 801 802 p = ¶m[TYPE_ADD_ERR3_N]; 803 COPY (TYPE_ADD_ERR2_N); 804 p->src[4] = 1; 805 p->dst_size[1] = SIZE_6; 806 REFERENCE (refmpn_add_err3_n); 807 808 p = ¶m[TYPE_SUB_ERR3_N]; 809 COPY (TYPE_ADD_ERR3_N); 810 REFERENCE (refmpn_sub_err3_n); 811 812 p = ¶m[TYPE_ADDCND_N]; 813 COPY (TYPE_ADD_N); 814 p->carry = CARRY_BIT; 815 REFERENCE (refmpn_cnd_add_n); 816 817 p = ¶m[TYPE_SUBCND_N]; 818 COPY (TYPE_ADD_N); 819 p->carry = CARRY_BIT; 820 REFERENCE (refmpn_cnd_sub_n); 821 822 823 p = ¶m[TYPE_MUL_1]; 824 p->retval = 1; 825 p->dst[0] = 1; 826 p->src[0] = 1; 827 p->multiplier = 1; 828 p->overlap = OVERLAP_LOW_TO_HIGH; 829 REFERENCE (refmpn_mul_1); 830 831 p = ¶m[TYPE_MUL_1C]; 832 COPY (TYPE_MUL_1); 833 p->carry = CARRY_LIMB; 834 REFERENCE (refmpn_mul_1c); 835 836 837 p = ¶m[TYPE_MUL_2]; 838 p->retval = 1; 839 p->dst[0] = 1; 840 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 841 p->src[0] = 1; 842 p->src[1] = 1; 843 p->msize = 2; 844 p->overlap = OVERLAP_NOT_SRC2; 845 REFERENCE (refmpn_mul_2); 846 847 p = ¶m[TYPE_MUL_3]; 848 COPY (TYPE_MUL_2); 849 p->msize = 3; 850 REFERENCE (refmpn_mul_3); 851 852 p = ¶m[TYPE_MUL_4]; 853 COPY (TYPE_MUL_2); 854 p->msize = 4; 855 REFERENCE (refmpn_mul_4); 856 857 p = ¶m[TYPE_MUL_5]; 858 COPY (TYPE_MUL_2); 859 p->msize = 5; 860 REFERENCE (refmpn_mul_5); 861 862 p = ¶m[TYPE_MUL_6]; 863 COPY (TYPE_MUL_2); 864 p->msize = 6; 865 REFERENCE (refmpn_mul_6); 866 867 868 p = ¶m[TYPE_ADDMUL_1]; 869 p->retval = 1; 870 p->dst[0] = 1; 871 p->src[0] = 1; 872 p->multiplier = 1; 873 p->dst0_from_src1 = 1; 874 REFERENCE (refmpn_addmul_1); 875 876 p = ¶m[TYPE_ADDMUL_1C]; 877 COPY (TYPE_ADDMUL_1); 878 p->carry = CARRY_LIMB; 879 REFERENCE (refmpn_addmul_1c); 880 881 p = ¶m[TYPE_SUBMUL_1]; 882 COPY (TYPE_ADDMUL_1); 883 REFERENCE (refmpn_submul_1); 884 885 p = ¶m[TYPE_SUBMUL_1C]; 886 COPY (TYPE_ADDMUL_1C); 887 REFERENCE (refmpn_submul_1c); 888 889 890 p = ¶m[TYPE_ADDMUL_2]; 891 p->retval = 1; 892 p->dst[0] = 1; 893 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 894 p->src[0] = 1; 895 p->src[1] = 1; 896 p->msize = 2; 897 p->dst0_from_src1 = 1; 898 p->overlap = OVERLAP_NONE; 899 REFERENCE (refmpn_addmul_2); 900 901 p = ¶m[TYPE_ADDMUL_3]; 902 COPY (TYPE_ADDMUL_2); 903 p->msize = 3; 904 REFERENCE (refmpn_addmul_3); 905 906 p = ¶m[TYPE_ADDMUL_4]; 907 COPY (TYPE_ADDMUL_2); 908 p->msize = 4; 909 REFERENCE (refmpn_addmul_4); 910 911 p = ¶m[TYPE_ADDMUL_5]; 912 COPY (TYPE_ADDMUL_2); 913 p->msize = 5; 914 REFERENCE (refmpn_addmul_5); 915 916 p = ¶m[TYPE_ADDMUL_6]; 917 COPY (TYPE_ADDMUL_2); 918 p->msize = 6; 919 REFERENCE (refmpn_addmul_6); 920 921 p = ¶m[TYPE_ADDMUL_7]; 922 COPY (TYPE_ADDMUL_2); 923 p->msize = 7; 924 REFERENCE (refmpn_addmul_7); 925 926 p = ¶m[TYPE_ADDMUL_8]; 927 COPY (TYPE_ADDMUL_2); 928 p->msize = 8; 929 REFERENCE (refmpn_addmul_8); 930 931 932 p = ¶m[TYPE_AND_N]; 933 p->dst[0] = 1; 934 p->src[0] = 1; 935 p->src[1] = 1; 936 REFERENCE (refmpn_and_n); 937 938 p = ¶m[TYPE_ANDN_N]; 939 COPY (TYPE_AND_N); 940 REFERENCE (refmpn_andn_n); 941 942 p = ¶m[TYPE_NAND_N]; 943 COPY (TYPE_AND_N); 944 REFERENCE (refmpn_nand_n); 945 946 p = ¶m[TYPE_IOR_N]; 947 COPY (TYPE_AND_N); 948 REFERENCE (refmpn_ior_n); 949 950 p = ¶m[TYPE_IORN_N]; 951 COPY (TYPE_AND_N); 952 REFERENCE (refmpn_iorn_n); 953 954 p = ¶m[TYPE_NIOR_N]; 955 COPY (TYPE_AND_N); 956 REFERENCE (refmpn_nior_n); 957 958 p = ¶m[TYPE_XOR_N]; 959 COPY (TYPE_AND_N); 960 REFERENCE (refmpn_xor_n); 961 962 p = ¶m[TYPE_XNOR_N]; 963 COPY (TYPE_AND_N); 964 REFERENCE (refmpn_xnor_n); 965 966 967 p = ¶m[TYPE_ADDSUB_N]; 968 p->retval = 1; 969 p->dst[0] = 1; 970 p->dst[1] = 1; 971 p->src[0] = 1; 972 p->src[1] = 1; 973 REFERENCE (refmpn_add_n_sub_n); 974 975 p = ¶m[TYPE_ADDSUB_NC]; 976 COPY (TYPE_ADDSUB_N); 977 p->carry = CARRY_4; 978 REFERENCE (refmpn_add_n_sub_nc); 979 980 981 p = ¶m[TYPE_COPY]; 982 p->dst[0] = 1; 983 p->src[0] = 1; 984 p->overlap = OVERLAP_NONE; 985 p->size = SIZE_ALLOW_ZERO; 986 REFERENCE (refmpn_copy); 987 988 p = ¶m[TYPE_COPYI]; 989 p->dst[0] = 1; 990 p->src[0] = 1; 991 p->overlap = OVERLAP_LOW_TO_HIGH; 992 p->size = SIZE_ALLOW_ZERO; 993 REFERENCE (refmpn_copyi); 994 995 p = ¶m[TYPE_COPYD]; 996 p->dst[0] = 1; 997 p->src[0] = 1; 998 p->overlap = OVERLAP_HIGH_TO_LOW; 999 p->size = SIZE_ALLOW_ZERO; 1000 REFERENCE (refmpn_copyd); 1001 1002 p = ¶m[TYPE_COM]; 1003 p->dst[0] = 1; 1004 p->src[0] = 1; 1005 REFERENCE (refmpn_com); 1006 1007 1008 p = ¶m[TYPE_ADDLSH1_N]; 1009 COPY (TYPE_ADD_N); 1010 REFERENCE (refmpn_addlsh1_n); 1011 1012 p = ¶m[TYPE_ADDLSH2_N]; 1013 COPY (TYPE_ADD_N); 1014 REFERENCE (refmpn_addlsh2_n); 1015 1016 p = ¶m[TYPE_ADDLSH_N]; 1017 COPY (TYPE_ADD_N); 1018 p->shift = 1; 1019 REFERENCE (refmpn_addlsh_n); 1020 1021 p = ¶m[TYPE_ADDLSH1_N_IP1]; 1022 p->retval = 1; 1023 p->dst[0] = 1; 1024 p->src[0] = 1; 1025 p->dst0_from_src1 = 1; 1026 REFERENCE (refmpn_addlsh1_n_ip1); 1027 1028 p = ¶m[TYPE_ADDLSH2_N_IP1]; 1029 COPY (TYPE_ADDLSH1_N_IP1); 1030 REFERENCE (refmpn_addlsh2_n_ip1); 1031 1032 p = ¶m[TYPE_ADDLSH_N_IP1]; 1033 COPY (TYPE_ADDLSH1_N_IP1); 1034 p->shift = 1; 1035 REFERENCE (refmpn_addlsh_n_ip1); 1036 1037 p = ¶m[TYPE_ADDLSH1_N_IP2]; 1038 COPY (TYPE_ADDLSH1_N_IP1); 1039 REFERENCE (refmpn_addlsh1_n_ip2); 1040 1041 p = ¶m[TYPE_ADDLSH2_N_IP2]; 1042 COPY (TYPE_ADDLSH1_N_IP1); 1043 REFERENCE (refmpn_addlsh2_n_ip2); 1044 1045 p = ¶m[TYPE_ADDLSH_N_IP2]; 1046 COPY (TYPE_ADDLSH_N_IP1); 1047 REFERENCE (refmpn_addlsh_n_ip2); 1048 1049 p = ¶m[TYPE_SUBLSH1_N]; 1050 COPY (TYPE_ADD_N); 1051 REFERENCE (refmpn_sublsh1_n); 1052 1053 p = ¶m[TYPE_SUBLSH2_N]; 1054 COPY (TYPE_ADD_N); 1055 REFERENCE (refmpn_sublsh2_n); 1056 1057 p = ¶m[TYPE_SUBLSH_N]; 1058 COPY (TYPE_ADDLSH_N); 1059 REFERENCE (refmpn_sublsh_n); 1060 1061 p = ¶m[TYPE_SUBLSH1_N_IP1]; 1062 COPY (TYPE_ADDLSH1_N_IP1); 1063 REFERENCE (refmpn_sublsh1_n_ip1); 1064 1065 p = ¶m[TYPE_SUBLSH2_N_IP1]; 1066 COPY (TYPE_ADDLSH1_N_IP1); 1067 REFERENCE (refmpn_sublsh2_n_ip1); 1068 1069 p = ¶m[TYPE_SUBLSH_N_IP1]; 1070 COPY (TYPE_ADDLSH_N_IP1); 1071 REFERENCE (refmpn_sublsh_n_ip1); 1072 1073 p = ¶m[TYPE_RSBLSH1_N]; 1074 COPY (TYPE_ADD_N); 1075 REFERENCE (refmpn_rsblsh1_n); 1076 1077 p = ¶m[TYPE_RSBLSH2_N]; 1078 COPY (TYPE_ADD_N); 1079 REFERENCE (refmpn_rsblsh2_n); 1080 1081 p = ¶m[TYPE_RSBLSH_N]; 1082 COPY (TYPE_ADDLSH_N); 1083 REFERENCE (refmpn_rsblsh_n); 1084 1085 p = ¶m[TYPE_RSH1ADD_N]; 1086 COPY (TYPE_ADD_N); 1087 REFERENCE (refmpn_rsh1add_n); 1088 1089 p = ¶m[TYPE_RSH1SUB_N]; 1090 COPY (TYPE_ADD_N); 1091 REFERENCE (refmpn_rsh1sub_n); 1092 1093 1094 p = ¶m[TYPE_ADDLSH1_NC]; 1095 COPY (TYPE_ADDLSH1_N); 1096 p->carry = CARRY_3; 1097 REFERENCE (refmpn_addlsh1_nc); 1098 1099 p = ¶m[TYPE_ADDLSH2_NC]; 1100 COPY (TYPE_ADDLSH2_N); 1101 p->carry = CARRY_4; /* FIXME */ 1102 REFERENCE (refmpn_addlsh2_nc); 1103 1104 p = ¶m[TYPE_ADDLSH_NC]; 1105 COPY (TYPE_ADDLSH_N); 1106 p->carry = CARRY_BIT; /* FIXME */ 1107 REFERENCE (refmpn_addlsh_nc); 1108 1109 p = ¶m[TYPE_SUBLSH1_NC]; 1110 COPY (TYPE_ADDLSH1_NC); 1111 REFERENCE (refmpn_sublsh1_nc); 1112 1113 p = ¶m[TYPE_SUBLSH2_NC]; 1114 COPY (TYPE_ADDLSH2_NC); 1115 REFERENCE (refmpn_sublsh2_nc); 1116 1117 p = ¶m[TYPE_SUBLSH_NC]; 1118 COPY (TYPE_ADDLSH_NC); 1119 REFERENCE (refmpn_sublsh_nc); 1120 1121 p = ¶m[TYPE_RSBLSH1_NC]; 1122 COPY (TYPE_RSBLSH1_N); 1123 p->carry = CARRY_BIT; /* FIXME */ 1124 REFERENCE (refmpn_rsblsh1_nc); 1125 1126 p = ¶m[TYPE_RSBLSH2_NC]; 1127 COPY (TYPE_RSBLSH2_N); 1128 p->carry = CARRY_4; /* FIXME */ 1129 REFERENCE (refmpn_rsblsh2_nc); 1130 1131 p = ¶m[TYPE_RSBLSH_NC]; 1132 COPY (TYPE_RSBLSH_N); 1133 p->carry = CARRY_BIT; /* FIXME */ 1134 REFERENCE (refmpn_rsblsh_nc); 1135 1136 1137 p = ¶m[TYPE_MOD_1]; 1138 p->retval = 1; 1139 p->src[0] = 1; 1140 p->size = SIZE_ALLOW_ZERO; 1141 p->divisor = DIVISOR_LIMB; 1142 REFERENCE (refmpn_mod_1); 1143 1144 p = ¶m[TYPE_MOD_1C]; 1145 COPY (TYPE_MOD_1); 1146 p->carry = CARRY_DIVISOR; 1147 REFERENCE (refmpn_mod_1c); 1148 1149 p = ¶m[TYPE_DIVMOD_1]; 1150 COPY (TYPE_MOD_1); 1151 p->dst[0] = 1; 1152 REFERENCE (refmpn_divmod_1); 1153 1154 p = ¶m[TYPE_DIVMOD_1C]; 1155 COPY (TYPE_DIVMOD_1); 1156 p->carry = CARRY_DIVISOR; 1157 REFERENCE (refmpn_divmod_1c); 1158 1159 p = ¶m[TYPE_DIVREM_1]; 1160 COPY (TYPE_DIVMOD_1); 1161 p->size2 = SIZE_FRACTION; 1162 p->dst_size[0] = SIZE_SUM; 1163 REFERENCE (refmpn_divrem_1); 1164 1165 p = ¶m[TYPE_DIVREM_1C]; 1166 COPY (TYPE_DIVREM_1); 1167 p->carry = CARRY_DIVISOR; 1168 REFERENCE (refmpn_divrem_1c); 1169 1170 p = ¶m[TYPE_PREINV_DIVREM_1]; 1171 COPY (TYPE_DIVREM_1); 1172 p->size = SIZE_YES; /* ie. no size==0 */ 1173 REFERENCE (refmpn_preinv_divrem_1); 1174 1175 p = ¶m[TYPE_DIV_QR_1N_PI1]; 1176 p->retval = 1; 1177 p->src[0] = 1; 1178 p->src[1] = 1; 1179 /* SIZE_1 not supported. Always uses low limb only. */ 1180 p->size2 = 1; 1181 p->dst[0] = 1; 1182 p->divisor = DIVISOR_NORM; 1183 p->data = DATA_DIV_QR_1; 1184 VALIDATE (validate_div_qr_1_pi1); 1185 1186 p = ¶m[TYPE_PREINV_MOD_1]; 1187 p->retval = 1; 1188 p->src[0] = 1; 1189 p->divisor = DIVISOR_NORM; 1190 REFERENCE (refmpn_preinv_mod_1); 1191 1192 p = ¶m[TYPE_MOD_34LSUB1]; 1193 p->retval = 1; 1194 p->src[0] = 1; 1195 VALIDATE (validate_mod_34lsub1); 1196 1197 p = ¶m[TYPE_UDIV_QRNND]; 1198 p->retval = 1; 1199 p->src[0] = 1; 1200 p->dst[0] = 1; 1201 p->dst_size[0] = SIZE_1; 1202 p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB; 1203 p->data = DATA_UDIV_QRNND; 1204 p->overlap = OVERLAP_NONE; 1205 REFERENCE (refmpn_udiv_qrnnd); 1206 1207 p = ¶m[TYPE_UDIV_QRNND_R]; 1208 COPY (TYPE_UDIV_QRNND); 1209 REFERENCE (refmpn_udiv_qrnnd_r); 1210 1211 1212 p = ¶m[TYPE_DIVEXACT_1]; 1213 p->dst[0] = 1; 1214 p->src[0] = 1; 1215 p->divisor = DIVISOR_LIMB; 1216 p->data = DATA_MULTIPLE_DIVISOR; 1217 VALIDATE (validate_divexact_1); 1218 REFERENCE (refmpn_divmod_1); 1219 1220 p = ¶m[TYPE_BDIV_Q_1]; 1221 p->dst[0] = 1; 1222 p->src[0] = 1; 1223 p->divisor = DIVISOR_LIMB; 1224 VALIDATE (validate_bdiv_q_1); 1225 1226 p = ¶m[TYPE_DIVEXACT_BY3]; 1227 p->retval = 1; 1228 p->dst[0] = 1; 1229 p->src[0] = 1; 1230 REFERENCE (refmpn_divexact_by3); 1231 1232 p = ¶m[TYPE_DIVEXACT_BY3C]; 1233 COPY (TYPE_DIVEXACT_BY3); 1234 p->carry = CARRY_3; 1235 REFERENCE (refmpn_divexact_by3c); 1236 1237 1238 p = ¶m[TYPE_MODEXACT_1_ODD]; 1239 p->retval = 1; 1240 p->src[0] = 1; 1241 p->divisor = DIVISOR_ODD; 1242 VALIDATE (validate_modexact_1_odd); 1243 1244 p = ¶m[TYPE_MODEXACT_1C_ODD]; 1245 COPY (TYPE_MODEXACT_1_ODD); 1246 p->carry = CARRY_LIMB; 1247 VALIDATE (validate_modexact_1c_odd); 1248 1249 1250 p = ¶m[TYPE_GCD_1]; 1251 p->retval = 1; 1252 p->src[0] = 1; 1253 p->data = DATA_NON_ZERO; 1254 p->divisor = DIVISOR_LIMB; 1255 REFERENCE (refmpn_gcd_1); 1256 1257 p = ¶m[TYPE_GCD]; 1258 p->retval = 1; 1259 p->dst[0] = 1; 1260 p->src[0] = 1; 1261 p->src[1] = 1; 1262 p->size2 = 1; 1263 p->dst_size[0] = SIZE_RETVAL; 1264 p->overlap = OVERLAP_NOT_SRCS; 1265 p->data = DATA_GCD; 1266 REFERENCE (refmpn_gcd); 1267 1268 1269 p = ¶m[TYPE_MPZ_LEGENDRE]; 1270 p->retval = 1; 1271 p->src[0] = 1; 1272 p->size = SIZE_ALLOW_ZERO; 1273 p->src[1] = 1; 1274 p->data = DATA_SRC1_ODD_PRIME; 1275 p->size2 = 1; 1276 p->carry = CARRY_BIT; 1277 p->carry_sign = 1; 1278 REFERENCE (refmpz_legendre); 1279 1280 p = ¶m[TYPE_MPZ_JACOBI]; 1281 p->retval = 1; 1282 p->src[0] = 1; 1283 p->size = SIZE_ALLOW_ZERO; 1284 p->src[1] = 1; 1285 p->data = DATA_SRC1_ODD; 1286 p->size2 = 1; 1287 p->carry = CARRY_BIT; 1288 p->carry_sign = 1; 1289 REFERENCE (refmpz_jacobi); 1290 1291 p = ¶m[TYPE_MPZ_KRONECKER]; 1292 p->retval = 1; 1293 p->src[0] = 1; 1294 p->size = SIZE_ALLOW_ZERO; 1295 p->src[1] = 1; 1296 p->data = 0; 1297 p->size2 = 1; 1298 p->carry = CARRY_4; 1299 p->carry_sign = 1; 1300 REFERENCE (refmpz_kronecker); 1301 1302 1303 p = ¶m[TYPE_MPZ_KRONECKER_UI]; 1304 p->retval = 1; 1305 p->src[0] = 1; 1306 p->size = SIZE_ALLOW_ZERO; 1307 p->multiplier = 1; 1308 p->carry = CARRY_BIT; 1309 REFERENCE (refmpz_kronecker_ui); 1310 1311 p = ¶m[TYPE_MPZ_KRONECKER_SI]; 1312 COPY (TYPE_MPZ_KRONECKER_UI); 1313 REFERENCE (refmpz_kronecker_si); 1314 1315 p = ¶m[TYPE_MPZ_UI_KRONECKER]; 1316 COPY (TYPE_MPZ_KRONECKER_UI); 1317 REFERENCE (refmpz_ui_kronecker); 1318 1319 p = ¶m[TYPE_MPZ_SI_KRONECKER]; 1320 COPY (TYPE_MPZ_KRONECKER_UI); 1321 REFERENCE (refmpz_si_kronecker); 1322 1323 1324 p = ¶m[TYPE_SQR]; 1325 p->dst[0] = 1; 1326 p->src[0] = 1; 1327 p->dst_size[0] = SIZE_SUM; 1328 p->overlap = OVERLAP_NONE; 1329 REFERENCE (refmpn_sqr); 1330 1331 p = ¶m[TYPE_MUL_N]; 1332 COPY (TYPE_SQR); 1333 p->src[1] = 1; 1334 REFERENCE (refmpn_mul_n); 1335 1336 p = ¶m[TYPE_MULLO_N]; 1337 COPY (TYPE_MUL_N); 1338 p->dst_size[0] = 0; 1339 REFERENCE (refmpn_mullo_n); 1340 1341 p = ¶m[TYPE_SQRLO]; 1342 COPY (TYPE_SQR); 1343 p->dst_size[0] = 0; 1344 REFERENCE (refmpn_sqrlo); 1345 1346 p = ¶m[TYPE_MUL_MN]; 1347 COPY (TYPE_MUL_N); 1348 p->size2 = 1; 1349 REFERENCE (refmpn_mul_basecase); 1350 1351 p = ¶m[TYPE_MULMID_MN]; 1352 COPY (TYPE_MUL_MN); 1353 p->dst_size[0] = SIZE_DIFF_PLUS_3; 1354 REFERENCE (refmpn_mulmid_basecase); 1355 1356 p = ¶m[TYPE_MULMID_N]; 1357 COPY (TYPE_MUL_N); 1358 p->size = SIZE_ODD; 1359 p->size2 = SIZE_CEIL_HALF; 1360 p->dst_size[0] = SIZE_DIFF_PLUS_3; 1361 REFERENCE (refmpn_mulmid_n); 1362 1363 p = ¶m[TYPE_UMUL_PPMM]; 1364 p->retval = 1; 1365 p->src[0] = 1; 1366 p->dst[0] = 1; 1367 p->dst_size[0] = SIZE_1; 1368 p->overlap = OVERLAP_NONE; 1369 REFERENCE (refmpn_umul_ppmm); 1370 1371 p = ¶m[TYPE_UMUL_PPMM_R]; 1372 COPY (TYPE_UMUL_PPMM); 1373 REFERENCE (refmpn_umul_ppmm_r); 1374 1375 1376 p = ¶m[TYPE_RSHIFT]; 1377 p->retval = 1; 1378 p->dst[0] = 1; 1379 p->src[0] = 1; 1380 p->shift = 1; 1381 p->overlap = OVERLAP_LOW_TO_HIGH; 1382 REFERENCE (refmpn_rshift); 1383 1384 p = ¶m[TYPE_LSHIFT]; 1385 COPY (TYPE_RSHIFT); 1386 p->overlap = OVERLAP_HIGH_TO_LOW; 1387 REFERENCE (refmpn_lshift); 1388 1389 p = ¶m[TYPE_LSHIFTC]; 1390 COPY (TYPE_RSHIFT); 1391 p->overlap = OVERLAP_HIGH_TO_LOW; 1392 REFERENCE (refmpn_lshiftc); 1393 1394 1395 p = ¶m[TYPE_POPCOUNT]; 1396 p->retval = 1; 1397 p->src[0] = 1; 1398 REFERENCE (refmpn_popcount); 1399 1400 p = ¶m[TYPE_HAMDIST]; 1401 COPY (TYPE_POPCOUNT); 1402 p->src[1] = 1; 1403 REFERENCE (refmpn_hamdist); 1404 1405 1406 p = ¶m[TYPE_SBPI1_DIV_QR]; 1407 p->retval = 1; 1408 p->dst[0] = 1; 1409 p->dst[1] = 1; 1410 p->src[0] = 1; 1411 p->src[1] = 1; 1412 p->data = DATA_SRC1_HIGHBIT; 1413 p->size2 = 1; 1414 p->dst_size[0] = SIZE_DIFF; 1415 p->overlap = OVERLAP_NONE; 1416 REFERENCE (refmpn_sb_div_qr); 1417 1418 p = ¶m[TYPE_TDIV_QR]; 1419 p->dst[0] = 1; 1420 p->dst[1] = 1; 1421 p->src[0] = 1; 1422 p->src[1] = 1; 1423 p->size2 = 1; 1424 p->dst_size[0] = SIZE_DIFF_PLUS_1; 1425 p->dst_size[1] = SIZE_SIZE2; 1426 p->overlap = OVERLAP_NONE; 1427 REFERENCE (refmpn_tdiv_qr); 1428 1429 p = ¶m[TYPE_SQRTREM]; 1430 p->retval = 1; 1431 p->dst[0] = 1; 1432 p->dst[1] = 1; 1433 p->src[0] = 1; 1434 p->dst_size[0] = SIZE_CEIL_HALF; 1435 p->dst_size[1] = SIZE_RETVAL; 1436 p->overlap = OVERLAP_NONE; 1437 VALIDATE (validate_sqrtrem); 1438 REFERENCE (refmpn_sqrtrem); 1439 1440 p = ¶m[TYPE_SQRT]; 1441 p->retval = 1; 1442 p->dst[0] = 1; 1443 p->dst[1] = 0; 1444 p->src[0] = 1; 1445 p->dst_size[0] = SIZE_CEIL_HALF; 1446 p->overlap = OVERLAP_NONE; 1447 VALIDATE (validate_sqrt); 1448 1449 p = ¶m[TYPE_ZERO]; 1450 p->dst[0] = 1; 1451 p->size = SIZE_ALLOW_ZERO; 1452 REFERENCE (refmpn_zero); 1453 1454 p = ¶m[TYPE_GET_STR]; 1455 p->retval = 1; 1456 p->src[0] = 1; 1457 p->size = SIZE_ALLOW_ZERO; 1458 p->dst[0] = 1; 1459 p->dst[1] = 1; 1460 p->dst_size[0] = SIZE_GET_STR; 1461 p->dst_bytes[0] = 1; 1462 p->overlap = OVERLAP_NONE; 1463 REFERENCE (refmpn_get_str); 1464 1465 p = ¶m[TYPE_BINVERT]; 1466 p->dst[0] = 1; 1467 p->src[0] = 1; 1468 p->data = DATA_SRC0_ODD; 1469 p->overlap = OVERLAP_NONE; 1470 REFERENCE (refmpn_binvert); 1471 1472 p = ¶m[TYPE_INVERT]; 1473 p->dst[0] = 1; 1474 p->src[0] = 1; 1475 p->data = DATA_SRC0_HIGHBIT; 1476 p->overlap = OVERLAP_NONE; 1477 REFERENCE (refmpn_invert); 1478 1479 #ifdef EXTRA_PARAM_INIT 1480 EXTRA_PARAM_INIT 1481 #endif 1482 } 1483 1484 1485 /* The following are macros if there's no native versions, so wrap them in 1486 functions that can be in try_array[]. */ 1487 1488 void 1489 MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1490 { MPN_COPY (rp, sp, size); } 1491 1492 void 1493 MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1494 { MPN_COPY_INCR (rp, sp, size); } 1495 1496 void 1497 MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1498 { MPN_COPY_DECR (rp, sp, size); } 1499 1500 void 1501 __GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1502 { __GMPN_COPY (rp, sp, size); } 1503 1504 #ifdef __GMPN_COPY_INCR 1505 void 1506 __GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1507 { __GMPN_COPY_INCR (rp, sp, size); } 1508 #endif 1509 1510 void 1511 mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1512 { mpn_com (rp, sp, size); } 1513 1514 void 1515 mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1516 { mpn_and_n (rp, s1, s2, size); } 1517 1518 void 1519 mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1520 { mpn_andn_n (rp, s1, s2, size); } 1521 1522 void 1523 mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1524 { mpn_nand_n (rp, s1, s2, size); } 1525 1526 void 1527 mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1528 { mpn_ior_n (rp, s1, s2, size); } 1529 1530 void 1531 mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1532 { mpn_iorn_n (rp, s1, s2, size); } 1533 1534 void 1535 mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1536 { mpn_nior_n (rp, s1, s2, size); } 1537 1538 void 1539 mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1540 { mpn_xor_n (rp, s1, s2, size); } 1541 1542 void 1543 mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1544 { mpn_xnor_n (rp, s1, s2, size); } 1545 1546 mp_limb_t 1547 udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d) 1548 { 1549 mp_limb_t q; 1550 udiv_qrnnd (q, *remptr, n1, n0, d); 1551 return q; 1552 } 1553 1554 mp_limb_t 1555 mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1556 { 1557 return mpn_divexact_by3 (rp, sp, size); 1558 } 1559 1560 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 1561 mp_limb_t 1562 mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1563 { 1564 return mpn_addlsh1_n_ip1 (rp, sp, size); 1565 } 1566 #endif 1567 #if HAVE_NATIVE_mpn_addlsh2_n_ip1 1568 mp_limb_t 1569 mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1570 { 1571 return mpn_addlsh2_n_ip1 (rp, sp, size); 1572 } 1573 #endif 1574 #if HAVE_NATIVE_mpn_addlsh_n_ip1 1575 mp_limb_t 1576 mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1577 { 1578 return mpn_addlsh_n_ip1 (rp, sp, size, sh); 1579 } 1580 #endif 1581 #if HAVE_NATIVE_mpn_addlsh1_n_ip2 1582 mp_limb_t 1583 mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1584 { 1585 return mpn_addlsh1_n_ip2 (rp, sp, size); 1586 } 1587 #endif 1588 #if HAVE_NATIVE_mpn_addlsh2_n_ip2 1589 mp_limb_t 1590 mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1591 { 1592 return mpn_addlsh2_n_ip2 (rp, sp, size); 1593 } 1594 #endif 1595 #if HAVE_NATIVE_mpn_addlsh_n_ip2 1596 mp_limb_t 1597 mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1598 { 1599 return mpn_addlsh_n_ip2 (rp, sp, size, sh); 1600 } 1601 #endif 1602 #if HAVE_NATIVE_mpn_sublsh1_n_ip1 1603 mp_limb_t 1604 mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1605 { 1606 return mpn_sublsh1_n_ip1 (rp, sp, size); 1607 } 1608 #endif 1609 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 1610 mp_limb_t 1611 mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1612 { 1613 return mpn_sublsh2_n_ip1 (rp, sp, size); 1614 } 1615 #endif 1616 #if HAVE_NATIVE_mpn_sublsh_n_ip1 1617 mp_limb_t 1618 mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh) 1619 { 1620 return mpn_sublsh_n_ip1 (rp, sp, size, sh); 1621 } 1622 #endif 1623 1624 mp_limb_t 1625 mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor) 1626 { 1627 return mpn_modexact_1_odd (ptr, size, divisor); 1628 } 1629 1630 void 1631 mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1632 { 1633 mp_ptr tspace; 1634 TMP_DECL; 1635 TMP_MARK; 1636 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size)); 1637 mpn_toom22_mul (dst, src1, size, src2, size, tspace); 1638 TMP_FREE; 1639 } 1640 void 1641 mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1642 { 1643 mp_ptr tspace; 1644 TMP_DECL; 1645 TMP_MARK; 1646 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size)); 1647 mpn_toom2_sqr (dst, src, size, tspace); 1648 TMP_FREE; 1649 } 1650 void 1651 mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1652 { 1653 mp_ptr tspace; 1654 TMP_DECL; 1655 TMP_MARK; 1656 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size)); 1657 mpn_toom33_mul (dst, src1, size, src2, size, tspace); 1658 TMP_FREE; 1659 } 1660 void 1661 mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1662 { 1663 mp_ptr tspace; 1664 TMP_DECL; 1665 TMP_MARK; 1666 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size)); 1667 mpn_toom3_sqr (dst, src, size, tspace); 1668 TMP_FREE; 1669 } 1670 void 1671 mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1672 { 1673 mp_ptr tspace; 1674 TMP_DECL; 1675 TMP_MARK; 1676 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size)); 1677 mpn_toom44_mul (dst, src1, size, src2, size, tspace); 1678 TMP_FREE; 1679 } 1680 void 1681 mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1682 { 1683 mp_ptr tspace; 1684 TMP_DECL; 1685 TMP_MARK; 1686 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size)); 1687 mpn_toom4_sqr (dst, src, size, tspace); 1688 TMP_FREE; 1689 } 1690 1691 void 1692 mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, 1693 mp_size_t size) 1694 { 1695 mp_ptr tspace; 1696 mp_size_t n; 1697 TMP_DECL; 1698 TMP_MARK; 1699 tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size)); 1700 mpn_toom42_mulmid (dst, src1, src2, size, tspace); 1701 TMP_FREE; 1702 } 1703 1704 mp_limb_t 1705 umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2) 1706 { 1707 mp_limb_t high; 1708 umul_ppmm (high, *lowptr, m1, m2); 1709 return high; 1710 } 1711 1712 void 1713 MPN_ZERO_fun (mp_ptr ptr, mp_size_t size) 1714 { MPN_ZERO (ptr, size); } 1715 1716 mp_size_t 1717 mpn_sqrt_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1718 { return mpn_sqrtrem (dst, NULL, src, size); } 1719 1720 struct choice_t { 1721 const char *name; 1722 tryfun_t function; 1723 int type; 1724 mp_size_t minsize; 1725 }; 1726 1727 #define TRY(fun) #fun, (tryfun_t) fun 1728 #define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun 1729 1730 const struct choice_t choice_array[] = { 1731 { TRY(mpn_add), TYPE_ADD }, 1732 { TRY(mpn_sub), TYPE_SUB }, 1733 1734 { TRY(mpn_add_n), TYPE_ADD_N }, 1735 { TRY(mpn_sub_n), TYPE_SUB_N }, 1736 1737 #if HAVE_NATIVE_mpn_add_nc 1738 { TRY(mpn_add_nc), TYPE_ADD_NC }, 1739 #endif 1740 #if HAVE_NATIVE_mpn_sub_nc 1741 { TRY(mpn_sub_nc), TYPE_SUB_NC }, 1742 #endif 1743 1744 #if HAVE_NATIVE_mpn_add_n_sub_n 1745 { TRY(mpn_add_n_sub_n), TYPE_ADDSUB_N }, 1746 #endif 1747 #if HAVE_NATIVE_mpn_add_n_sub_nc 1748 { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC }, 1749 #endif 1750 1751 { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N }, 1752 { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N }, 1753 { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N }, 1754 { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N }, 1755 { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N }, 1756 { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N }, 1757 1758 { TRY(mpn_addmul_1), TYPE_ADDMUL_1 }, 1759 { TRY(mpn_submul_1), TYPE_SUBMUL_1 }, 1760 #if HAVE_NATIVE_mpn_addmul_1c 1761 { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C }, 1762 #endif 1763 #if HAVE_NATIVE_mpn_submul_1c 1764 { TRY(mpn_submul_1c), TYPE_SUBMUL_1C }, 1765 #endif 1766 1767 #if HAVE_NATIVE_mpn_addmul_2 1768 { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 }, 1769 #endif 1770 #if HAVE_NATIVE_mpn_addmul_3 1771 { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 }, 1772 #endif 1773 #if HAVE_NATIVE_mpn_addmul_4 1774 { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 }, 1775 #endif 1776 #if HAVE_NATIVE_mpn_addmul_5 1777 { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 }, 1778 #endif 1779 #if HAVE_NATIVE_mpn_addmul_6 1780 { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 }, 1781 #endif 1782 #if HAVE_NATIVE_mpn_addmul_7 1783 { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 }, 1784 #endif 1785 #if HAVE_NATIVE_mpn_addmul_8 1786 { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 }, 1787 #endif 1788 1789 { TRY_FUNFUN(mpn_com), TYPE_COM }, 1790 1791 { TRY_FUNFUN(MPN_COPY), TYPE_COPY }, 1792 { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI }, 1793 { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD }, 1794 1795 { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY }, 1796 #ifdef __GMPN_COPY_INCR 1797 { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI }, 1798 #endif 1799 1800 #if HAVE_NATIVE_mpn_copyi 1801 { TRY(mpn_copyi), TYPE_COPYI }, 1802 #endif 1803 #if HAVE_NATIVE_mpn_copyd 1804 { TRY(mpn_copyd), TYPE_COPYD }, 1805 #endif 1806 1807 { TRY(mpn_cnd_add_n), TYPE_ADDCND_N }, 1808 { TRY(mpn_cnd_sub_n), TYPE_SUBCND_N }, 1809 #if HAVE_NATIVE_mpn_addlsh1_n == 1 1810 { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N }, 1811 #endif 1812 #if HAVE_NATIVE_mpn_addlsh2_n == 1 1813 { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N }, 1814 #endif 1815 #if HAVE_NATIVE_mpn_addlsh_n 1816 { TRY(mpn_addlsh_n), TYPE_ADDLSH_N }, 1817 #endif 1818 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 1819 { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 }, 1820 #endif 1821 #if HAVE_NATIVE_mpn_addlsh2_n_ip1 1822 { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 }, 1823 #endif 1824 #if HAVE_NATIVE_mpn_addlsh_n_ip1 1825 { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 }, 1826 #endif 1827 #if HAVE_NATIVE_mpn_addlsh1_n_ip2 1828 { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 }, 1829 #endif 1830 #if HAVE_NATIVE_mpn_addlsh2_n_ip2 1831 { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 }, 1832 #endif 1833 #if HAVE_NATIVE_mpn_addlsh_n_ip2 1834 { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 }, 1835 #endif 1836 #if HAVE_NATIVE_mpn_sublsh1_n == 1 1837 { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N }, 1838 #endif 1839 #if HAVE_NATIVE_mpn_sublsh2_n == 1 1840 { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N }, 1841 #endif 1842 #if HAVE_NATIVE_mpn_sublsh_n 1843 { TRY(mpn_sublsh_n), TYPE_SUBLSH_N }, 1844 #endif 1845 #if HAVE_NATIVE_mpn_sublsh1_n_ip1 1846 { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 }, 1847 #endif 1848 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 1849 { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 }, 1850 #endif 1851 #if HAVE_NATIVE_mpn_sublsh_n_ip1 1852 { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 }, 1853 #endif 1854 #if HAVE_NATIVE_mpn_rsblsh1_n == 1 1855 { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N }, 1856 #endif 1857 #if HAVE_NATIVE_mpn_rsblsh2_n == 1 1858 { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N }, 1859 #endif 1860 #if HAVE_NATIVE_mpn_rsblsh_n 1861 { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N }, 1862 #endif 1863 #if HAVE_NATIVE_mpn_rsh1add_n 1864 { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N }, 1865 #endif 1866 #if HAVE_NATIVE_mpn_rsh1sub_n 1867 { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N }, 1868 #endif 1869 1870 #if HAVE_NATIVE_mpn_addlsh1_nc 1871 { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC }, 1872 #endif 1873 #if HAVE_NATIVE_mpn_addlsh2_nc 1874 { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC }, 1875 #endif 1876 #if HAVE_NATIVE_mpn_addlsh_nc 1877 { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC }, 1878 #endif 1879 #if HAVE_NATIVE_mpn_sublsh1_nc 1880 { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC }, 1881 #endif 1882 #if HAVE_NATIVE_mpn_sublsh2_nc 1883 { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC }, 1884 #endif 1885 #if HAVE_NATIVE_mpn_sublsh_nc 1886 { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC }, 1887 #endif 1888 #if HAVE_NATIVE_mpn_rsblsh1_nc 1889 { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC }, 1890 #endif 1891 #if HAVE_NATIVE_mpn_rsblsh2_nc 1892 { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC }, 1893 #endif 1894 #if HAVE_NATIVE_mpn_rsblsh_nc 1895 { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC }, 1896 #endif 1897 1898 { TRY_FUNFUN(mpn_and_n), TYPE_AND_N }, 1899 { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N }, 1900 { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N }, 1901 { TRY_FUNFUN(mpn_ior_n), TYPE_IOR_N }, 1902 { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N }, 1903 { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N }, 1904 { TRY_FUNFUN(mpn_xor_n), TYPE_XOR_N }, 1905 { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N }, 1906 1907 { TRY(mpn_divrem_1), TYPE_DIVREM_1 }, 1908 #if USE_PREINV_DIVREM_1 1909 { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 }, 1910 #endif 1911 { TRY(mpn_mod_1), TYPE_MOD_1 }, 1912 #if USE_PREINV_MOD_1 1913 { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 }, 1914 #endif 1915 #if HAVE_NATIVE_mpn_divrem_1c 1916 { TRY(mpn_divrem_1c), TYPE_DIVREM_1C }, 1917 #endif 1918 #if HAVE_NATIVE_mpn_mod_1c 1919 { TRY(mpn_mod_1c), TYPE_MOD_1C }, 1920 #endif 1921 { TRY(mpn_div_qr_1n_pi1), TYPE_DIV_QR_1N_PI1 }, 1922 #if GMP_NUMB_BITS % 4 == 0 1923 { TRY(mpn_mod_34lsub1), TYPE_MOD_34LSUB1 }, 1924 #endif 1925 1926 { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 }, 1927 #if HAVE_NATIVE_mpn_udiv_qrnnd 1928 { TRY(mpn_udiv_qrnnd), TYPE_UDIV_QRNND, 2 }, 1929 #endif 1930 #if HAVE_NATIVE_mpn_udiv_qrnnd_r 1931 { TRY(mpn_udiv_qrnnd_r), TYPE_UDIV_QRNND_R, 2 }, 1932 #endif 1933 1934 { TRY(mpn_divexact_1), TYPE_DIVEXACT_1 }, 1935 { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 }, 1936 { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 }, 1937 { TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C }, 1938 1939 { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD }, 1940 { TRY(mpn_modexact_1c_odd), TYPE_MODEXACT_1C_ODD }, 1941 1942 1943 { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3}, 1944 { TRY(mpn_tdiv_qr), TYPE_TDIV_QR }, 1945 1946 { TRY(mpn_mul_1), TYPE_MUL_1 }, 1947 #if HAVE_NATIVE_mpn_mul_1c 1948 { TRY(mpn_mul_1c), TYPE_MUL_1C }, 1949 #endif 1950 #if HAVE_NATIVE_mpn_mul_2 1951 { TRY(mpn_mul_2), TYPE_MUL_2, 2 }, 1952 #endif 1953 #if HAVE_NATIVE_mpn_mul_3 1954 { TRY(mpn_mul_3), TYPE_MUL_3, 3 }, 1955 #endif 1956 #if HAVE_NATIVE_mpn_mul_4 1957 { TRY(mpn_mul_4), TYPE_MUL_4, 4 }, 1958 #endif 1959 #if HAVE_NATIVE_mpn_mul_5 1960 { TRY(mpn_mul_5), TYPE_MUL_5, 5 }, 1961 #endif 1962 #if HAVE_NATIVE_mpn_mul_6 1963 { TRY(mpn_mul_6), TYPE_MUL_6, 6 }, 1964 #endif 1965 1966 { TRY(mpn_rshift), TYPE_RSHIFT }, 1967 { TRY(mpn_lshift), TYPE_LSHIFT }, 1968 { TRY(mpn_lshiftc), TYPE_LSHIFTC }, 1969 1970 1971 { TRY(mpn_mul_basecase), TYPE_MUL_MN }, 1972 { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN }, 1973 { TRY(mpn_mullo_basecase), TYPE_MULLO_N }, 1974 { TRY(mpn_sqrlo_basecase), TYPE_SQRLO }, 1975 { TRY(mpn_sqrlo), TYPE_SQRLO }, 1976 #if SQR_TOOM2_THRESHOLD > 0 1977 { TRY(mpn_sqr_basecase), TYPE_SQR }, 1978 #endif 1979 1980 { TRY(mpn_mul), TYPE_MUL_MN }, 1981 { TRY(mpn_mul_n), TYPE_MUL_N }, 1982 { TRY(mpn_sqr), TYPE_SQR }, 1983 1984 { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 }, 1985 #if HAVE_NATIVE_mpn_umul_ppmm 1986 { TRY(mpn_umul_ppmm), TYPE_UMUL_PPMM, 2 }, 1987 #endif 1988 #if HAVE_NATIVE_mpn_umul_ppmm_r 1989 { TRY(mpn_umul_ppmm_r), TYPE_UMUL_PPMM_R, 2 }, 1990 #endif 1991 1992 { TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE }, 1993 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE }, 1994 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE }, 1995 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE }, 1996 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE }, 1997 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE }, 1998 1999 { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 }, 2000 { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 }, 2001 { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N, 2002 (2 * MPN_TOOM42_MULMID_MINSIZE - 1) }, 2003 2004 { TRY(mpn_gcd_1), TYPE_GCD_1 }, 2005 { TRY(mpn_gcd), TYPE_GCD }, 2006 { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE }, 2007 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI }, 2008 { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER }, 2009 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI }, 2010 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI }, 2011 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER }, 2012 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER }, 2013 2014 { TRY(mpn_popcount), TYPE_POPCOUNT }, 2015 { TRY(mpn_hamdist), TYPE_HAMDIST }, 2016 2017 { TRY(mpn_sqrtrem), TYPE_SQRTREM }, 2018 { TRY_FUNFUN(mpn_sqrt), TYPE_SQRT }, 2019 2020 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO }, 2021 2022 { TRY(mpn_get_str), TYPE_GET_STR }, 2023 2024 { TRY(mpn_binvert), TYPE_BINVERT }, 2025 { TRY(mpn_invert), TYPE_INVERT }, 2026 2027 #ifdef EXTRA_ROUTINES 2028 EXTRA_ROUTINES 2029 #endif 2030 }; 2031 2032 const struct choice_t *choice = NULL; 2033 2034 2035 void 2036 mprotect_maybe (void *addr, size_t len, int prot) 2037 { 2038 if (!option_redzones) 2039 return; 2040 2041 #if HAVE_MPROTECT 2042 if (mprotect (addr, len, prot) != 0) 2043 { 2044 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n", 2045 addr, (unsigned) len, prot, strerror (errno)); 2046 exit (1); 2047 } 2048 #else 2049 { 2050 static int warned = 0; 2051 if (!warned) 2052 { 2053 fprintf (stderr, 2054 "mprotect not available, bounds testing not performed\n"); 2055 warned = 1; 2056 } 2057 } 2058 #endif 2059 } 2060 2061 /* round "a" up to a multiple of "m" */ 2062 size_t 2063 round_up_multiple (size_t a, size_t m) 2064 { 2065 unsigned long r; 2066 2067 r = a % m; 2068 if (r == 0) 2069 return a; 2070 else 2071 return a + (m - r); 2072 } 2073 2074 2075 /* On some systems it seems that only an mmap'ed region can be mprotect'ed, 2076 for instance HP-UX 10. 2077 2078 mmap will almost certainly return a pointer already aligned to a page 2079 boundary, but it's easy enough to share the alignment handling with the 2080 malloc case. */ 2081 2082 void 2083 malloc_region (struct region_t *r, mp_size_t n) 2084 { 2085 mp_ptr p; 2086 size_t nbytes; 2087 2088 ASSERT ((pagesize % GMP_LIMB_BYTES) == 0); 2089 2090 n = round_up_multiple (n, PAGESIZE_LIMBS); 2091 r->size = n; 2092 2093 nbytes = n*GMP_LIMB_BYTES + 2*REDZONE_BYTES + pagesize; 2094 2095 #if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON) 2096 #define MAP_ANON MAP_ANONYMOUS 2097 #endif 2098 2099 #if HAVE_MMAP && defined (MAP_ANON) 2100 /* note must pass fd=-1 for MAP_ANON on BSD */ 2101 p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); 2102 if (p == (void *) -1) 2103 { 2104 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n", 2105 (unsigned) nbytes, strerror (errno)); 2106 exit (1); 2107 } 2108 #else 2109 p = (mp_ptr) malloc (nbytes); 2110 ASSERT_ALWAYS (p != NULL); 2111 #endif 2112 2113 p = (mp_ptr) align_pointer (p, pagesize); 2114 2115 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE); 2116 p += REDZONE_LIMBS; 2117 r->ptr = p; 2118 2119 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE); 2120 } 2121 2122 void 2123 mprotect_region (const struct region_t *r, int prot) 2124 { 2125 mprotect_maybe (r->ptr, r->size, prot); 2126 } 2127 2128 2129 /* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3, 2130 and CARRY_4 */ 2131 mp_limb_t carry_array[] = { 2132 0, 1, 2, 3, 2133 4, 2134 CNST_LIMB(1) << 8, 2135 CNST_LIMB(1) << 16, 2136 GMP_NUMB_MAX 2137 }; 2138 int carry_index; 2139 2140 #define CARRY_COUNT \ 2141 ((tr->carry == CARRY_BIT) ? 2 \ 2142 : tr->carry == CARRY_3 ? 3 \ 2143 : tr->carry == CARRY_4 ? 4 \ 2144 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \ 2145 ? numberof(carry_array) + CARRY_RANDOMS \ 2146 : 1) 2147 2148 #define MPN_RANDOM_ALT(index,dst,size) \ 2149 (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size)) 2150 2151 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have 2152 the same type */ 2153 #define CARRY_ITERATION \ 2154 for (carry_index = 0; \ 2155 (carry_index < numberof (carry_array) \ 2156 ? (carry = carry_array[carry_index]) \ 2157 : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)), \ 2158 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0), \ 2159 carry_index < CARRY_COUNT; \ 2160 carry_index++) 2161 2162 2163 mp_limb_t multiplier_array[] = { 2164 0, 1, 2, 3, 2165 CNST_LIMB(1) << 8, 2166 CNST_LIMB(1) << 16, 2167 GMP_NUMB_MAX - 2, 2168 GMP_NUMB_MAX - 1, 2169 GMP_NUMB_MAX 2170 }; 2171 int multiplier_index; 2172 2173 mp_limb_t divisor_array[] = { 2174 1, 2, 3, 2175 CNST_LIMB(1) << 8, 2176 CNST_LIMB(1) << 16, 2177 CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1), 2178 GMP_NUMB_MAX >> (GMP_NUMB_BITS/2), 2179 GMP_NUMB_HIGHBIT, 2180 GMP_NUMB_HIGHBIT + 1, 2181 GMP_NUMB_MAX - 2, 2182 GMP_NUMB_MAX - 1, 2183 GMP_NUMB_MAX 2184 }; 2185 2186 int divisor_index; 2187 2188 /* The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have 2189 the same type */ 2190 #define ARRAY_ITERATION(var, index, limit, array, randoms, cond) \ 2191 for (index = 0; \ 2192 (index < numberof (array) \ 2193 ? (var = array[index]) \ 2194 : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)), \ 2195 index < limit; \ 2196 index++) 2197 2198 #define MULTIPLIER_COUNT \ 2199 (tr->multiplier \ 2200 ? numberof (multiplier_array) + MULTIPLIER_RANDOMS \ 2201 : 1) 2202 2203 #define MULTIPLIER_ITERATION \ 2204 ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT, \ 2205 multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER) 2206 2207 #define DIVISOR_COUNT \ 2208 (tr->divisor \ 2209 ? numberof (divisor_array) + DIVISOR_RANDOMS \ 2210 : 1) 2211 2212 #define DIVISOR_ITERATION \ 2213 ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \ 2214 DIVISOR_RANDOMS, TRY_DIVISOR) 2215 2216 2217 /* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping 2218 d[0] or d[1] respectively, -1 means a separate (write-protected) 2219 location. */ 2220 2221 struct overlap_t { 2222 int s[NUM_SOURCES]; 2223 } overlap_array[] = { 2224 { { -1, -1, -1, -1, -1 } }, 2225 { { 0, -1, -1, -1, -1 } }, 2226 { { -1, 0, -1, -1, -1 } }, 2227 { { 0, 0, -1, -1, -1 } }, 2228 { { 1, -1, -1, -1, -1 } }, 2229 { { -1, 1, -1, -1, -1 } }, 2230 { { 1, 1, -1, -1, -1 } }, 2231 { { 0, 1, -1, -1, -1 } }, 2232 { { 1, 0, -1, -1, -1 } }, 2233 }; 2234 2235 struct overlap_t *overlap, *overlap_limit; 2236 2237 #define OVERLAP_COUNT \ 2238 (tr->overlap & OVERLAP_NONE ? 1 \ 2239 : tr->overlap & OVERLAP_NOT_SRCS ? 3 \ 2240 : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \ 2241 : tr->overlap & OVERLAP_NOT_DST2 ? 4 \ 2242 : tr->dst[1] ? 9 \ 2243 : tr->src[1] ? 4 \ 2244 : tr->dst[0] ? 2 \ 2245 : 1) 2246 2247 #define OVERLAP_ITERATION \ 2248 for (overlap = &overlap_array[0], \ 2249 overlap_limit = &overlap_array[OVERLAP_COUNT]; \ 2250 overlap < overlap_limit; \ 2251 overlap++) 2252 2253 2254 int base = 10; 2255 2256 #define T_RAND_COUNT 2 2257 int t_rand; 2258 2259 void 2260 t_random (mp_ptr ptr, mp_size_t n) 2261 { 2262 if (n == 0) 2263 return; 2264 2265 switch (option_data) { 2266 case DATA_TRAND: 2267 switch (t_rand) { 2268 case 0: refmpn_random (ptr, n); break; 2269 case 1: refmpn_random2 (ptr, n); break; 2270 default: abort(); 2271 } 2272 break; 2273 case DATA_SEQ: 2274 { 2275 static mp_limb_t counter = 0; 2276 mp_size_t i; 2277 for (i = 0; i < n; i++) 2278 ptr[i] = ++counter; 2279 } 2280 break; 2281 case DATA_ZEROS: 2282 refmpn_zero (ptr, n); 2283 break; 2284 case DATA_FFS: 2285 refmpn_fill (ptr, n, GMP_NUMB_MAX); 2286 break; 2287 case DATA_2FD: 2288 /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF, 2289 inducing the q1_ff special case in the mul-by-inverse part of some 2290 versions of divrem_1 and mod_1. */ 2291 refmpn_fill (ptr, n, (mp_limb_t) -1); 2292 ptr[n-1] = 2; 2293 ptr[0] -= 2; 2294 break; 2295 2296 default: 2297 abort(); 2298 } 2299 } 2300 #define T_RAND_ITERATION \ 2301 for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++) 2302 2303 2304 void 2305 print_each (const struct each_t *e) 2306 { 2307 int i; 2308 2309 printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name); 2310 if (tr->retval) 2311 mpn_trace (" retval", &e->retval, 1); 2312 2313 for (i = 0; i < NUM_DESTS; i++) 2314 { 2315 if (tr->dst[i]) 2316 { 2317 if (tr->dst_bytes[i]) 2318 byte_tracen (" d[%d]", i, e->d[i].p, d[i].size); 2319 else 2320 mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size); 2321 printf (" located %p\n", (void *) (e->d[i].p)); 2322 } 2323 } 2324 2325 for (i = 0; i < NUM_SOURCES; i++) 2326 if (tr->src[i]) 2327 printf (" s[%d] located %p\n", i, (void *) (e->s[i].p)); 2328 } 2329 2330 2331 void 2332 print_all (void) 2333 { 2334 int i; 2335 2336 printf ("\n"); 2337 printf ("size %ld\n", (long) size); 2338 if (tr->size2) 2339 printf ("size2 %ld\n", (long) size2); 2340 2341 for (i = 0; i < NUM_DESTS; i++) 2342 if (d[i].size != size) 2343 printf ("d[%d].size %ld\n", i, (long) d[i].size); 2344 2345 if (tr->multiplier) 2346 mpn_trace (" multiplier", &multiplier, 1); 2347 if (tr->divisor) 2348 mpn_trace (" divisor", &divisor, 1); 2349 if (tr->shift) 2350 printf (" shift %lu\n", shift); 2351 if (tr->carry) 2352 mpn_trace (" carry", &carry, 1); 2353 if (tr->msize) 2354 mpn_trace (" multiplier_N", multiplier_N, tr->msize); 2355 2356 for (i = 0; i < NUM_DESTS; i++) 2357 if (tr->dst[i]) 2358 printf (" d[%d] %s, align %ld, size %ld\n", 2359 i, d[i].high ? "high" : "low", 2360 (long) d[i].align, (long) d[i].size); 2361 2362 for (i = 0; i < NUM_SOURCES; i++) 2363 { 2364 if (tr->src[i]) 2365 { 2366 printf (" s[%d] %s, align %ld, ", 2367 i, s[i].high ? "high" : "low", (long) s[i].align); 2368 switch (overlap->s[i]) { 2369 case -1: 2370 printf ("no overlap\n"); 2371 break; 2372 default: 2373 printf ("==d[%d]%s\n", 2374 overlap->s[i], 2375 tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a" 2376 : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a" 2377 : ""); 2378 break; 2379 } 2380 printf (" s[%d]=", i); 2381 if (tr->carry_sign && (carry & (1 << i))) 2382 printf ("-"); 2383 mpn_trace (NULL, s[i].p, SRC_SIZE(i)); 2384 } 2385 } 2386 2387 if (tr->dst0_from_src1) 2388 mpn_trace (" d[0]", s[1].region.ptr, size); 2389 2390 if (tr->reference) 2391 print_each (&ref); 2392 print_each (&fun); 2393 } 2394 2395 void 2396 compare (void) 2397 { 2398 int error = 0; 2399 int i; 2400 2401 if (tr->retval && ref.retval != fun.retval) 2402 { 2403 gmp_printf ("Different return values (%Mu, %Mu)\n", 2404 ref.retval, fun.retval); 2405 error = 1; 2406 } 2407 2408 for (i = 0; i < NUM_DESTS; i++) 2409 { 2410 switch (tr->dst_size[i]) { 2411 case SIZE_RETVAL: 2412 case SIZE_GET_STR: 2413 d[i].size = ref.retval; 2414 break; 2415 } 2416 } 2417 2418 for (i = 0; i < NUM_DESTS; i++) 2419 { 2420 if (! tr->dst[i]) 2421 continue; 2422 2423 if (tr->dst_bytes[i]) 2424 { 2425 if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0) 2426 { 2427 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n", 2428 i, 2429 (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size), 2430 (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size)); 2431 error = 1; 2432 } 2433 } 2434 else 2435 { 2436 if (d[i].size != 0 2437 && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size)) 2438 { 2439 printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n", 2440 i, 2441 (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size), 2442 (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size)); 2443 error = 1; 2444 } 2445 } 2446 } 2447 2448 if (error) 2449 { 2450 print_all(); 2451 abort(); 2452 } 2453 } 2454 2455 2456 /* The functions are cast if the return value should be a long rather than 2457 the default mp_limb_t. This is necessary under _LONG_LONG_LIMB. This 2458 might not be enough if some actual calling conventions checking is 2459 implemented on a long long limb system. */ 2460 2461 void 2462 call (struct each_t *e, tryfun_t function) 2463 { 2464 switch (choice->type) { 2465 case TYPE_ADD: 2466 case TYPE_SUB: 2467 e->retval = CALLING_CONVENTIONS (function) 2468 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2); 2469 break; 2470 2471 case TYPE_ADD_N: 2472 case TYPE_SUB_N: 2473 case TYPE_ADDLSH1_N: 2474 case TYPE_ADDLSH2_N: 2475 case TYPE_SUBLSH1_N: 2476 case TYPE_SUBLSH2_N: 2477 case TYPE_RSBLSH1_N: 2478 case TYPE_RSBLSH2_N: 2479 case TYPE_RSH1ADD_N: 2480 case TYPE_RSH1SUB_N: 2481 e->retval = CALLING_CONVENTIONS (function) 2482 (e->d[0].p, e->s[0].p, e->s[1].p, size); 2483 break; 2484 case TYPE_ADDLSH_N: 2485 case TYPE_SUBLSH_N: 2486 case TYPE_RSBLSH_N: 2487 e->retval = CALLING_CONVENTIONS (function) 2488 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift); 2489 break; 2490 case TYPE_ADDLSH_NC: 2491 case TYPE_SUBLSH_NC: 2492 case TYPE_RSBLSH_NC: 2493 e->retval = CALLING_CONVENTIONS (function) 2494 (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry); 2495 break; 2496 case TYPE_ADDLSH1_NC: 2497 case TYPE_ADDLSH2_NC: 2498 case TYPE_SUBLSH1_NC: 2499 case TYPE_SUBLSH2_NC: 2500 case TYPE_RSBLSH1_NC: 2501 case TYPE_RSBLSH2_NC: 2502 case TYPE_ADD_NC: 2503 case TYPE_SUB_NC: 2504 e->retval = CALLING_CONVENTIONS (function) 2505 (e->d[0].p, e->s[0].p, e->s[1].p, size, carry); 2506 break; 2507 case TYPE_ADDCND_N: 2508 case TYPE_SUBCND_N: 2509 e->retval = CALLING_CONVENTIONS (function) 2510 (carry, e->d[0].p, e->s[0].p, e->s[1].p, size); 2511 break; 2512 case TYPE_ADD_ERR1_N: 2513 case TYPE_SUB_ERR1_N: 2514 e->retval = CALLING_CONVENTIONS (function) 2515 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry); 2516 break; 2517 case TYPE_ADD_ERR2_N: 2518 case TYPE_SUB_ERR2_N: 2519 e->retval = CALLING_CONVENTIONS (function) 2520 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry); 2521 break; 2522 case TYPE_ADD_ERR3_N: 2523 case TYPE_SUB_ERR3_N: 2524 e->retval = CALLING_CONVENTIONS (function) 2525 (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry); 2526 break; 2527 2528 case TYPE_MUL_1: 2529 case TYPE_ADDMUL_1: 2530 case TYPE_SUBMUL_1: 2531 e->retval = CALLING_CONVENTIONS (function) 2532 (e->d[0].p, e->s[0].p, size, multiplier); 2533 break; 2534 case TYPE_MUL_1C: 2535 case TYPE_ADDMUL_1C: 2536 case TYPE_SUBMUL_1C: 2537 e->retval = CALLING_CONVENTIONS (function) 2538 (e->d[0].p, e->s[0].p, size, multiplier, carry); 2539 break; 2540 2541 case TYPE_MUL_2: 2542 case TYPE_MUL_3: 2543 case TYPE_MUL_4: 2544 case TYPE_MUL_5: 2545 case TYPE_MUL_6: 2546 if (size == 1) 2547 abort (); 2548 e->retval = CALLING_CONVENTIONS (function) 2549 (e->d[0].p, e->s[0].p, size, multiplier_N); 2550 break; 2551 2552 case TYPE_ADDMUL_2: 2553 case TYPE_ADDMUL_3: 2554 case TYPE_ADDMUL_4: 2555 case TYPE_ADDMUL_5: 2556 case TYPE_ADDMUL_6: 2557 case TYPE_ADDMUL_7: 2558 case TYPE_ADDMUL_8: 2559 if (size == 1) 2560 abort (); 2561 e->retval = CALLING_CONVENTIONS (function) 2562 (e->d[0].p, e->s[0].p, size, multiplier_N); 2563 break; 2564 2565 case TYPE_AND_N: 2566 case TYPE_ANDN_N: 2567 case TYPE_NAND_N: 2568 case TYPE_IOR_N: 2569 case TYPE_IORN_N: 2570 case TYPE_NIOR_N: 2571 case TYPE_XOR_N: 2572 case TYPE_XNOR_N: 2573 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size); 2574 break; 2575 2576 case TYPE_ADDSUB_N: 2577 e->retval = CALLING_CONVENTIONS (function) 2578 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size); 2579 break; 2580 case TYPE_ADDSUB_NC: 2581 e->retval = CALLING_CONVENTIONS (function) 2582 (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry); 2583 break; 2584 2585 case TYPE_COPY: 2586 case TYPE_COPYI: 2587 case TYPE_COPYD: 2588 case TYPE_COM: 2589 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); 2590 break; 2591 2592 case TYPE_ADDLSH1_N_IP1: 2593 case TYPE_ADDLSH2_N_IP1: 2594 case TYPE_ADDLSH1_N_IP2: 2595 case TYPE_ADDLSH2_N_IP2: 2596 case TYPE_SUBLSH1_N_IP1: 2597 case TYPE_SUBLSH2_N_IP1: 2598 case TYPE_DIVEXACT_BY3: 2599 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); 2600 break; 2601 case TYPE_DIVEXACT_BY3C: 2602 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, 2603 carry); 2604 break; 2605 2606 2607 case TYPE_DIVMOD_1: 2608 case TYPE_DIVEXACT_1: 2609 case TYPE_BDIV_Q_1: 2610 e->retval = CALLING_CONVENTIONS (function) 2611 (e->d[0].p, e->s[0].p, size, divisor); 2612 break; 2613 case TYPE_DIVMOD_1C: 2614 e->retval = CALLING_CONVENTIONS (function) 2615 (e->d[0].p, e->s[0].p, size, divisor, carry); 2616 break; 2617 case TYPE_DIVREM_1: 2618 e->retval = CALLING_CONVENTIONS (function) 2619 (e->d[0].p, size2, e->s[0].p, size, divisor); 2620 break; 2621 case TYPE_DIVREM_1C: 2622 e->retval = CALLING_CONVENTIONS (function) 2623 (e->d[0].p, size2, e->s[0].p, size, divisor, carry); 2624 break; 2625 case TYPE_PREINV_DIVREM_1: 2626 { 2627 mp_limb_t dinv; 2628 unsigned shift; 2629 shift = refmpn_count_leading_zeros (divisor); 2630 dinv = refmpn_invert_limb (divisor << shift); 2631 e->retval = CALLING_CONVENTIONS (function) 2632 (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift); 2633 } 2634 break; 2635 case TYPE_MOD_1: 2636 case TYPE_MODEXACT_1_ODD: 2637 e->retval = CALLING_CONVENTIONS (function) 2638 (e->s[0].p, size, divisor); 2639 break; 2640 case TYPE_MOD_1C: 2641 case TYPE_MODEXACT_1C_ODD: 2642 e->retval = CALLING_CONVENTIONS (function) 2643 (e->s[0].p, size, divisor, carry); 2644 break; 2645 case TYPE_PREINV_MOD_1: 2646 e->retval = CALLING_CONVENTIONS (function) 2647 (e->s[0].p, size, divisor, refmpn_invert_limb (divisor)); 2648 break; 2649 case TYPE_DIV_QR_1N_PI1: 2650 { 2651 mp_limb_t dinv = refmpn_invert_limb (divisor); 2652 e->retval = CALLING_CONVENTIONS (function) 2653 (e->d[0].p, e->s[0].p, size, e->s[1].p[0], divisor, dinv); 2654 break; 2655 } 2656 2657 case TYPE_MOD_34LSUB1: 2658 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size); 2659 break; 2660 2661 case TYPE_UDIV_QRNND: 2662 e->retval = CALLING_CONVENTIONS (function) 2663 (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor); 2664 break; 2665 case TYPE_UDIV_QRNND_R: 2666 e->retval = CALLING_CONVENTIONS (function) 2667 (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p); 2668 break; 2669 2670 case TYPE_SBPI1_DIV_QR: 2671 { 2672 gmp_pi1_t dinv; 2673 invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */ 2674 refmpn_copyi (e->d[1].p, e->s[0].p, size); /* dividend */ 2675 refmpn_fill (e->d[0].p, size-size2, 0x98765432); /* quotient */ 2676 e->retval = CALLING_CONVENTIONS (function) 2677 (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32); 2678 refmpn_zero (e->d[1].p+size2, size-size2); /* excess over remainder */ 2679 } 2680 break; 2681 2682 case TYPE_TDIV_QR: 2683 CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0, 2684 e->s[0].p, size, e->s[1].p, size2); 2685 break; 2686 2687 case TYPE_GCD_1: 2688 /* Must have a non-zero src, but this probably isn't the best way to do 2689 it. */ 2690 if (refmpn_zero_p (e->s[0].p, size)) 2691 e->retval = 0; 2692 else 2693 e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor); 2694 break; 2695 2696 case TYPE_GCD: 2697 /* Sources are destroyed, so they're saved and replaced, but a general 2698 approach to this might be better. Note that it's still e->s[0].p and 2699 e->s[1].p that are passed, to get the desired alignments. */ 2700 { 2701 mp_ptr s0 = refmpn_malloc_limbs (size); 2702 mp_ptr s1 = refmpn_malloc_limbs (size2); 2703 refmpn_copyi (s0, e->s[0].p, size); 2704 refmpn_copyi (s1, e->s[1].p, size2); 2705 2706 mprotect_region (&s[0].region, PROT_READ|PROT_WRITE); 2707 mprotect_region (&s[1].region, PROT_READ|PROT_WRITE); 2708 e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, 2709 e->s[0].p, size, 2710 e->s[1].p, size2); 2711 refmpn_copyi (e->s[0].p, s0, size); 2712 refmpn_copyi (e->s[1].p, s1, size2); 2713 free (s0); 2714 free (s1); 2715 } 2716 break; 2717 2718 case TYPE_GCD_FINDA: 2719 { 2720 /* FIXME: do this with a flag */ 2721 mp_limb_t c[2]; 2722 c[0] = e->s[0].p[0]; 2723 c[0] += (c[0] == 0); 2724 c[1] = e->s[0].p[0]; 2725 c[1] += (c[1] == 0); 2726 e->retval = CALLING_CONVENTIONS (function) (c); 2727 } 2728 break; 2729 2730 case TYPE_MPZ_LEGENDRE: 2731 case TYPE_MPZ_JACOBI: 2732 { 2733 mpz_t a, b; 2734 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size); 2735 PTR(b) = e->s[1].p; SIZ(b) = size2; 2736 e->retval = CALLING_CONVENTIONS (function) (a, b); 2737 } 2738 break; 2739 case TYPE_MPZ_KRONECKER: 2740 { 2741 mpz_t a, b; 2742 PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size); 2743 PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2); 2744 e->retval = CALLING_CONVENTIONS (function) (a, b); 2745 } 2746 break; 2747 case TYPE_MPZ_KRONECKER_UI: 2748 { 2749 mpz_t a; 2750 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size); 2751 e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier); 2752 } 2753 break; 2754 case TYPE_MPZ_KRONECKER_SI: 2755 { 2756 mpz_t a; 2757 PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size); 2758 e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier); 2759 } 2760 break; 2761 case TYPE_MPZ_UI_KRONECKER: 2762 { 2763 mpz_t b; 2764 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size); 2765 e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b); 2766 } 2767 break; 2768 case TYPE_MPZ_SI_KRONECKER: 2769 { 2770 mpz_t b; 2771 PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size); 2772 e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b); 2773 } 2774 break; 2775 2776 case TYPE_MUL_MN: 2777 case TYPE_MULMID_MN: 2778 CALLING_CONVENTIONS (function) 2779 (e->d[0].p, e->s[0].p, size, e->s[1].p, size2); 2780 break; 2781 case TYPE_MUL_N: 2782 case TYPE_MULLO_N: 2783 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size); 2784 break; 2785 case TYPE_MULMID_N: 2786 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, 2787 (size + 1) / 2); 2788 break; 2789 case TYPE_SQR: 2790 case TYPE_SQRLO: 2791 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size); 2792 break; 2793 2794 case TYPE_UMUL_PPMM: 2795 e->retval = CALLING_CONVENTIONS (function) 2796 (e->d[0].p, e->s[0].p[0], e->s[0].p[1]); 2797 break; 2798 case TYPE_UMUL_PPMM_R: 2799 e->retval = CALLING_CONVENTIONS (function) 2800 (e->s[0].p[0], e->s[0].p[1], e->d[0].p); 2801 break; 2802 2803 case TYPE_ADDLSH_N_IP1: 2804 case TYPE_ADDLSH_N_IP2: 2805 case TYPE_SUBLSH_N_IP1: 2806 case TYPE_LSHIFT: 2807 case TYPE_LSHIFTC: 2808 case TYPE_RSHIFT: 2809 e->retval = CALLING_CONVENTIONS (function) 2810 (e->d[0].p, e->s[0].p, size, shift); 2811 break; 2812 2813 case TYPE_POPCOUNT: 2814 e->retval = (* (unsigned long (*)(ANYARGS)) 2815 CALLING_CONVENTIONS (function)) (e->s[0].p, size); 2816 break; 2817 case TYPE_HAMDIST: 2818 e->retval = (* (unsigned long (*)(ANYARGS)) 2819 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size); 2820 break; 2821 2822 case TYPE_SQRTREM: 2823 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function)) 2824 (e->d[0].p, e->d[1].p, e->s[0].p, size); 2825 break; 2826 2827 case TYPE_SQRT: 2828 e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function)) 2829 (e->d[0].p, e->s[0].p, size); 2830 break; 2831 2832 case TYPE_ZERO: 2833 CALLING_CONVENTIONS (function) (e->d[0].p, size); 2834 break; 2835 2836 case TYPE_GET_STR: 2837 { 2838 size_t sizeinbase, fill; 2839 char *dst; 2840 MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base); 2841 ASSERT_ALWAYS (sizeinbase <= d[0].size); 2842 fill = d[0].size - sizeinbase; 2843 if (d[0].high) 2844 { 2845 memset (e->d[0].p, 0xBA, fill); 2846 dst = (char *) e->d[0].p + fill; 2847 } 2848 else 2849 { 2850 dst = (char *) e->d[0].p; 2851 memset (dst + sizeinbase, 0xBA, fill); 2852 } 2853 if (POW2_P (base)) 2854 { 2855 e->retval = CALLING_CONVENTIONS (function) (dst, base, 2856 e->s[0].p, size); 2857 } 2858 else 2859 { 2860 refmpn_copy (e->d[1].p, e->s[0].p, size); 2861 e->retval = CALLING_CONVENTIONS (function) (dst, base, 2862 e->d[1].p, size); 2863 } 2864 refmpn_zero (e->d[1].p, size); /* clobbered or unused */ 2865 } 2866 break; 2867 2868 case TYPE_INVERT: 2869 { 2870 mp_ptr scratch; 2871 TMP_DECL; 2872 TMP_MARK; 2873 scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size)); 2874 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch); 2875 TMP_FREE; 2876 } 2877 break; 2878 case TYPE_BINVERT: 2879 { 2880 mp_ptr scratch; 2881 TMP_DECL; 2882 TMP_MARK; 2883 scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size)); 2884 CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch); 2885 TMP_FREE; 2886 } 2887 break; 2888 2889 #ifdef EXTRA_CALL 2890 EXTRA_CALL 2891 #endif 2892 2893 default: 2894 printf ("Unknown routine type %d\n", choice->type); 2895 abort (); 2896 break; 2897 } 2898 } 2899 2900 2901 void 2902 pointer_setup (struct each_t *e) 2903 { 2904 int i, j; 2905 2906 for (i = 0; i < NUM_DESTS; i++) 2907 { 2908 switch (tr->dst_size[i]) { 2909 case 0: 2910 case SIZE_RETVAL: /* will be adjusted later */ 2911 d[i].size = size; 2912 break; 2913 2914 case SIZE_1: 2915 d[i].size = 1; 2916 break; 2917 case SIZE_2: 2918 d[i].size = 2; 2919 break; 2920 case SIZE_3: 2921 d[i].size = 3; 2922 break; 2923 case SIZE_4: 2924 d[i].size = 4; 2925 break; 2926 case SIZE_6: 2927 d[i].size = 6; 2928 break; 2929 2930 case SIZE_PLUS_1: 2931 d[i].size = size+1; 2932 break; 2933 case SIZE_PLUS_MSIZE_SUB_1: 2934 d[i].size = size + tr->msize - 1; 2935 break; 2936 2937 case SIZE_SUM: 2938 if (tr->size2) 2939 d[i].size = size + size2; 2940 else 2941 d[i].size = 2*size; 2942 break; 2943 2944 case SIZE_SIZE2: 2945 d[i].size = size2; 2946 break; 2947 2948 case SIZE_DIFF: 2949 d[i].size = size - size2; 2950 break; 2951 2952 case SIZE_DIFF_PLUS_1: 2953 d[i].size = size - size2 + 1; 2954 break; 2955 2956 case SIZE_DIFF_PLUS_3: 2957 d[i].size = size - size2 + 3; 2958 break; 2959 2960 case SIZE_CEIL_HALF: 2961 d[i].size = (size+1)/2; 2962 break; 2963 2964 case SIZE_GET_STR: 2965 { 2966 mp_limb_t ff = GMP_NUMB_MAX; 2967 MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base); 2968 } 2969 break; 2970 2971 default: 2972 printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]); 2973 abort (); 2974 } 2975 } 2976 2977 /* establish e->d[].p destinations */ 2978 for (i = 0; i < NUM_DESTS; i++) 2979 { 2980 mp_size_t offset = 0; 2981 2982 /* possible room for overlapping sources */ 2983 for (j = 0; j < numberof (overlap->s); j++) 2984 if (overlap->s[j] == i) 2985 offset = MAX (offset, s[j].align); 2986 2987 if (d[i].high) 2988 { 2989 if (tr->dst_bytes[i]) 2990 { 2991 e->d[i].p = (mp_ptr) 2992 ((char *) (e->d[i].region.ptr + e->d[i].region.size) 2993 - d[i].size - d[i].align); 2994 } 2995 else 2996 { 2997 e->d[i].p = e->d[i].region.ptr + e->d[i].region.size 2998 - d[i].size - d[i].align; 2999 if (tr->overlap == OVERLAP_LOW_TO_HIGH) 3000 e->d[i].p -= offset; 3001 } 3002 } 3003 else 3004 { 3005 if (tr->dst_bytes[i]) 3006 { 3007 e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align); 3008 } 3009 else 3010 { 3011 e->d[i].p = e->d[i].region.ptr + d[i].align; 3012 if (tr->overlap == OVERLAP_HIGH_TO_LOW) 3013 e->d[i].p += offset; 3014 } 3015 } 3016 } 3017 3018 /* establish e->s[].p sources */ 3019 for (i = 0; i < NUM_SOURCES; i++) 3020 { 3021 int o = overlap->s[i]; 3022 switch (o) { 3023 case -1: 3024 /* no overlap */ 3025 e->s[i].p = s[i].p; 3026 break; 3027 case 0: 3028 case 1: 3029 /* overlap with d[o] */ 3030 if (tr->overlap == OVERLAP_HIGH_TO_LOW) 3031 e->s[i].p = e->d[o].p - s[i].align; 3032 else if (tr->overlap == OVERLAP_LOW_TO_HIGH) 3033 e->s[i].p = e->d[o].p + s[i].align; 3034 else if (tr->size2 == SIZE_FRACTION) 3035 e->s[i].p = e->d[o].p + size2; 3036 else 3037 e->s[i].p = e->d[o].p; 3038 break; 3039 default: 3040 abort(); 3041 break; 3042 } 3043 } 3044 } 3045 3046 3047 void 3048 validate_fail (void) 3049 { 3050 if (tr->reference) 3051 { 3052 trap_location = TRAP_REF; 3053 call (&ref, tr->reference); 3054 trap_location = TRAP_NOWHERE; 3055 } 3056 3057 print_all(); 3058 abort(); 3059 } 3060 3061 3062 void 3063 try_one (void) 3064 { 3065 int i; 3066 3067 if (option_spinner) 3068 spinner(); 3069 spinner_count++; 3070 3071 trap_location = TRAP_SETUPS; 3072 3073 if (tr->divisor == DIVISOR_NORM) 3074 divisor |= GMP_NUMB_HIGHBIT; 3075 if (tr->divisor == DIVISOR_ODD) 3076 divisor |= 1; 3077 3078 for (i = 0; i < NUM_SOURCES; i++) 3079 { 3080 if (s[i].high) 3081 s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align; 3082 else 3083 s[i].p = s[i].region.ptr + s[i].align; 3084 } 3085 3086 pointer_setup (&ref); 3087 pointer_setup (&fun); 3088 3089 ref.retval = 0x04152637; 3090 fun.retval = 0x8C9DAEBF; 3091 3092 t_random (multiplier_N, tr->msize); 3093 3094 for (i = 0; i < NUM_SOURCES; i++) 3095 { 3096 if (! tr->src[i]) 3097 continue; 3098 3099 mprotect_region (&s[i].region, PROT_READ|PROT_WRITE); 3100 t_random (s[i].p, SRC_SIZE(i)); 3101 3102 switch (tr->data) { 3103 case DATA_NON_ZERO: 3104 if (refmpn_zero_p (s[i].p, SRC_SIZE(i))) 3105 s[i].p[0] = 1; 3106 break; 3107 3108 case DATA_MULTIPLE_DIVISOR: 3109 /* same number of low zero bits as divisor */ 3110 s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor); 3111 refmpn_sub_1 (s[i].p, s[i].p, size, 3112 refmpn_mod_1 (s[i].p, size, divisor)); 3113 break; 3114 3115 case DATA_GCD: 3116 /* s[1] no more bits than s[0] */ 3117 if (i == 1 && size2 == size) 3118 s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]); 3119 3120 /* high limb non-zero */ 3121 s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0); 3122 3123 /* odd */ 3124 s[i].p[0] |= 1; 3125 break; 3126 3127 case DATA_SRC0_ODD: 3128 if (i == 0) 3129 s[i].p[0] |= 1; 3130 break; 3131 3132 case DATA_SRC1_ODD: 3133 if (i == 1) 3134 s[i].p[0] |= 1; 3135 break; 3136 3137 case DATA_SRC1_ODD_PRIME: 3138 if (i == 1) 3139 { 3140 if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1) 3141 && s[i].p[0] <=3) 3142 s[i].p[0] = 3; 3143 else 3144 { 3145 mpz_t p; 3146 mpz_init (p); 3147 for (;;) 3148 { 3149 _mpz_realloc (p, SRC_SIZE(i)); 3150 MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i)); 3151 SIZ(p) = SRC_SIZE(i); 3152 MPN_NORMALIZE (PTR(p), SIZ(p)); 3153 mpz_nextprime (p, p); 3154 if (mpz_size (p) <= SRC_SIZE(i)) 3155 break; 3156 3157 t_random (s[i].p, SRC_SIZE(i)); 3158 } 3159 MPN_COPY (s[i].p, PTR(p), SIZ(p)); 3160 if (SIZ(p) < SRC_SIZE(i)) 3161 MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p)); 3162 mpz_clear (p); 3163 } 3164 } 3165 break; 3166 3167 case DATA_SRC1_HIGHBIT: 3168 if (i == 1) 3169 { 3170 if (tr->size2) 3171 s[i].p[size2-1] |= GMP_NUMB_HIGHBIT; 3172 else 3173 s[i].p[size-1] |= GMP_NUMB_HIGHBIT; 3174 } 3175 break; 3176 3177 case DATA_SRC0_HIGHBIT: 3178 if (i == 0) 3179 { 3180 s[i].p[size-1] |= GMP_NUMB_HIGHBIT; 3181 } 3182 break; 3183 3184 case DATA_UDIV_QRNND: 3185 s[i].p[1] %= divisor; 3186 break; 3187 case DATA_DIV_QR_1: 3188 if (i == 1) 3189 s[i].p[0] %= divisor; 3190 break; 3191 } 3192 3193 mprotect_region (&s[i].region, PROT_READ); 3194 } 3195 3196 for (i = 0; i < NUM_DESTS; i++) 3197 { 3198 if (! tr->dst[i]) 3199 continue; 3200 3201 if (tr->dst0_from_src1 && i==0) 3202 { 3203 mp_size_t copy = MIN (d[0].size, SRC_SIZE(1)); 3204 mp_size_t fill = MAX (0, d[0].size - copy); 3205 MPN_COPY (fun.d[0].p, s[1].region.ptr, copy); 3206 MPN_COPY (ref.d[0].p, s[1].region.ptr, copy); 3207 refmpn_fill (fun.d[0].p + copy, fill, DEADVAL); 3208 refmpn_fill (ref.d[0].p + copy, fill, DEADVAL); 3209 } 3210 else if (tr->dst_bytes[i]) 3211 { 3212 memset (ref.d[i].p, 0xBA, d[i].size); 3213 memset (fun.d[i].p, 0xBA, d[i].size); 3214 } 3215 else 3216 { 3217 refmpn_fill (ref.d[i].p, d[i].size, DEADVAL); 3218 refmpn_fill (fun.d[i].p, d[i].size, DEADVAL); 3219 } 3220 } 3221 3222 for (i = 0; i < NUM_SOURCES; i++) 3223 { 3224 if (! tr->src[i]) 3225 continue; 3226 3227 if (ref.s[i].p != s[i].p) 3228 { 3229 refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i)); 3230 refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i)); 3231 } 3232 } 3233 3234 if (option_print) 3235 print_all(); 3236 3237 if (tr->validate != NULL) 3238 { 3239 trap_location = TRAP_FUN; 3240 call (&fun, choice->function); 3241 trap_location = TRAP_NOWHERE; 3242 3243 if (! CALLING_CONVENTIONS_CHECK ()) 3244 { 3245 print_all(); 3246 abort(); 3247 } 3248 3249 (*tr->validate) (); 3250 } 3251 else 3252 { 3253 trap_location = TRAP_REF; 3254 call (&ref, tr->reference); 3255 trap_location = TRAP_FUN; 3256 call (&fun, choice->function); 3257 trap_location = TRAP_NOWHERE; 3258 3259 if (! CALLING_CONVENTIONS_CHECK ()) 3260 { 3261 print_all(); 3262 abort(); 3263 } 3264 3265 compare (); 3266 } 3267 } 3268 3269 3270 #define SIZE_ITERATION \ 3271 for (size = MAX3 (option_firstsize, \ 3272 choice->minsize, \ 3273 (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1), \ 3274 size += (tr->size == SIZE_ODD) && !(size & 1); \ 3275 size <= option_lastsize; \ 3276 size += (tr->size == SIZE_ODD) ? 2 : 1) 3277 3278 #define SIZE2_FIRST \ 3279 (tr->size2 == SIZE_2 ? 2 \ 3280 : tr->size2 == SIZE_FRACTION ? option_firstsize2 \ 3281 : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \ 3282 : tr->size2 ? \ 3283 MAX (choice->minsize, (option_firstsize2 != 0 \ 3284 ? option_firstsize2 : 1)) \ 3285 : 0) 3286 3287 #define SIZE2_LAST \ 3288 (tr->size2 == SIZE_2 ? 2 \ 3289 : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \ 3290 : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \ 3291 : tr->size2 ? size \ 3292 : 0) 3293 3294 #define SIZE2_ITERATION \ 3295 for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++) 3296 3297 #define ALIGN_COUNT(cond) ((cond) ? ALIGNMENTS : 1) 3298 #define ALIGN_ITERATION(w,n,cond) \ 3299 for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++) 3300 3301 #define HIGH_LIMIT(cond) ((cond) != 0) 3302 #define HIGH_COUNT(cond) (HIGH_LIMIT (cond) + 1) 3303 #define HIGH_ITERATION(w,n,cond) \ 3304 for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++) 3305 3306 #define SHIFT_LIMIT \ 3307 ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1)) 3308 3309 #define SHIFT_ITERATION \ 3310 for (shift = 1; shift <= SHIFT_LIMIT; shift++) 3311 3312 3313 void 3314 try_many (void) 3315 { 3316 int i; 3317 3318 { 3319 unsigned long total = 1; 3320 3321 total *= option_repetitions; 3322 total *= option_lastsize; 3323 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT; 3324 else if (tr->size2) total *= (option_lastsize+1)/2; 3325 3326 total *= SHIFT_LIMIT; 3327 total *= MULTIPLIER_COUNT; 3328 total *= DIVISOR_COUNT; 3329 total *= CARRY_COUNT; 3330 total *= T_RAND_COUNT; 3331 3332 total *= HIGH_COUNT (tr->dst[0]); 3333 total *= HIGH_COUNT (tr->dst[1]); 3334 total *= HIGH_COUNT (tr->src[0]); 3335 total *= HIGH_COUNT (tr->src[1]); 3336 3337 total *= ALIGN_COUNT (tr->dst[0]); 3338 total *= ALIGN_COUNT (tr->dst[1]); 3339 total *= ALIGN_COUNT (tr->src[0]); 3340 total *= ALIGN_COUNT (tr->src[1]); 3341 3342 total *= OVERLAP_COUNT; 3343 3344 printf ("%s %lu\n", choice->name, total); 3345 } 3346 3347 spinner_count = 0; 3348 3349 for (i = 0; i < option_repetitions; i++) 3350 SIZE_ITERATION 3351 SIZE2_ITERATION 3352 3353 SHIFT_ITERATION 3354 MULTIPLIER_ITERATION 3355 DIVISOR_ITERATION 3356 CARRY_ITERATION /* must be after divisor */ 3357 T_RAND_ITERATION 3358 3359 HIGH_ITERATION(d,0, tr->dst[0]) 3360 HIGH_ITERATION(d,1, tr->dst[1]) 3361 HIGH_ITERATION(s,0, tr->src[0]) 3362 HIGH_ITERATION(s,1, tr->src[1]) 3363 3364 ALIGN_ITERATION(d,0, tr->dst[0]) 3365 ALIGN_ITERATION(d,1, tr->dst[1]) 3366 ALIGN_ITERATION(s,0, tr->src[0]) 3367 ALIGN_ITERATION(s,1, tr->src[1]) 3368 3369 OVERLAP_ITERATION 3370 try_one(); 3371 3372 printf("\n"); 3373 } 3374 3375 3376 /* Usually print_all() doesn't show much, but it might give a hint as to 3377 where the function was up to when it died. */ 3378 void 3379 trap (int sig) 3380 { 3381 const char *name = "noname"; 3382 3383 switch (sig) { 3384 case SIGILL: name = "SIGILL"; break; 3385 #ifdef SIGBUS 3386 case SIGBUS: name = "SIGBUS"; break; 3387 #endif 3388 case SIGSEGV: name = "SIGSEGV"; break; 3389 case SIGFPE: name = "SIGFPE"; break; 3390 } 3391 3392 printf ("\n\nSIGNAL TRAP: %s\n", name); 3393 3394 switch (trap_location) { 3395 case TRAP_REF: 3396 printf (" in reference function: %s\n", tr->reference_name); 3397 break; 3398 case TRAP_FUN: 3399 printf (" in test function: %s\n", choice->name); 3400 print_all (); 3401 break; 3402 case TRAP_SETUPS: 3403 printf (" in parameter setups\n"); 3404 print_all (); 3405 break; 3406 default: 3407 printf (" somewhere unknown\n"); 3408 break; 3409 } 3410 exit (1); 3411 } 3412 3413 3414 void 3415 try_init (void) 3416 { 3417 #if HAVE_GETPAGESIZE 3418 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't 3419 know _SC_PAGESIZE. */ 3420 pagesize = getpagesize (); 3421 #else 3422 #if HAVE_SYSCONF 3423 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1) 3424 { 3425 /* According to the linux man page, sysconf doesn't set errno */ 3426 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n"); 3427 exit (1); 3428 } 3429 #else 3430 Error, error, cannot get page size 3431 #endif 3432 #endif 3433 3434 printf ("pagesize is 0x%lX bytes\n", pagesize); 3435 3436 signal (SIGILL, trap); 3437 #ifdef SIGBUS 3438 signal (SIGBUS, trap); 3439 #endif 3440 signal (SIGSEGV, trap); 3441 signal (SIGFPE, trap); 3442 3443 { 3444 int i; 3445 3446 for (i = 0; i < NUM_SOURCES; i++) 3447 { 3448 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1); 3449 printf ("s[%d] %p to %p (0x%lX bytes)\n", 3450 i, (void *) (s[i].region.ptr), 3451 (void *) (s[i].region.ptr + s[i].region.size), 3452 (long) s[i].region.size * GMP_LIMB_BYTES); 3453 } 3454 3455 #define INIT_EACH(e,es) \ 3456 for (i = 0; i < NUM_DESTS; i++) \ 3457 { \ 3458 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \ 3459 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \ 3460 es, i, (void *) (e.d[i].region.ptr), \ 3461 (void *) (e.d[i].region.ptr + e.d[i].region.size), \ 3462 (long) e.d[i].region.size * GMP_LIMB_BYTES); \ 3463 } 3464 3465 INIT_EACH(ref, "ref"); 3466 INIT_EACH(fun, "fun"); 3467 } 3468 } 3469 3470 int 3471 strmatch_wild (const char *pattern, const char *str) 3472 { 3473 size_t plen, slen; 3474 3475 /* wildcard at start */ 3476 if (pattern[0] == '*') 3477 { 3478 pattern++; 3479 plen = strlen (pattern); 3480 slen = strlen (str); 3481 return (plen == 0 3482 || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0)); 3483 } 3484 3485 /* wildcard at end */ 3486 plen = strlen (pattern); 3487 if (plen >= 1 && pattern[plen-1] == '*') 3488 return (memcmp (pattern, str, plen-1) == 0); 3489 3490 /* no wildcards */ 3491 return (strcmp (pattern, str) == 0); 3492 } 3493 3494 void 3495 try_name (const char *name) 3496 { 3497 int found = 0; 3498 int i; 3499 3500 for (i = 0; i < numberof (choice_array); i++) 3501 { 3502 if (strmatch_wild (name, choice_array[i].name)) 3503 { 3504 choice = &choice_array[i]; 3505 tr = ¶m[choice->type]; 3506 try_many (); 3507 found = 1; 3508 } 3509 } 3510 3511 if (!found) 3512 { 3513 printf ("%s unknown\n", name); 3514 /* exit (1); */ 3515 } 3516 } 3517 3518 3519 void 3520 usage (const char *prog) 3521 { 3522 int col = 0; 3523 int i; 3524 3525 printf ("Usage: %s [options] function...\n", prog); 3526 printf (" -1 use limb data 1,2,3,etc\n"); 3527 printf (" -9 use limb data all 0xFF..FFs\n"); 3528 printf (" -a zeros use limb data all zeros\n"); 3529 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n"); 3530 printf (" -a 2fd use data 0x2FFF...FFFD\n"); 3531 printf (" -p print each case tried (try this if seg faulting)\n"); 3532 printf (" -R seed random numbers from time()\n"); 3533 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS); 3534 printf (" -s size starting size to test\n"); 3535 printf (" -S size2 starting size2 to test\n"); 3536 printf (" -s s1-s2 range of sizes to test\n"); 3537 printf (" -W don't show the spinner (use this in gdb)\n"); 3538 printf (" -z disable mprotect() redzones\n"); 3539 printf ("Default data is refmpn_random() and refmpn_random2().\n"); 3540 printf ("\n"); 3541 printf ("Functions that can be tested:\n"); 3542 3543 for (i = 0; i < numberof (choice_array); i++) 3544 { 3545 if (col + 1 + strlen (choice_array[i].name) > 79) 3546 { 3547 printf ("\n"); 3548 col = 0; 3549 } 3550 printf (" %s", choice_array[i].name); 3551 col += 1 + strlen (choice_array[i].name); 3552 } 3553 printf ("\n"); 3554 3555 exit(1); 3556 } 3557 3558 3559 int 3560 main (int argc, char *argv[]) 3561 { 3562 int i; 3563 3564 /* unbuffered output */ 3565 setbuf (stdout, NULL); 3566 setbuf (stderr, NULL); 3567 3568 /* default trace in hex, and in upper-case so can paste into bc */ 3569 mp_trace_base = -16; 3570 3571 param_init (); 3572 3573 { 3574 unsigned long seed = 123; 3575 int opt; 3576 3577 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF) 3578 { 3579 switch (opt) { 3580 case '1': 3581 /* use limb data values 1, 2, 3, ... etc */ 3582 option_data = DATA_SEQ; 3583 break; 3584 case '9': 3585 /* use limb data values 0xFFF...FFF always */ 3586 option_data = DATA_FFS; 3587 break; 3588 case 'a': 3589 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 3590 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ; 3591 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 3592 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 3593 else 3594 { 3595 fprintf (stderr, "unrecognised data option: %s\n", optarg); 3596 exit (1); 3597 } 3598 break; 3599 case 'b': 3600 mp_trace_base = atoi (optarg); 3601 break; 3602 case 'E': 3603 /* re-seed */ 3604 sscanf (optarg, "%lu", &seed); 3605 printf ("Re-seeding with %lu\n", seed); 3606 break; 3607 case 'p': 3608 option_print = 1; 3609 break; 3610 case 'R': 3611 /* randomize */ 3612 seed = time (NULL); 3613 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed); 3614 break; 3615 case 'r': 3616 option_repetitions = atoi (optarg); 3617 break; 3618 case 's': 3619 { 3620 char *p; 3621 option_firstsize = strtol (optarg, 0, 0); 3622 if ((p = strchr (optarg, '-')) != NULL) 3623 option_lastsize = strtol (p+1, 0, 0); 3624 } 3625 break; 3626 case 'S': 3627 /* -S <size> sets the starting size for the second of a two size 3628 routine (like mpn_mul_basecase) */ 3629 option_firstsize2 = strtol (optarg, 0, 0); 3630 break; 3631 case 'W': 3632 /* use this when running in the debugger */ 3633 option_spinner = 0; 3634 break; 3635 case 'z': 3636 /* disable redzones */ 3637 option_redzones = 0; 3638 break; 3639 case '?': 3640 usage (argv[0]); 3641 break; 3642 } 3643 } 3644 3645 gmp_randinit_default (__gmp_rands); 3646 __gmp_rands_initialized = 1; 3647 gmp_randseed_ui (__gmp_rands, seed); 3648 } 3649 3650 try_init(); 3651 3652 if (argc <= optind) 3653 usage (argv[0]); 3654 3655 for (i = optind; i < argc; i++) 3656 try_name (argv[i]); 3657 3658 return 0; 3659 }