github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/gmp-impl.h

/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993-1997, 1999-2015 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it.  */


#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h>  /* for _popcnt */
#endif

/* For INT_MAX, etc.  We used to avoid it because of a bug (on solaris,
   gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values)), but it should be safe now.

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort.  */

#include <limits.h>

/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmpn_cpuvec,
   they're not called directly.  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_addlsh1_n(name) \
  DECL_add_n (name)
#define DECL_addlsh2_n(name) \
  DECL_add_n (name)
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_addmul_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_bdiv_dbm1c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_cnd_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_cnd_sub_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_com(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
#define DECL_lshiftc(name) \
  DECL_lshift (name)
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_mod_1_1p(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
#define DECL_mod_1_1p_cps(name) \
  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
#define DECL_mod_1s_2p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_2p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_1s_4p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_4p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
#define DECL_mullo_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_redc_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_redc_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_sublsh1_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)
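/* Illustrative note (added for clarity, not in the original source): each
   DECL_* macro expands to a prototype for one cpuvec entry point.  For a
   hypothetical per-CPU variant name, say "mpn_add_n_k8",

       DECL_add_n (mpn_add_n_k8);

   expands to

       __GMP_DECLSPEC mp_limb_t mpn_add_n_k8 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

   matching the corresponding function-pointer slot that fat.h fills in
   __gmpn_cpuvec.  */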
#if ! defined (__GMP_WITHIN_CONFIGURE)
#include "config.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "fac_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif

#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif


#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* if not provided by gmp-mparam.h */
#ifndef GMP_LIMB_BYTES
#define GMP_LIMB_BYTES  SIZEOF_MP_LIMB_T
#endif
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)


/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif


/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif


/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
   this off when trying to write timing loops.  */
#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
#define memset(p, c, n)             \
  do {                              \
    ASSERT ((n) >= 0);              \
    char *__memset__p = (p);        \
    int  __i;                       \
    for (__i = 0; __i < (n); __i++) \
      __memset__p[__i] = (c);       \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).  */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68 \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
#define ROUND_UP_MULTIPLE(a,m)    \
  (POW2_P(m) ? (a) + (-(a))%(m)   \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
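/* Worked example (added for clarity, not in the original source): with the
   usual power-of-2 alignment, say m = 8 and a = 13, the POW2 branch gives
   13 + ((-13) % 8) = 13 + 3 = 16, relying on unsigned wrap-around of "-13"
   and on 2^N being divisible by 8.  For a = 16 it gives 16 + 0 = 16, so
   already-aligned values are left alone.  POW2_P is defined further down
   in this header.  */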
#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL        struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK        __tmp_marker = 0
#define TMP_SALLOC(n)   alloca(n)
#define TMP_BALLOC(n)   __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
/* The peculiar stack allocation limit here is chosen for efficient asm.  */
#define TMP_ALLOC(n)    \
  (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                  \
  do {                                            \
    if (UNLIKELY (__tmp_marker != 0))             \
      __gmp_tmp_reentrant_free (__tmp_marker);    \
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL       TMP_DECL
#define TMP_DECL        struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK       TMP_MARK
#define TMP_MARK        __tmp_marker = 0
#define TMP_SALLOC(n)   TMP_ALLOC(n)
#define TMP_BALLOC(n)   TMP_ALLOC(n)
#define TMP_ALLOC(n)    __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE       TMP_FREE
#define TMP_FREE        __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL       TMP_DECL
#define TMP_DECL        struct tmp_marker __tmp_marker
#define TMP_SMARK       TMP_MARK
#define TMP_MARK        __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)   TMP_ALLOC(n)
#define TMP_BALLOC(n)   TMP_ALLOC(n)
#define TMP_ALLOC(n)    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE       TMP_FREE
#define TMP_FREE        __gmp_tmp_free (&__tmp_marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  void                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
                                           struct tmp_debug_t *,
                                           const char *, const char *);
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
                                            struct tmp_debug_t **, const char *,
                                            size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
                                           struct tmp_debug_t **,
                                           const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE,             \
                          &__tmp_marker, &__tmp_marker_struct,  \
                          __tmp_marker_name, marker_name);      \
  } while (0)
#define TMP_SALLOC(n)   TMP_ALLOC(n)
#define TMP_BALLOC(n)   TMP_ALLOC(n)
#define TMP_ALLOC(size)                                         \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,              \
                         __tmp_marker_inscope,                  \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE,             \
                          marker, &__tmp_marker,                \
                          __tmp_marker_name, marker_name);      \
  } while (0)
#endif /* WANT_TMP_DEBUG */
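/* Illustrative sketch (added; not part of the original header; the function
   name is hypothetical): the TMP_* protocol as it typically appears inside
   an mpn routine.  TMP_ALLOC_LIMBS is defined just below.  */
#if 0
static void
example_tmp_protocol (mp_ptr rp, mp_srcptr ap, mp_size_t n)
{
  mp_ptr scratch;
  TMP_DECL;                           /* must come last among declarations */

  TMP_MARK;                           /* before any TMP_ALLOC */
  scratch = TMP_ALLOC_LIMBS (2 * n);  /* never TMP_ALLOC(0) */
  mpn_sqr (scratch, ap, n);           /* 2n-limb square into scratch */
  mpn_copyi (rp, scratch, n);         /* keep the low half */
  TMP_FREE;                           /* releases everything since TMP_MARK */
}
#endif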
/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than many.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)
#define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize)         \
  do {                                                          \
    if (WANT_TMP_DEBUG)                                         \
      {                                                         \
        (xp) = TMP_ALLOC_LIMBS (xsize);                         \
        (yp) = TMP_ALLOC_LIMBS (ysize);                         \
        (zp) = TMP_ALLOC_LIMBS (zsize);                         \
      }                                                         \
    else                                                        \
      {                                                         \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize));   \
        (yp) = (xp) + (xsize);                                  \
        (zp) = (yp) + (ysize);                                  \
      }                                                         \
  } while (0)

/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x)        ((x) >= 0 ? (x) : -(x))
#define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
#define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)
#define NUM(x)    mpq_numref(x)
#define DEN(x)    mpq_denref(x)
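/* Illustrative sketch (added; hypothetical function name): two scratch
   areas carved from one TMP block via TMP_ALLOC_LIMBS_2, per the comment
   above.  */
#if 0
static void
example_two_scratch (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_ptr tp, up;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  TMP_ALLOC_LIMBS_2 (tp, 2 * n, up, n);   /* one block, two regions */
  mpn_mul_n (tp, ap, bp, n);              /* tp: 2n-limb product */
  cy = mpn_add_1 (up, ap, n, 1);          /* up: a + 1, carry returned */
  (void) cy;
  TMP_FREE;
}
#endif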
/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)

/* This is intended for constant THRESHOLDs only, where the compiler
   can completely fold the result.  */
#define LOG2C(n) \
 (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))

#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

#undef  DIVREM_1_NORM_THRESHOLD
#undef  DIVREM_1_UNNORM_THRESHOLD
#undef  MOD_1_NORM_THRESHOLD
#undef  MOD_1_UNNORM_THRESHOLD
#undef  USE_PREINV_DIVREM_1
#undef  DIVREM_2_THRESHOLD
#undef  DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD   MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD      MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD    MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1       0  /* no preinv */
#define DIVREM_2_THRESHOLD        MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
#endif
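/* Worked examples (added for clarity, not in the original source):
   POW2_P(8) is true since 8 & 7 == 0; POW2_P(12) is false since
   12 & 11 == 8.  LOG2C counts how many of the listed powers of 2 are <= n,
   so LOG2C(7) == 3 and LOG2C(8) == 4, i.e. 1 + floor(log2(n)) for n >= 1,
   valid as a compile-time constant up to the 0x8000 term.  */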
/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)               \
  do {                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);    \
    (x) = (y);                          \
    (y) = __mp_ptr_swap__tmp;           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
  do {                                  \
    MP_PTR_SWAP (xp, yp);               \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
  do {                                  \
    MP_SRCPTR_SWAP (xp, yp);            \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)

#define MPZ_PTR_SWAP(x, y)              \
  do {                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);  \
    (x) = (y);                          \
    (y) = __mpz_ptr_swap__tmp;          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)

#define MPQ_PTR_SWAP(x, y)              \
  do {                                  \
    mpq_ptr __mpq_ptr_swap__tmp = (x);  \
    (x) = (y);                          \
    (y) = __mpq_ptr_swap__tmp;          \
  } while (0)
#define MPQ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpq_srcptr __mpq_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpq_srcptr_swap__tmp;               \
  } while (0)


/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
__GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);

__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
__GMP_DECLSPEC void  __gmp_default_free (void *, size_t);

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)        \
  do {                                                            \
    if ((oldsize) != (newsize))                                   \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);   \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)


/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif
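/* Illustrative sketch (added; hypothetical function name): heap-allocating
   limbs through the user-replaceable hooks.  Note the free function must
   be told the size that was allocated.  */
#if 0
static void
example_heap_limbs (mp_size_t n)
{
  mp_ptr p = __GMP_ALLOCATE_FUNC_LIMBS (n);   /* via __gmp_allocate_func */
  mpn_zero (p, n);                            /* use the block */
  __GMP_FREE_FUNC_LIMBS (p, n);               /* size must match the alloc */
}
#endif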
/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile.  */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif


__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);


#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
#define mpn_addmul_2s __MPN(addmul_2s)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
   returns the carry out (0, 1 or 2).  Use _ip1 when a=c.  */
#ifndef mpn_addlsh1_n  /* if not done with cpuvec in a fat binary */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#define mpn_addlsh1_nc __MPN(addlsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1(dst,src,n) mpn_addlsh1_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_addlsh1_n_ip1 1
#else
#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1(dst,src,n,c) mpn_addlsh1_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_addlsh1_nc_ip1 1
#else
#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
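/* Illustrative sketch (added; hypothetical reference routine, assuming
   GMP_NAIL_BITS == 0): what mpn_addlsh1_n computes, expressed with public
   mpn primitives rather than the optimized assembly.  tp is n limbs of
   scratch.  */
#if 0
static mp_limb_t
ref_addlsh1_n (mp_ptr cp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t cy;
  cy = mpn_lshift (tp, bp, n, 1);     /* tp = 2*{b,n}; cy is 0 or 1 */
  cy += mpn_add_n (cp, ap, tp, n);    /* cp = {a,n} + tp; total 0, 1 or 2 */
  return cy;
}
#endif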
#ifndef mpn_addlsh2_n  /* if not done with cpuvec in a fat binary */
/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
   returns the carry out (0, ..., 4).  Use _ip1 when a=c.  */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#define mpn_addlsh2_nc __MPN(addlsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1(dst,src,n) mpn_addlsh2_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_addlsh2_n_ip1 1
#else
#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1(dst,src,n,c) mpn_addlsh2_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_addlsh2_nc_ip1 1
#else
#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
   returns the carry out (0, ..., 2^k).  Use _ip1 when a=c.  */
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_addlsh_nc __MPN(addlsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh_n_ip1
#define mpn_addlsh_n_ip1(dst,src,n,s) mpn_addlsh_n(dst,dst,src,n,s)
#define HAVE_NATIVE_mpn_addlsh_n_ip1 1
#else
#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh_nc_ip1
#define mpn_addlsh_nc_ip1(dst,src,n,s,c) mpn_addlsh_nc(dst,dst,src,n,s,c)
#define HAVE_NATIVE_mpn_addlsh_nc_ip1 1
#else
#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#ifndef mpn_sublsh1_n  /* if not done with cpuvec in a fat binary */
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
   returns the borrow out (0, 1 or 2).  Use _ip1 when a=c.  */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#define mpn_sublsh1_nc __MPN(sublsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1(dst,src,n) mpn_sublsh1_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_sublsh1_n_ip1 1
#else
#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1(dst,src,n,c) mpn_sublsh1_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_sublsh1_nc_ip1 1
#else
#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
   returns the carry out (-1, 0, 1).  */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
   returns the borrow out (0, ..., 4).  Use _ip1 when a=c.  */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1(dst,src,n) mpn_sublsh2_n(dst,dst,src,n)
#define HAVE_NATIVE_mpn_sublsh2_n_ip1 1
#else
#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1(dst,src,n,c) mpn_sublsh2_nc(dst,dst,src,n,c)
#define HAVE_NATIVE_mpn_sublsh2_nc_ip1 1
#else
#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
   returns the carry out (0, ..., 2^k).  Use _ip1 when a=c.  */
#define mpn_sublsh_n __MPN(sublsh_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh_n_ip1
#define mpn_sublsh_n_ip1(dst,src,n,s) mpn_sublsh_n(dst,dst,src,n,s)
#define HAVE_NATIVE_mpn_sublsh_n_ip1 1
#else
#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh_nc_ip1
#define mpn_sublsh_nc_ip1(dst,src,n,s,c) mpn_sublsh_nc(dst,dst,src,n,s,c)
#define HAVE_NATIVE_mpn_sublsh_nc_ip1 1
#else
#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
   returns the carry out (-1, ..., 3).  */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
   returns the carry out (-1, 0, ..., 2^k-1).  */
#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_rsblsh_nc __MPN(rsblsh_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  */
#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
   complement negative.  */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif

#define mpn_add_err1_n __MPN(add_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err2_n __MPN(add_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err3_n __MPN(add_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err1_n __MPN(sub_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err2_n __MPN(sub_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err3_n __MPN(sub_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
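/* Illustrative sketch (added; hypothetical reference routine, assuming
   GMP_NAIL_BITS == 0): mpn_rsh1add_n is a full-precision average,
   ({a,n}+{b,n})>>1, with the carry bit folded back into the top limb.
   tp is n limbs of scratch.  */
#if 0
static mp_limb_t
ref_rsh1add_n (mp_ptr cp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, mp_ptr tp)
{
  mp_limb_t cy, lo;
  cy = mpn_add_n (tp, ap, bp, n);           /* tp = a + b; cy is bit n */
  lo = tp[0] & 1;                           /* the bit shifted out below */
  mpn_rshift (cp, tp, n, 1);                /* cp = tp >> 1 */
  cp[n - 1] |= cy << (GMP_NUMB_BITS - 1);   /* fold the carry bit back in */
  return lo;
}
#endif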
/* Remap names of internal mpn functions. */
#define __clz_tab       __MPN(clz_tab)
#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;

#define mpn_jacobi_2 __MPN(jacobi_2)
__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);

#define mpn_jacobi_n __MPN(jacobi_n)
__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_5 __MPN(mul_5)
__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_6 __MPN(mul_6)
__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif

#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif

#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
#endif

#define mpn_sqrlo __MPN(sqrlo)
__GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
__GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_mulmid_basecase __MPN(mulmid_basecase)
__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_mulmid_n __MPN(mulmid_n)
__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_mulmid __MPN(mulmid)
__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
#endif
#ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
#endif
#ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
#endif
#ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
#endif
#ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
#endif

#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}

#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}

typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
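/* Illustrative sketch (added; hypothetical wrapper): the usual pattern for
   mpn_mulmod_bnm1, computing {rp,rn} = {ap,an}*{bp,bn} mod (B^rn - 1),
   with rn assumed to come from mpn_mulmod_bnm1_next_size and scratch sized
   by the itch function above.  Assumes an >= bn > 0 and rp holds rn limbs. */
#if 0
static void
example_mulmod_bnm1 (mp_ptr rp, mp_size_t rn,
                     mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
  mpn_mulmod_bnm1 (rp, rn, ap, an, bp, bn, tp);
  TMP_FREE;
}
#endif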
/* Pseudo-random number generator function pointers structure.  */
typedef struct {
  void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
  void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
  void (*randclear_fn) (gmp_randstate_t);
  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
} gmp_randfnptr_t;

/* Macro to obtain a void pointer to the function pointers structure.  */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)

/* Write a given number of random bits to rp.  */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr  __rstate = (state);                      \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
      (__rstate, rp, bits);                                     \
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                         \
  ((__gmp_rands_initialized ? 0                       \
    : (__gmp_rands_initialized = 1,                   \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)),   \
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)
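/* Illustrative sketch (added; hypothetical function name): RANDS
   lazy-initializes the global state on first use, and _gmp_rand then pulls
   raw bits from it; RANDS_CLEAR is for test programs only.  */
#if 0
static void
example_random_limbs (mp_ptr rp, mp_size_t n)
{
  gmp_randstate_ptr rstate = RANDS;           /* init on first use */
  _gmp_rand (rp, rstate, n * GMP_NUMB_BITS);  /* fill n limbs with bits */
}
#endif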
/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#if ! defined (__GNUC__) || __GNUC__ < 2
#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#else
#define ABOVE_THRESHOLD(size,thresh)                                    \
  ((__builtin_constant_p (thresh) && (thresh) == 0)                     \
   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX)    \
       && (size) >= (thresh)))
#endif
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
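/* Illustrative sketch (added; hypothetical wrapper): the usual threshold
   idiom.  SQR_TOOM2_THRESHOLD comes from gmp-mparam.h; mpn_toom2_sqr is
   declared further down, and its scratch-size helper lies beyond this
   excerpt, so the 2*n + GMP_NUMB_BITS figure here is an assumption of the
   sketch, not taken from this file.  */
#if 0
static void
example_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
{
  if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
    mpn_sqr_basecase (rp, ap, n);       /* algorithm A, small sizes */
  else
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (2 * n + GMP_NUMB_BITS);  /* assumed ample */
      mpn_toom2_sqr (rp, ap, n, scratch);   /* algorithm B, size >= thresh */
      TMP_FREE;
    }
}
#endif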
/* The minimal supported value for Toom22 depends also on Toom32 and
   Toom42 implementations.  */
#define MPN_TOOM22_MUL_MINSIZE    6
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   25
#define MPN_TOOM53_MUL_MINSIZE   17
#define MPN_TOOM54_MUL_MINSIZE   31
#define MPN_TOOM63_MUL_MINSIZE   49

#define MPN_TOOM42_MULMID_MINSIZE 4

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
__GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);

#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom54_mul __MPN(toom54_mul)
__GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mulmid __MPN(toom42_mulmid)
__GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);

#define mpn_mul_fft_full __MPN(mul_fft_full)
__GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
__GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
__GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); 1423 1424 #define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1) 1425 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); 1426 1427 #define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1) 1428 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t); 1429 1430 #define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr) 1431 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1432 1433 #define mpn_sbpi1_div_q __MPN(sbpi1_div_q) 1434 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1435 1436 #define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q) 1437 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1438 1439 #define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr) 1440 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *); 1441 #define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n) 1442 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr); 1443 1444 #define mpn_dcpi1_div_q __MPN(dcpi1_div_q) 1445 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *); 1446 1447 #define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q) 1448 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *); 1449 #define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n) 1450 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr); 1451 1452 #define mpn_mu_div_qr __MPN(mu_div_qr) 1453 __GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1454 #define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch) 1455 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST; 1456 #define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in) 1457 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int); 1458 1459 #define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr) 1460 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1461 #define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch) 1462 __GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1463 1464 #define mpn_mu_divappr_q __MPN(mu_divappr_q) 1465 __GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1466 #define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch) 1467 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST; 1468 #define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in) 1469 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int); 1470 1471 #define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q) 1472 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1473 1474 #define mpn_mu_div_q __MPN(mu_div_q) 1475 __GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1476 #define mpn_mu_div_q_itch __MPN(mu_div_q_itch) 1477 __GMP_DECLSPEC 
mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST; 1478 1479 #define mpn_div_q __MPN(div_q) 1480 __GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1481 1482 #define mpn_invert __MPN(invert) 1483 __GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1484 #define mpn_invert_itch(n) mpn_invertappr_itch(n) 1485 1486 #define mpn_ni_invertappr __MPN(ni_invertappr) 1487 __GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1488 #define mpn_invertappr __MPN(invertappr) 1489 __GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1490 #define mpn_invertappr_itch(n) (2 * (n)) 1491 1492 #define mpn_binvert __MPN(binvert) 1493 __GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1494 #define mpn_binvert_itch __MPN(binvert_itch) 1495 __GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST; 1496 1497 #define mpn_bdiv_q_1 __MPN(bdiv_q_1) 1498 __GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1499 1500 #define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1) 1501 __GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int); 1502 1503 #define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr) 1504 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1505 1506 #define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q) 1507 __GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1508 1509 #define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr) 1510 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1511 #define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch) 1512 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST; 1513 1514 #define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n) 1515 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1516 #define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q) 1517 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1518 1519 #define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n) 1520 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1521 #define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch) 1522 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch (mp_size_t) ATTRIBUTE_CONST; 1523 1524 #define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr) 1525 __GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1526 #define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch) 1527 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1528 1529 #define mpn_mu_bdiv_q __MPN(mu_bdiv_q) 1530 __GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1531 #define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch) 1532 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1533 1534 #define mpn_bdiv_qr __MPN(bdiv_qr) 1535 __GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1536 #define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch) 1537 __GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1538 1539 #define mpn_bdiv_q __MPN(bdiv_q) 1540 __GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, 
mp_size_t, mp_ptr); 1541 #define mpn_bdiv_q_itch __MPN(bdiv_q_itch) 1542 __GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1543 1544 #define mpn_divexact __MPN(divexact) 1545 __GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); 1546 #define mpn_divexact_itch __MPN(divexact_itch) 1547 __GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1548 1549 #ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */ 1550 #define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c) 1551 __GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t); 1552 #endif 1553 1554 #define mpn_bdiv_dbm1(dst, src, size, divisor) \ 1555 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0)) 1556 1557 #define mpn_powm __MPN(powm) 1558 __GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1559 #define mpn_powlo __MPN(powlo) 1560 __GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); 1561 1562 #define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr) 1563 __GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1564 #define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r) 1565 __GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1566 1567 1568 /* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc with mpn_addlsh_n, 1569 etc when the former lack native implementations but the latter has one. We 1570 then lie and say these macros represent native functions, but leave a trace 1571 by using the value 2 rather than 1. */ 1572 1573 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n 1574 #undef mpn_addlsh1_n 1575 #define mpn_addlsh1_n(a,b,c,d) mpn_addlsh_n(a,b,c,d,1) 1576 #define HAVE_NATIVE_mpn_addlsh1_n 2 1577 #endif 1578 1579 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n 1580 #undef mpn_addlsh2_n 1581 #define mpn_addlsh2_n(a,b,c,d) mpn_addlsh_n(a,b,c,d,2) 1582 #define HAVE_NATIVE_mpn_addlsh2_n 2 1583 #endif 1584 1585 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n 1586 #undef mpn_sublsh1_n 1587 #define mpn_sublsh1_n(a,b,c,d) mpn_sublsh_n(a,b,c,d,1) 1588 #define HAVE_NATIVE_mpn_sublsh1_n 2 1589 #endif 1590 1591 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n 1592 #undef mpn_sublsh2_n 1593 #define mpn_sublsh2_n(a,b,c,d) mpn_sublsh_n(a,b,c,d,2) 1594 #define HAVE_NATIVE_mpn_sublsh2_n 2 1595 #endif 1596 1597 #if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n 1598 #undef mpn_rsblsh1_n 1599 #define mpn_rsblsh1_n(a,b,c,d) mpn_rsblsh_n(a,b,c,d,1) 1600 #define HAVE_NATIVE_mpn_rsblsh1_n 2 1601 #endif 1602 1603 #if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n 1604 #undef mpn_rsblsh2_n 1605 #define mpn_rsblsh2_n(a,b,c,d) mpn_rsblsh_n(a,b,c,d,2) 1606 #define HAVE_NATIVE_mpn_rsblsh2_n 2 1607 #endif 1608 1609
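/* Illustrative note (added for exposition, not part of the original source):
   after the overrides above, a build can tell a synthesized variant from a
   true native routine, eg.

       #if HAVE_NATIVE_mpn_addlsh1_n == 2
         (mpn_addlsh1_n is really mpn_addlsh_n with a shift count of 1)
       #endif
*/
1610 #ifndef DIVEXACT_BY3_METHOD 1611 #if GMP_NUMB_BITS % 2 == 0 && !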
defined (HAVE_NATIVE_mpn_divexact_by3c) 1612 #define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */ 1613 #else 1614 #define DIVEXACT_BY3_METHOD 1 1615 #endif 1616 #endif 1617 1618 #if DIVEXACT_BY3_METHOD == 0 1619 #undef mpn_divexact_by3 1620 #define mpn_divexact_by3(dst,src,size) \ 1621 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3))) 1622 /* override mpn_divexact_by3c defined in gmp.h */ 1623 /* 1624 #undef mpn_divexact_by3c 1625 #define mpn_divexact_by3c(dst,src,size,cy) \ 1626 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * cy)) 1627 */ 1628 #endif 1629 1630 #if GMP_NUMB_BITS % 4 == 0 1631 #define mpn_divexact_by5(dst,src,size) \ 1632 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5))) 1633 #endif 1634 1635 #if GMP_NUMB_BITS % 3 == 0 1636 #define mpn_divexact_by7(dst,src,size) \ 1637 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7))) 1638 #endif 1639 1640 #if GMP_NUMB_BITS % 6 == 0 1641 #define mpn_divexact_by9(dst,src,size) \ 1642 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9))) 1643 #endif 1644 1645 #if GMP_NUMB_BITS % 10 == 0 1646 #define mpn_divexact_by11(dst,src,size) \ 1647 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11))) 1648 #endif 1649 1650 #if GMP_NUMB_BITS % 12 == 0 1651 #define mpn_divexact_by13(dst,src,size) \ 1652 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13))) 1653 #endif 1654 1655 #if GMP_NUMB_BITS % 4 == 0 1656 #define mpn_divexact_by15(dst,src,size) \ 1657 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15))) 1658 #endif 1659 1660 #define mpz_divexact_gcd __gmpz_divexact_gcd 1661 __GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr); 1662 1663 #define mpz_prodlimbs __gmpz_prodlimbs 1664 __GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t); 1665 1666 #define mpz_oddfac_1 __gmpz_oddfac_1 1667 __GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned); 1668 1669 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite 1670 #ifdef _GMP_H_HAVE_FILE 1671 __GMP_DECLSPEC size_t mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t); 1672 #endif 1673 1674 #define mpn_divisible_p __MPN(divisible_p) 1675 __GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE; 1676 1677 #define mpn_rootrem __MPN(rootrem) 1678 __GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1679 1680 #define mpn_broot __MPN(broot) 1681 __GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1682 1683 #define mpn_broot_invm1 __MPN(broot_invm1) 1684 __GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1685 1686 #define mpn_brootinv __MPN(brootinv) 1687 __GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1688 1689 #define mpn_bsqrt __MPN(bsqrt) 1690 __GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr); 1691 1692 #define mpn_bsqrtinv __MPN(bsqrtinv) 1693 __GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr); 1694 1695 #if defined (_CRAY) 1696 #define MPN_COPY_INCR(dst, src, n) \ 1697 do { \ 1698 int __i; /* Faster on some Crays with plain int */ \ 1699 _Pragma ("_CRI ivdep"); \ 1700 for (__i = 0; __i < (n); __i++) \ 1701 (dst)[__i] = (src)[__i]; \ 1702 } while (0) 1703 #endif 1704
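/* Illustrative use of the mpn_divexact_byN macros above (example added for
   exposition, not part of the original source).  With 64-bit limbs, where
   {src,2} = 5*B + 10 is an exact multiple of 5:

       mp_limb_t src[2] = { 10, 5 };
       mp_limb_t dst[2];
       mpn_divexact_by5 (dst, src, 2);

   leaves the quotient B + 2 in dst, ie. dst[0] == 2 and dst[1] == 1, a single
   mpn_bdiv_dbm1 call standing in for a general division.  */
1705 /* used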
by test programs, hence __GMP_DECLSPEC */ 1706 #ifndef mpn_copyi /* if not done with cpuvec in a fat binary */ 1707 #define mpn_copyi __MPN(copyi) 1708 __GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t); 1709 #endif 1710 1711 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi 1712 #define MPN_COPY_INCR(dst, src, size) \ 1713 do { \ 1714 ASSERT ((size) >= 0); \ 1715 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \ 1716 mpn_copyi (dst, src, size); \ 1717 } while (0) 1718 #endif 1719 1720 /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */ 1721 #if ! defined (MPN_COPY_INCR) 1722 #define MPN_COPY_INCR(dst, src, n) \ 1723 do { \ 1724 ASSERT ((n) >= 0); \ 1725 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \ 1726 if ((n) != 0) \ 1727 { \ 1728 mp_size_t __n = (n) - 1; \ 1729 mp_ptr __dst = (dst); \ 1730 mp_srcptr __src = (src); \ 1731 mp_limb_t __x; \ 1732 __x = *__src++; \ 1733 if (__n != 0) \ 1734 { \ 1735 do \ 1736 { \ 1737 *__dst++ = __x; \ 1738 __x = *__src++; \ 1739 } \ 1740 while (--__n); \ 1741 } \ 1742 *__dst++ = __x; \ 1743 } \ 1744 } while (0) 1745 #endif 1746 1747 1748 #if defined (_CRAY) 1749 #define MPN_COPY_DECR(dst, src, n) \ 1750 do { \ 1751 int __i; /* Faster on some Crays with plain int */ \ 1752 _Pragma ("_CRI ivdep"); \ 1753 for (__i = (n) - 1; __i >= 0; __i--) \ 1754 (dst)[__i] = (src)[__i]; \ 1755 } while (0) 1756 #endif 1757 1758 /* used by test programs, hence __GMP_DECLSPEC */ 1759 #ifndef mpn_copyd /* if not done with cpuvec in a fat binary */ 1760 #define mpn_copyd __MPN(copyd) 1761 __GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t); 1762 #endif 1763 1764 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd 1765 #define MPN_COPY_DECR(dst, src, size) \ 1766 do { \ 1767 ASSERT ((size) >= 0); \ 1768 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \ 1769 mpn_copyd (dst, src, size); \ 1770 } while (0) 1771 #endif 1772 1773 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */ 1774 #if ! defined (MPN_COPY_DECR) 1775 #define MPN_COPY_DECR(dst, src, n) \ 1776 do { \ 1777 ASSERT ((n) >= 0); \ 1778 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \ 1779 if ((n) != 0) \ 1780 { \ 1781 mp_size_t __n = (n) - 1; \ 1782 mp_ptr __dst = (dst) + __n; \ 1783 mp_srcptr __src = (src) + __n; \ 1784 mp_limb_t __x; \ 1785 __x = *__src--; \ 1786 if (__n != 0) \ 1787 { \ 1788 do \ 1789 { \ 1790 *__dst-- = __x; \ 1791 __x = *__src--; \ 1792 } \ 1793 while (--__n); \ 1794 } \ 1795 *__dst-- = __x; \ 1796 } \ 1797 } while (0) 1798 #endif 1799 1800 1801 #ifndef MPN_COPY 1802 #define MPN_COPY(d,s,n) \ 1803 do { \ 1804 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \ 1805 MPN_COPY_INCR (d, s, n); \ 1806 } while (0) 1807 #endif 1808 1809 1810 /* Set {dst,size} to the limbs of {src,size} in reverse order. */ 1811 #define MPN_REVERSE(dst, src, size) \ 1812 do { \ 1813 mp_ptr __dst = (dst); \ 1814 mp_size_t __size = (size); \ 1815 mp_srcptr __src = (src) + __size - 1; \ 1816 mp_size_t __i; \ 1817 ASSERT ((size) >= 0); \ 1818 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ 1819 CRAY_Pragma ("_CRI ivdep"); \ 1820 for (__i = 0; __i < __size; __i++) \ 1821 { \ 1822 *__dst = *__src; \ 1823 __dst++; \ 1824 __src--; \ 1825 } \ 1826 } while (0) 1827 1828 1829 /* Zero n limbs at dst. 1830 1831 For power and powerpc we want an inline stu/bdnz loop for zeroing. On 1832 ppc630 for instance this is optimal since it can sustain only 1 store per 1833 cycle. 
1834 1835 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise that the 1836 "for" loop in the generic code below can become stu/bdnz. The do/while 1837 here helps it get to that. The same caveat about plain -mpowerpc64 mode 1838 applies here as to __GMPN_COPY_INCR in gmp.h. 1839 1840 xlc 3.1 already generates stu/bdnz from the generic C, and does so from 1841 this loop too. 1842 1843 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines 1844 at a time. MPN_ZERO isn't all that important in GMP, so it might be more 1845 trouble than it's worth to do the same, though perhaps a call to memset 1846 would be good when on a GNU system. */ 1847 1848 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc 1849 #define MPN_FILL(dst, n, f) \ 1850 do { \ 1851 mp_ptr __dst = (dst) - 1; \ 1852 mp_size_t __n = (n); \ 1853 ASSERT (__n > 0); \ 1854 do \ 1855 *++__dst = (f); \ 1856 while (--__n); \ 1857 } while (0) 1858 #endif 1859 1860 #ifndef MPN_FILL 1861 #define MPN_FILL(dst, n, f) \ 1862 do { \ 1863 mp_ptr __dst = (dst); \ 1864 mp_size_t __n = (n); \ 1865 ASSERT (__n > 0); \ 1866 do \ 1867 *__dst++ = (f); \ 1868 while (--__n); \ 1869 } while (0) 1870 #endif 1871 1872 #define MPN_ZERO(dst, n) \ 1873 do { \ 1874 ASSERT ((n) >= 0); \ 1875 if ((n) != 0) \ 1876 MPN_FILL (dst, n, CNST_LIMB (0)); \ 1877 } while (0) 1878 1879 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to 1880 start up and would need to strip a lot of zeros before it'd be faster 1881 than a simple cmpl loop. Here are some times in cycles for 1882 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping 1883 low zeros). 1884 1885 std cld 1886 P5 18 16 1887 P6 46 38 1888 K6 36 13 1889 K7 21 20 1890 */ 1891 #ifndef MPN_NORMALIZE 1892 #define MPN_NORMALIZE(DST, NLIMBS) \ 1893 do { \ 1894 while ((NLIMBS) > 0) \ 1895 { \ 1896 if ((DST)[(NLIMBS) - 1] != 0) \ 1897 break; \ 1898 (NLIMBS)--; \ 1899 } \ 1900 } while (0) 1901 #endif 1902 #ifndef MPN_NORMALIZE_NOT_ZERO 1903 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \ 1904 do { \ 1905 while (1) \ 1906 { \ 1907 ASSERT ((NLIMBS) >= 1); \ 1908 if ((DST)[(NLIMBS) - 1] != 0) \ 1909 break; \ 1910 (NLIMBS)--; \ 1911 } \ 1912 } while (0) 1913 #endif 1914 1915 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr 1916 and decrementing size. low should be ptr[0], and will be the new ptr[0] 1917 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and 1918 somewhere a non-zero limb. */ 1919 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \ 1920 do { \ 1921 ASSERT ((size) >= 1); \ 1922 ASSERT ((low) == (ptr)[0]); \ 1923 \ 1924 while ((low) == 0) \ 1925 { \ 1926 (size)--; \ 1927 ASSERT ((size) >= 1); \ 1928 (ptr)++; \ 1929 (low) = *(ptr); \ 1930 } \ 1931 } while (0) 1932 1933 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a 1934 temporary variable; it will be automatically cleared out at function 1935 return. We use __x here to make it possible to accept both mpz_ptr and 1936 mpz_t arguments. */ 1937 #define MPZ_TMP_INIT(X, NLIMBS) \ 1938 do { \ 1939 mpz_ptr __x = (X); \ 1940 ASSERT ((NLIMBS) >= 1); \ 1941 __x->_mp_alloc = (NLIMBS); \ 1942 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \ 1943 } while (0) 1944 1945 #if WANT_ASSERT 1946 static inline void * 1947 _mpz_newalloc (mpz_ptr z, mp_size_t n) 1948 { 1949 void * res = _mpz_realloc(z,n); 1950 /* If we are checking the code, force a random change to limbs.
*/ 1951 ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1]; 1952 return res; 1953 } 1954 #else 1955 #define _mpz_newalloc _mpz_realloc 1956 #endif 1957 /* Realloc for an mpz_t Z if it has less than N limbs. */ 1958 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \ 1959 ? (mp_ptr) _mpz_realloc(z,n) \ 1960 : PTR(z)) 1961 #define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \ 1962 ? (mp_ptr) _mpz_newalloc(z,n) \ 1963 : PTR(z)) 1964 1965 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1) 1966 1967 1968 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and 1969 f1p. 1970 1971 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the 1972 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the 1973 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419. 1974 1975 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs 1976 without good floating point. There's +2 for rounding up, and a further 1977 +2 since at the last step x limbs are doubled into a 2x+1 limb region 1978 whereas the actual F[2k] value might be only 2x-1 limbs. 1979 1980 Note that a division is done first, since on a 32-bit system it's at 1981 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be 1982 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and 1983 whatever a multiply of two 190Mbyte numbers takes.) 1984 1985 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be 1986 worked into the multiplier. */ 1987 1988 #define MPN_FIB2_SIZE(n) \ 1989 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4) 1990 1991 1992 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range 1993 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h). 1994 1995 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] = 1996 F[n] + 2*F[n-1] fits in a limb.
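   (Worked example of MPN_FIB2_SIZE above, added for exposition: with 64-bit
   limbs, MPN_FIB2_SIZE (1000) = 1000/32*23/64 + 4 = 11 + 4 = 15 limbs,
   comfortably above the roughly 694 bits, ie. 11 limbs, that F[1000]
   actually occupies.)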
*/ 1997 1998 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; 1999 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1]) 2000 2001 extern const mp_limb_t __gmp_oddfac_table[]; 2002 extern const mp_limb_t __gmp_odd2fac_table[]; 2003 extern const unsigned char __gmp_fac2cnt_table[]; 2004 extern const mp_limb_t __gmp_limbroots_table[]; 2005 2006 /* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */ 2007 static inline unsigned 2008 log_n_max (mp_limb_t n) 2009 { 2010 unsigned log; 2011 for (log = 8; n > __gmp_limbroots_table[log - 1]; log--); 2012 return log; 2013 } 2014 2015 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */ 2016 typedef struct 2017 { 2018 unsigned long d; /* current index in s[] */ 2019 unsigned long s0; /* number corresponding to s[0] */ 2020 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */ 2021 unsigned char s[SIEVESIZE + 1]; /* sieve table */ 2022 } gmp_primesieve_t; 2023 2024 #define gmp_init_primesieve __gmp_init_primesieve 2025 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *); 2026 2027 #define gmp_nextprime __gmp_nextprime 2028 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *); 2029 2030 #define gmp_primesieve __gmp_primesieve 2031 __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t); 2032 2033 2034 #ifndef MUL_TOOM22_THRESHOLD 2035 #define MUL_TOOM22_THRESHOLD 30 2036 #endif 2037 2038 #ifndef MUL_TOOM33_THRESHOLD 2039 #define MUL_TOOM33_THRESHOLD 100 2040 #endif 2041 2042 #ifndef MUL_TOOM44_THRESHOLD 2043 #define MUL_TOOM44_THRESHOLD 300 2044 #endif 2045 2046 #ifndef MUL_TOOM6H_THRESHOLD 2047 #define MUL_TOOM6H_THRESHOLD 350 2048 #endif 2049 2050 #ifndef SQR_TOOM6_THRESHOLD 2051 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD 2052 #endif 2053 2054 #ifndef MUL_TOOM8H_THRESHOLD 2055 #define MUL_TOOM8H_THRESHOLD 450 2056 #endif 2057 2058 #ifndef SQR_TOOM8_THRESHOLD 2059 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD 2060 #endif 2061 2062 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD 2063 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100 2064 #endif 2065 2066 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD 2067 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110 2068 #endif 2069 2070 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD 2071 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100 2072 #endif 2073 2074 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD 2075 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110 2076 #endif 2077 2078 #ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD 2079 #define MUL_TOOM43_TO_TOOM54_THRESHOLD 150 2080 #endif 2081 2082 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a 2083 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat 2084 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a 2085 separate hard limit will have been defined. Similarly for TOOM3. */ 2086 #ifndef MUL_TOOM22_THRESHOLD_LIMIT 2087 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD 2088 #endif 2089 #ifndef MUL_TOOM33_THRESHOLD_LIMIT 2090 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD 2091 #endif 2092 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT 2093 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD 2094 #endif 2095 #ifndef SQRLO_BASECASE_THRESHOLD_LIMIT 2096 #define SQRLO_BASECASE_THRESHOLD_LIMIT SQRLO_BASECASE_THRESHOLD 2097 #endif 2098 #ifndef SQRLO_DC_THRESHOLD_LIMIT 2099 #define SQRLO_DC_THRESHOLD_LIMIT SQRLO_DC_THRESHOLD 2100 #endif 2101 2102 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from 2103 mpn_mul_basecase. 
Default is to use mpn_sqr_basecase from 0. (Note that we 2104 certainly always want it if there's a native assembler mpn_sqr_basecase.) 2105 2106 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase 2107 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2 2108 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less 2109 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase 2110 should be used, and that may be never. */ 2111 2112 #ifndef SQR_BASECASE_THRESHOLD 2113 #define SQR_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */ 2114 #endif 2115 2116 #ifndef SQR_TOOM2_THRESHOLD 2117 #define SQR_TOOM2_THRESHOLD 50 2118 #endif 2119 2120 #ifndef SQR_TOOM3_THRESHOLD 2121 #define SQR_TOOM3_THRESHOLD 120 2122 #endif 2123 2124 #ifndef SQR_TOOM4_THRESHOLD 2125 #define SQR_TOOM4_THRESHOLD 400 2126 #endif 2127 2128 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */ 2129 #ifndef SQR_TOOM3_THRESHOLD_LIMIT 2130 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD 2131 #endif 2132 2133 #ifndef MULMID_TOOM42_THRESHOLD 2134 #define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD 2135 #endif 2136 2137 #ifndef MULLO_BASECASE_THRESHOLD 2138 #define MULLO_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */ 2139 #endif 2140 2141 #ifndef MULLO_DC_THRESHOLD 2142 #define MULLO_DC_THRESHOLD (2*MUL_TOOM22_THRESHOLD) 2143 #endif 2144 2145 #ifndef MULLO_MUL_N_THRESHOLD 2146 #define MULLO_MUL_N_THRESHOLD (2*MUL_FFT_THRESHOLD) 2147 #endif 2148 2149 #ifndef SQRLO_BASECASE_THRESHOLD 2150 #define SQRLO_BASECASE_THRESHOLD 0 /* never use mpn_sqr_basecase */ 2151 #endif 2152 2153 #ifndef SQRLO_DC_THRESHOLD 2154 #define SQRLO_DC_THRESHOLD (MULLO_DC_THRESHOLD) 2155 #endif 2156 2157 #ifndef SQRLO_SQR_THRESHOLD 2158 #define SQRLO_SQR_THRESHOLD (MULLO_MUL_N_THRESHOLD) 2159 #endif 2160 2161 #ifndef DC_DIV_QR_THRESHOLD 2162 #define DC_DIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD) 2163 #endif 2164 2165 #ifndef DC_DIVAPPR_Q_THRESHOLD 2166 #define DC_DIVAPPR_Q_THRESHOLD 200 2167 #endif 2168 2169 #ifndef DC_BDIV_QR_THRESHOLD 2170 #define DC_BDIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD) 2171 #endif 2172 2173 #ifndef DC_BDIV_Q_THRESHOLD 2174 #define DC_BDIV_Q_THRESHOLD 180 2175 #endif 2176 2177 #ifndef DIVEXACT_JEB_THRESHOLD 2178 #define DIVEXACT_JEB_THRESHOLD 25 2179 #endif 2180 2181 #ifndef INV_MULMOD_BNM1_THRESHOLD 2182 #define INV_MULMOD_BNM1_THRESHOLD (4*MULMOD_BNM1_THRESHOLD) 2183 #endif 2184 2185 #ifndef INV_APPR_THRESHOLD 2186 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD 2187 #endif 2188 2189 #ifndef INV_NEWTON_THRESHOLD 2190 #define INV_NEWTON_THRESHOLD 200 2191 #endif 2192 2193 #ifndef BINV_NEWTON_THRESHOLD 2194 #define BINV_NEWTON_THRESHOLD 300 2195 #endif 2196 2197 #ifndef MU_DIVAPPR_Q_THRESHOLD 2198 #define MU_DIVAPPR_Q_THRESHOLD 2000 2199 #endif 2200 2201 #ifndef MU_DIV_QR_THRESHOLD 2202 #define MU_DIV_QR_THRESHOLD 2000 2203 #endif 2204 2205 #ifndef MUPI_DIV_QR_THRESHOLD 2206 #define MUPI_DIV_QR_THRESHOLD 200 2207 #endif 2208 2209 #ifndef MU_BDIV_Q_THRESHOLD 2210 #define MU_BDIV_Q_THRESHOLD 2000 2211 #endif 2212 2213 #ifndef MU_BDIV_QR_THRESHOLD 2214 #define MU_BDIV_QR_THRESHOLD 2000 2215 #endif 2216 2217 #ifndef MULMOD_BNM1_THRESHOLD 2218 #define MULMOD_BNM1_THRESHOLD 16 2219 #endif 2220 2221 #ifndef SQRMOD_BNM1_THRESHOLD 2222 #define SQRMOD_BNM1_THRESHOLD 16 2223 #endif 2224 2225 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD 2226 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2) 2227 #endif 2228 2229 #if 
HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 2230 2231 #ifndef REDC_1_TO_REDC_2_THRESHOLD 2232 #define REDC_1_TO_REDC_2_THRESHOLD 15 2233 #endif 2234 #ifndef REDC_2_TO_REDC_N_THRESHOLD 2235 #define REDC_2_TO_REDC_N_THRESHOLD 100 2236 #endif 2237 2238 #else 2239 2240 #ifndef REDC_1_TO_REDC_N_THRESHOLD 2241 #define REDC_1_TO_REDC_N_THRESHOLD 100 2242 #endif 2243 2244 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */ 2245 2246 2247 /* First k to use for an FFT modF multiply. A modF FFT is an order 2248 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba, 2249 whereas k=4 is 1.33 which is faster than toom3 at 1.485. */ 2250 #define FFT_FIRST_K 4 2251 2252 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */ 2253 #ifndef MUL_FFT_MODF_THRESHOLD 2254 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3) 2255 #endif 2256 #ifndef SQR_FFT_MODF_THRESHOLD 2257 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3) 2258 #endif 2259 2260 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This 2261 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an 2262 NxN->2N multiply and not recursing into itself is an order 2263 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which 2264 is the first better than toom3. */ 2265 #ifndef MUL_FFT_THRESHOLD 2266 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10) 2267 #endif 2268 #ifndef SQR_FFT_THRESHOLD 2269 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10) 2270 #endif 2271 2272 /* Table of thresholds for successive modF FFT "k"s. The first entry is 2273 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2, 2274 etc. See mpn_fft_best_k(). */ 2275 #ifndef MUL_FFT_TABLE 2276 #define MUL_FFT_TABLE \ 2277 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \ 2278 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \ 2279 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \ 2280 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \ 2281 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \ 2282 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \ 2283 0 } 2284 #endif 2285 #ifndef SQR_FFT_TABLE 2286 #define SQR_FFT_TABLE \ 2287 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \ 2288 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \ 2289 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \ 2290 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \ 2291 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \ 2292 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \ 2293 0 } 2294 #endif 2295 2296 struct fft_table_nk 2297 { 2298 gmp_uint_least32_t n:27; 2299 gmp_uint_least32_t k:5; 2300 }; 2301 2302 #ifndef FFT_TABLE_ATTRS 2303 #define FFT_TABLE_ATTRS static const 2304 #endif 2305 2306 #define MPN_FFT_TABLE_SIZE 16 2307 2308 2309 #ifndef DC_DIV_QR_THRESHOLD 2310 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD) 2311 #endif 2312 2313 #ifndef GET_STR_DC_THRESHOLD 2314 #define GET_STR_DC_THRESHOLD 18 2315 #endif 2316 2317 #ifndef GET_STR_PRECOMPUTE_THRESHOLD 2318 #define GET_STR_PRECOMPUTE_THRESHOLD 35 2319 #endif 2320 2321 #ifndef SET_STR_DC_THRESHOLD 2322 #define SET_STR_DC_THRESHOLD 750 2323 #endif 2324 2325 #ifndef SET_STR_PRECOMPUTE_THRESHOLD 2326 #define SET_STR_PRECOMPUTE_THRESHOLD 2000 2327 #endif 2328 2329 #ifndef FAC_ODD_THRESHOLD 2330 #define FAC_ODD_THRESHOLD 35 2331 #endif 2332 2333 #ifndef FAC_DSC_THRESHOLD 2334 #define FAC_DSC_THRESHOLD 400 2335 #endif 2336 2337 /* Return non-zero if xp,xsize and yp,ysize overlap. 2338 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no 2339 overlap. If both these are false, there's an overlap. 
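   (Example, added for exposition: with mp_limb_t a[4], MPN_OVERLAP_P (a, 4,
   a + 2, 2) is non-zero since a + 4 > a + 2 and a + 2 + 2 > a, whereas
   MPN_OVERLAP_P (a, 2, a + 2, 2) is zero, the two regions being disjoint.)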
*/ 2340 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \ 2341 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) 2342 #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \ 2343 ( (char *) (xp) + (xsize) > (char *) (yp) \ 2344 && (char *) (yp) + (ysize) > (char *) (xp)) 2345 2346 /* Return non-zero if xp,xsize and yp,ysize are either identical or not 2347 overlapping. Return zero if they're partially overlapping. */ 2348 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \ 2349 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size) 2350 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \ 2351 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize)) 2352 2353 /* Return non-zero if dst,dsize and src,ssize are either identical or 2354 overlapping in a way suitable for an incrementing/decrementing algorithm. 2355 Return zero if they're partially overlapping in an unsuitable fashion. */ 2356 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \ 2357 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) 2358 #define MPN_SAME_OR_INCR_P(dst, src, size) \ 2359 MPN_SAME_OR_INCR2_P(dst, size, src, size) 2360 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \ 2361 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) 2362 #define MPN_SAME_OR_DECR_P(dst, src, size) \ 2363 MPN_SAME_OR_DECR2_P(dst, size, src, size) 2364 2365 2366 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>. 2367 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS() 2368 does it always. Generally assertions are meant for development, but 2369 might help when looking for a problem later too. */ 2370 2371 #ifdef __LINE__ 2372 #define ASSERT_LINE __LINE__ 2373 #else 2374 #define ASSERT_LINE -1 2375 #endif 2376 2377 #ifdef __FILE__ 2378 #define ASSERT_FILE __FILE__ 2379 #else 2380 #define ASSERT_FILE "" 2381 #endif 2382 2383 __GMP_DECLSPEC void __gmp_assert_header (const char *, int); 2384 __GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN; 2385 2386 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr) 2387 2388 #define ASSERT_ALWAYS(expr) \ 2389 do { \ 2390 if (UNLIKELY (!(expr))) \ 2391 ASSERT_FAIL (expr); \ 2392 } while (0) 2393 2394 #if WANT_ASSERT 2395 #define ASSERT(expr) ASSERT_ALWAYS (expr) 2396 #else 2397 #define ASSERT(expr) do {} while (0) 2398 #endif 2399 2400 2401 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks 2402 that it's zero. In both cases if assertion checking is disabled the 2403 expression is still evaluated. These macros are meant for use with 2404 routines like mpn_add_n() where the return value represents a carry or 2405 whatever that should or shouldn't occur in some context. For example, 2406 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */ 2407 #if WANT_ASSERT 2408 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0) 2409 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0) 2410 #else 2411 #define ASSERT_CARRY(expr) (expr) 2412 #define ASSERT_NOCARRY(expr) (expr) 2413 #endif 2414 2415 2416 /* ASSERT_CODE includes code when assertion checking is wanted. This is the 2417 same as writing "#if WANT_ASSERT", but more compact. */ 2418 #if WANT_ASSERT 2419 #define ASSERT_CODE(expr) expr 2420 #else 2421 #define ASSERT_CODE(expr) 2422 #endif 2423 2424 2425 /* Test that an mpq_t is in fully canonical form. This can be used as 2426 protection on routines like mpq_equal which give wrong results on 2427 non-canonical inputs. 
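   (Example, added for exposition: 2/4 is non-canonical, since gcd(2,4) = 2,
   so the check below would fail on it; callers are expected to have applied
   mpq_canonicalize, after which the value is stored as 1/2.)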
*/ 2428 #if WANT_ASSERT 2429 #define ASSERT_MPQ_CANONICAL(q) \ 2430 do { \ 2431 ASSERT (q->_mp_den._mp_size > 0); \ 2432 if (q->_mp_num._mp_size == 0) \ 2433 { \ 2434 /* zero should be 0/1 */ \ 2435 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \ 2436 } \ 2437 else \ 2438 { \ 2439 /* no common factors */ \ 2440 mpz_t __g; \ 2441 mpz_init (__g); \ 2442 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \ 2443 ASSERT (mpz_cmp_ui (__g, 1) == 0); \ 2444 mpz_clear (__g); \ 2445 } \ 2446 } while (0) 2447 #else 2448 #define ASSERT_MPQ_CANONICAL(q) do {} while (0) 2449 #endif 2450 2451 /* Check that the nail parts are zero. */ 2452 #define ASSERT_ALWAYS_LIMB(limb) \ 2453 do { \ 2454 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \ 2455 ASSERT_ALWAYS (__nail == 0); \ 2456 } while (0) 2457 #define ASSERT_ALWAYS_MPN(ptr, size) \ 2458 do { \ 2459 /* let whole loop go dead when no nails */ \ 2460 if (GMP_NAIL_BITS != 0) \ 2461 { \ 2462 mp_size_t __i; \ 2463 for (__i = 0; __i < (size); __i++) \ 2464 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \ 2465 } \ 2466 } while (0) 2467 #if WANT_ASSERT 2468 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb) 2469 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size) 2470 #else 2471 #define ASSERT_LIMB(limb) do {} while (0) 2472 #define ASSERT_MPN(ptr, size) do {} while (0) 2473 #endif 2474 2475 2476 /* Assert that an mpn region {ptr,size} is zero, or non-zero. 2477 size==0 is allowed, and in that case {ptr,size} is considered to be zero. */ 2478 #if WANT_ASSERT 2479 #define ASSERT_MPN_ZERO_P(ptr,size) \ 2480 do { \ 2481 mp_size_t __i; \ 2482 ASSERT ((size) >= 0); \ 2483 for (__i = 0; __i < (size); __i++) \ 2484 ASSERT ((ptr)[__i] == 0); \ 2485 } while (0) 2486 #define ASSERT_MPN_NONZERO_P(ptr,size) \ 2487 do { \ 2488 mp_size_t __i; \ 2489 int __nonzero = 0; \ 2490 ASSERT ((size) >= 0); \ 2491 for (__i = 0; __i < (size); __i++) \ 2492 if ((ptr)[__i] != 0) \ 2493 { \ 2494 __nonzero = 1; \ 2495 break; \ 2496 } \ 2497 ASSERT (__nonzero); \ 2498 } while (0) 2499 #else 2500 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0) 2501 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0) 2502 #endif 2503 2504 2505 #if ! HAVE_NATIVE_mpn_com 2506 #undef mpn_com 2507 #define mpn_com(d,s,n) \ 2508 do { \ 2509 mp_ptr __d = (d); \ 2510 mp_srcptr __s = (s); \ 2511 mp_size_t __n = (n); \ 2512 ASSERT (__n >= 1); \ 2513 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \ 2514 do \ 2515 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \ 2516 while (--__n); \ 2517 } while (0) 2518 #endif 2519 2520 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \ 2521 do { \ 2522 mp_srcptr __up = (up); \ 2523 mp_srcptr __vp = (vp); \ 2524 mp_ptr __rp = (rp); \ 2525 mp_size_t __n = (n); \ 2526 mp_limb_t __a, __b; \ 2527 ASSERT (__n > 0); \ 2528 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \ 2529 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \ 2530 __up += __n; \ 2531 __vp += __n; \ 2532 __rp += __n; \ 2533 __n = -__n; \ 2534 do { \ 2535 __a = __up[__n]; \ 2536 __b = __vp[__n]; \ 2537 __rp[__n] = operation; \ 2538 } while (++__n); \ 2539 } while (0) 2540 2541 2542 #if ! HAVE_NATIVE_mpn_and_n 2543 #undef mpn_and_n 2544 #define mpn_and_n(rp, up, vp, n) \ 2545 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b) 2546 #endif 2547 2548 #if ! HAVE_NATIVE_mpn_andn_n 2549 #undef mpn_andn_n 2550 #define mpn_andn_n(rp, up, vp, n) \ 2551 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b) 2552 #endif 2553 2554 #if !
HAVE_NATIVE_mpn_nand_n 2555 #undef mpn_nand_n 2556 #define mpn_nand_n(rp, up, vp, n) \ 2557 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK) 2558 #endif 2559 2560 #if ! HAVE_NATIVE_mpn_ior_n 2561 #undef mpn_ior_n 2562 #define mpn_ior_n(rp, up, vp, n) \ 2563 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b) 2564 #endif 2565 2566 #if ! HAVE_NATIVE_mpn_iorn_n 2567 #undef mpn_iorn_n 2568 #define mpn_iorn_n(rp, up, vp, n) \ 2569 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK) 2570 #endif 2571 2572 #if ! HAVE_NATIVE_mpn_nior_n 2573 #undef mpn_nior_n 2574 #define mpn_nior_n(rp, up, vp, n) \ 2575 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK) 2576 #endif 2577 2578 #if ! HAVE_NATIVE_mpn_xor_n 2579 #undef mpn_xor_n 2580 #define mpn_xor_n(rp, up, vp, n) \ 2581 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b) 2582 #endif 2583 2584 #if ! HAVE_NATIVE_mpn_xnor_n 2585 #undef mpn_xnor_n 2586 #define mpn_xnor_n(rp, up, vp, n) \ 2587 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK) 2588 #endif 2589 2590 #define mpn_trialdiv __MPN(trialdiv) 2591 __GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *); 2592 2593 #define mpn_remove __MPN(remove) 2594 __GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t); 2595 2596 2597 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */ 2598 #if GMP_NAIL_BITS == 0 2599 #define ADDC_LIMB(cout, w, x, y) \ 2600 do { \ 2601 mp_limb_t __x = (x); \ 2602 mp_limb_t __y = (y); \ 2603 mp_limb_t __w = __x + __y; \ 2604 (w) = __w; \ 2605 (cout) = __w < __x; \ 2606 } while (0) 2607 #else 2608 #define ADDC_LIMB(cout, w, x, y) \ 2609 do { \ 2610 mp_limb_t __w; \ 2611 ASSERT_LIMB (x); \ 2612 ASSERT_LIMB (y); \ 2613 __w = (x) + (y); \ 2614 (w) = __w & GMP_NUMB_MASK; \ 2615 (cout) = __w >> GMP_NUMB_BITS; \ 2616 } while (0) 2617 #endif 2618 2619 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that 2620 subtract. */ 2621 #if GMP_NAIL_BITS == 0 2622 #define SUBC_LIMB(cout, w, x, y) \ 2623 do { \ 2624 mp_limb_t __x = (x); \ 2625 mp_limb_t __y = (y); \ 2626 mp_limb_t __w = __x - __y; \ 2627 (w) = __w; \ 2628 (cout) = __w > __x; \ 2629 } while (0) 2630 #else 2631 #define SUBC_LIMB(cout, w, x, y) \ 2632 do { \ 2633 mp_limb_t __w = (x) - (y); \ 2634 (w) = __w & GMP_NUMB_MASK; \ 2635 (cout) = __w >> (GMP_LIMB_BITS-1); \ 2636 } while (0) 2637 #endif 2638 2639 2640 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both 2641 expecting no carry (or borrow) from that. 2642 2643 The size parameter is only for the benefit of assertion checking. In a 2644 normal build it's unused and the carry/borrow is just propagated as far 2645 as it needs to go. 2646 2647 On random data, usually only one or two limbs of {ptr,size} get updated, 2648 so there's no need for any sophisticated looping, just something compact 2649 and sensible. 2650 2651 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U, 2652 declaring their operand sizes, then remove the former. This is purely 2653 for the benefit of assertion checking. */ 2654 2655 #if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \ 2656 && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \ 2657 && ! WANT_ASSERT 2658 /* Better flags handling than the generic C gives on i386, saving a few 2659 bytes of code and maybe a cycle or two. 
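   (Usage note, added for exposition: whichever definition ends up selected,
   a call such as MPN_INCR_U (ptr, size, CNST_LIMB (2)) adds 2 to the
   size-limb number at ptr on the understanding that the carry propagates no
   further than limb size-1; a WANT_ASSERT build verifies this via mpn_add_1.)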
*/ 2660 2661 #define MPN_IORD_U(ptr, incr, aors) \ 2662 do { \ 2663 mp_ptr __ptr_dummy; \ 2664 if (__builtin_constant_p (incr) && (incr) == 0) \ 2665 { \ 2666 } \ 2667 else if (__builtin_constant_p (incr) && (incr) == 1) \ 2668 { \ 2669 __asm__ __volatile__ \ 2670 ("\n" ASM_L(top) ":\n" \ 2671 "\t" aors "\t$1, (%0)\n" \ 2672 "\tlea\t%c2(%0), %0\n" \ 2673 "\tjc\t" ASM_L(top) \ 2674 : "=r" (__ptr_dummy) \ 2675 : "0" (ptr), "n" (sizeof(mp_limb_t)) \ 2676 : "memory"); \ 2677 } \ 2678 else \ 2679 { \ 2680 __asm__ __volatile__ \ 2681 ( aors "\t%2, (%0)\n" \ 2682 "\tjnc\t" ASM_L(done) "\n" \ 2683 ASM_L(top) ":\n" \ 2684 "\t" aors "\t$1, %c3(%0)\n" \ 2685 "\tlea\t%c3(%0), %0\n" \ 2686 "\tjc\t" ASM_L(top) "\n" \ 2687 ASM_L(done) ":\n" \ 2688 : "=r" (__ptr_dummy) \ 2689 : "0" (ptr), \ 2690 "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \ 2691 : "memory"); \ 2692 } \ 2693 } while (0) 2694 2695 #if GMP_LIMB_BITS == 32 2696 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl") 2697 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl") 2698 #endif 2699 #if GMP_LIMB_BITS == 64 2700 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq") 2701 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq") 2702 #endif 2703 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr) 2704 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr) 2705 #endif 2706 2707 #if GMP_NAIL_BITS == 0 2708 #ifndef mpn_incr_u 2709 #define mpn_incr_u(p,incr) \ 2710 do { \ 2711 mp_limb_t __x; \ 2712 mp_ptr __p = (p); \ 2713 if (__builtin_constant_p (incr) && (incr) == 1) \ 2714 { \ 2715 while (++(*(__p++)) == 0) \ 2716 ; \ 2717 } \ 2718 else \ 2719 { \ 2720 __x = *__p + (incr); \ 2721 *__p = __x; \ 2722 if (__x < (incr)) \ 2723 while (++(*(++__p)) == 0) \ 2724 ; \ 2725 } \ 2726 } while (0) 2727 #endif 2728 #ifndef mpn_decr_u 2729 #define mpn_decr_u(p,incr) \ 2730 do { \ 2731 mp_limb_t __x; \ 2732 mp_ptr __p = (p); \ 2733 if (__builtin_constant_p (incr) && (incr) == 1) \ 2734 { \ 2735 while ((*(__p++))-- == 0) \ 2736 ; \ 2737 } \ 2738 else \ 2739 { \ 2740 __x = *__p; \ 2741 *__p = __x - (incr); \ 2742 if (__x < (incr)) \ 2743 while ((*(++__p))-- == 0) \ 2744 ; \ 2745 } \ 2746 } while (0) 2747 #endif 2748 #endif 2749 2750 #if GMP_NAIL_BITS >= 1 2751 #ifndef mpn_incr_u 2752 #define mpn_incr_u(p,incr) \ 2753 do { \ 2754 mp_limb_t __x; \ 2755 mp_ptr __p = (p); \ 2756 if (__builtin_constant_p (incr) && (incr) == 1) \ 2757 { \ 2758 do \ 2759 { \ 2760 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2761 *__p++ = __x; \ 2762 } \ 2763 while (__x == 0); \ 2764 } \ 2765 else \ 2766 { \ 2767 __x = (*__p + (incr)); \ 2768 *__p++ = __x & GMP_NUMB_MASK; \ 2769 if (__x >> GMP_NUMB_BITS != 0) \ 2770 { \ 2771 do \ 2772 { \ 2773 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2774 *__p++ = __x; \ 2775 } \ 2776 while (__x == 0); \ 2777 } \ 2778 } \ 2779 } while (0) 2780 #endif 2781 #ifndef mpn_decr_u 2782 #define mpn_decr_u(p,incr) \ 2783 do { \ 2784 mp_limb_t __x; \ 2785 mp_ptr __p = (p); \ 2786 if (__builtin_constant_p (incr) && (incr) == 1) \ 2787 { \ 2788 do \ 2789 { \ 2790 __x = *__p; \ 2791 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2792 } \ 2793 while (__x == 0); \ 2794 } \ 2795 else \ 2796 { \ 2797 __x = *__p - (incr); \ 2798 *__p++ = __x & GMP_NUMB_MASK; \ 2799 if (__x >> GMP_NUMB_BITS != 0) \ 2800 { \ 2801 do \ 2802 { \ 2803 __x = *__p; \ 2804 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2805 } \ 2806 while (__x == 0); \ 2807 } \ 2808 } \ 2809 } while (0) 2810 #endif 2811 #endif 2812 2813 #ifndef MPN_INCR_U 2814 #if WANT_ASSERT 
2815 #define MPN_INCR_U(ptr, size, n) \ 2816 do { \ 2817 ASSERT ((size) >= 1); \ 2818 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \ 2819 } while (0) 2820 #else 2821 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n) 2822 #endif 2823 #endif 2824 2825 #ifndef MPN_DECR_U 2826 #if WANT_ASSERT 2827 #define MPN_DECR_U(ptr, size, n) \ 2828 do { \ 2829 ASSERT ((size) >= 1); \ 2830 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \ 2831 } while (0) 2832 #else 2833 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n) 2834 #endif 2835 #endif 2836 2837 2838 /* Structure for conversion between internal binary format and strings. */ 2839 struct bases 2840 { 2841 /* Number of digits in the conversion base that always fits in an mp_limb_t. 2842 For example, for base 10 on a machine where an mp_limb_t has 32 bits this 2843 is 9, since 10**9 is the largest number that fits into an mp_limb_t. */ 2844 int chars_per_limb; 2845 2846 /* log(2)/log(conversion_base) */ 2847 mp_limb_t logb2; 2848 2849 /* log(conversion_base)/log(2) */ 2850 mp_limb_t log2b; 2851 2852 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by 2853 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base), 2854 i.e. the number of bits used to represent each digit in the base. */ 2855 mp_limb_t big_base; 2856 2857 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a 2858 fixed-point number. Instead of dividing by big_base an application can 2859 choose to multiply by big_base_inverted. */ 2860 mp_limb_t big_base_inverted; 2861 }; 2862 2863 #define mp_bases __MPN(bases) 2864 __GMP_DECLSPEC extern const struct bases mp_bases[257]; 2865 2866 2867 /* Compute the number of digits in base for nbits bits, making sure the result 2868 is never too small. The two variants of the macro implement the same 2869 function; the GT2 variant below works just for bases > 2. */ 2870 #define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \ 2871 do { \ 2872 mp_limb_t _ph, _dummy; \ 2873 size_t _nbits = (nbits); \ 2874 umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \ 2875 _ph += (_dummy + _nbits < _dummy); \ 2876 res = _ph + 1; \ 2877 } while (0) 2878 #define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \ 2879 do { \ 2880 mp_limb_t _ph, _dummy; \ 2881 size_t _nbits = (nbits); \ 2882 umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \ 2883 res = _ph + 1; \ 2884 } while (0) 2885 2886 /* For power of 2 bases this is exact. For other bases the result is either 2887 exact or one too big. 2888 2889 To be exact always it'd be necessary to examine all the limbs of the 2890 operand, since numbers like 100..000 and 99...999 generally differ only 2891 in the lowest limb. It'd be possible to examine just a couple of high 2892 limbs to increase the probability of being exact, but that doesn't seem 2893 worth bothering with. */ 2894 2895 #define MPN_SIZEINBASE(result, ptr, size, base) \ 2896 do { \ 2897 int __lb_base, __cnt; \ 2898 size_t __totbits; \ 2899 \ 2900 ASSERT ((size) >= 0); \ 2901 ASSERT ((base) >= 2); \ 2902 ASSERT ((base) < numberof (mp_bases)); \ 2903 \ 2904 /* Special case for X == 0. */ \ 2905 if ((size) == 0) \ 2906 (result) = 1; \ 2907 else \ 2908 { \ 2909 /* Calculate the total number of significant bits of X. 
*/ \ 2910 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2911 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\ 2912 \ 2913 if (POW2_P (base)) \ 2914 { \ 2915 __lb_base = mp_bases[base].big_base; \ 2916 (result) = (__totbits + __lb_base - 1) / __lb_base; \ 2917 } \ 2918 else \ 2919 { \ 2920 DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \ 2921 } \ 2922 } \ 2923 } while (0) 2924 2925 #define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \ 2926 do { \ 2927 int __cnt; \ 2928 mp_bitcnt_t __totbits; \ 2929 ASSERT ((size) > 0); \ 2930 ASSERT ((ptr)[(size)-1] != 0); \ 2931 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2932 __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \ 2933 (result) = (__totbits + (base2exp)-1) / (base2exp); \ 2934 } while (0) 2935 2936 2937 /* bit count to limb count, rounding up */ 2938 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS) 2939 2940 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake 2941 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs 2942 in both cases (LIMBS_PER_ULONG is also defined here.) */ 2943 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */ 2944 2945 #define LIMBS_PER_ULONG 1 2946 #define MPN_SET_UI(zp, zn, u) \ 2947 (zp)[0] = (u); \ 2948 (zn) = ((zp)[0] != 0); 2949 #define MPZ_FAKE_UI(z, zp, u) \ 2950 (zp)[0] = (u); \ 2951 PTR (z) = (zp); \ 2952 SIZ (z) = ((zp)[0] != 0); \ 2953 ASSERT_CODE (ALLOC (z) = 1); 2954 2955 #else /* need two limbs per ulong */ 2956 2957 #define LIMBS_PER_ULONG 2 2958 #define MPN_SET_UI(zp, zn, u) \ 2959 (zp)[0] = (u) & GMP_NUMB_MASK; \ 2960 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 2961 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); 2962 #define MPZ_FAKE_UI(z, zp, u) \ 2963 (zp)[0] = (u) & GMP_NUMB_MASK; \ 2964 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 2965 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \ 2966 PTR (z) = (zp); \ 2967 ASSERT_CODE (ALLOC (z) = 2); 2968 2969 #endif 2970 2971 2972 #if HAVE_HOST_CPU_FAMILY_x86 2973 #define TARGET_REGISTER_STARVED 1 2974 #else 2975 #define TARGET_REGISTER_STARVED 0 2976 #endif 2977 2978 2979 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1 2980 or 0 there into a limb 0xFF..FF or 0 respectively. 2981 2982 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1, 2983 but C99 doesn't guarantee signed right shifts are arithmetic, so we have 2984 a little compile-time test and a fallback to a "? :" form. The latter is 2985 necessary for instance on Cray vector systems. 2986 2987 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this 2988 to an arithmetic right shift anyway, but it's good to get the desired 2989 shift on past versions too (in particular since an important use of 2990 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */ 2991 2992 #define LIMB_HIGHBIT_TO_MASK(n) \ 2993 (((mp_limb_signed_t) -1 >> 1) < 0 \ 2994 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \ 2995 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0)) 2996 2997 2998 /* Use a library function for invert_limb, if available. */ 2999 #define mpn_invert_limb __MPN(invert_limb) 3000 __GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
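/* Added note, for exposition: for a normalized divisor d, ie. one with its
   high bit set, invert_limb computes the fixed-point reciprocal
   v = floor ((B^2 - 1) / d) - B with B = 2^GMP_LIMB_BITS, as can be read off
   the udiv_qrnnd fallback below; this is the "di" operand expected by
   udiv_qrnnd_preinv further down.  */
3001 #if !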
defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb 3002 #define invert_limb(invxl,xl) \ 3003 do { \ 3004 (invxl) = mpn_invert_limb (xl); \ 3005 } while (0) 3006 #endif 3007 3008 #ifndef invert_limb 3009 #define invert_limb(invxl,xl) \ 3010 do { \ 3011 mp_limb_t _dummy; \ 3012 ASSERT ((xl) != 0); \ 3013 udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \ 3014 } while (0) 3015 #endif 3016 3017 #define invert_pi1(dinv, d1, d0) \ 3018 do { \ 3019 mp_limb_t _v, _p, _t1, _t0, _mask; \ 3020 invert_limb (_v, d1); \ 3021 _p = (d1) * _v; \ 3022 _p += (d0); \ 3023 if (_p < (d0)) \ 3024 { \ 3025 _v--; \ 3026 _mask = -(mp_limb_t) (_p >= (d1)); \ 3027 _p -= (d1); \ 3028 _v += _mask; \ 3029 _p -= _mask & (d1); \ 3030 } \ 3031 umul_ppmm (_t1, _t0, d0, _v); \ 3032 _p += _t1; \ 3033 if (_p < _t1) \ 3034 { \ 3035 _v--; \ 3036 if (UNLIKELY (_p >= (d1))) \ 3037 { \ 3038 if (_p > (d1) || _t0 >= (d0)) \ 3039 _v--; \ 3040 } \ 3041 } \ 3042 (dinv).inv32 = _v; \ 3043 } while (0) 3044 3045 3046 /* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund. 3047 We write things strangely below, to help gcc. A more straightforward 3048 version: 3049 _r = (nl) - _qh * (d); 3050 _t = _r + (d); 3051 if (_r >= _ql) 3052 { 3053 _qh--; 3054 _r = _t; 3055 } 3056 For a critical path one operation shorter, one may want to use this form: 3057 _p = _qh * (d); 3058 _s = (nl) + (d); 3059 _r = (nl) - _p; 3060 _t = _s - _p; 3061 if (_r >= _ql) 3062 { 3063 _qh--; 3064 _r = _t; 3065 } 3066 */ 3067 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ 3068 do { \ 3069 mp_limb_t _qh, _ql, _r, _mask; \ 3070 umul_ppmm (_qh, _ql, (nh), (di)); \ 3071 if (__builtin_constant_p (nl) && (nl) == 0) \ 3072 { \ 3073 _qh += (nh) + 1; \ 3074 _r = - _qh * (d); \ 3075 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3076 _qh += _mask; \ 3077 _r += _mask & (d); \ 3078 } \ 3079 else \ 3080 { \ 3081 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ 3082 _r = (nl) - _qh * (d); \ 3083 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3084 _qh += _mask; \ 3085 _r += _mask & (d); \ 3086 if (UNLIKELY (_r >= (d))) \ 3087 { \ 3088 _r -= (d); \ 3089 _qh++; \ 3090 } \ 3091 } \ 3092 (r) = _r; \ 3093 (q) = _qh; \ 3094 } while (0) 3095 3096 /* Dividing (NH, NL) by D, returning the remainder only. Unlike 3097 udiv_qrnnd_preinv, works also for the case NH == D, where the 3098 quotient doesn't quite fit in a single limb. */ 3099 #define udiv_rnnd_preinv(r, nh, nl, d, di) \ 3100 do { \ 3101 mp_limb_t _qh, _ql, _r, _mask; \ 3102 umul_ppmm (_qh, _ql, (nh), (di)); \ 3103 if (__builtin_constant_p (nl) && (nl) == 0) \ 3104 { \ 3105 _r = ~(_qh + (nh)) * (d); \ 3106 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3107 _r += _mask & (d); \ 3108 } \ 3109 else \ 3110 { \ 3111 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ 3112 _r = (nl) - _qh * (d); \ 3113 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \ 3114 _r += _mask & (d); \ 3115 if (UNLIKELY (_r >= (d))) \ 3116 _r -= (d); \ 3117 } \ 3118 (r) = _r; \ 3119 } while (0) 3120 3121 /* Compute the quotient and remainder for n / d. Requires d 3122 >= B^2 / 2 and n < d B. di is the inverse 3123 3124 floor ((B^3 - 1) / (d0 + d1 B)) - B. 3125 3126 NOTE: Output variables are updated multiple times. Only some inputs 3127 and outputs may overlap.
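   (Added note, for exposition: on return the outputs satisfy
   n2*B^2 + n1*B + n0 = q*(d1*B + d0) + r1*B + r0, with the two-limb
   remainder r1*B + r0 below the divisor d1*B + d0.)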
3128 */ 3129 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ 3130 do { \ 3131 mp_limb_t _q0, _t1, _t0, _mask; \ 3132 umul_ppmm ((q), _q0, (n2), (dinv)); \ 3133 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ 3134 \ 3135 /* Compute the two most significant limbs of n - q'd */ \ 3136 (r1) = (n1) - (d1) * (q); \ 3137 sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ 3138 umul_ppmm (_t1, _t0, (d0), (q)); \ 3139 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ 3140 (q)++; \ 3141 \ 3142 /* Conditionally adjust q and the remainders */ \ 3143 _mask = - (mp_limb_t) ((r1) >= _q0); \ 3144 (q) += _mask; \ 3145 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ 3146 if (UNLIKELY ((r1) >= (d1))) \ 3147 { \ 3148 if ((r1) > (d1) || (r0) >= (d0)) \ 3149 { \ 3150 (q)++; \ 3151 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ 3152 } \ 3153 } \ 3154 } while (0) 3155 3156 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */ 3157 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1) 3158 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int); 3159 #endif 3160 3161 3162 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the 3163 plain mpn_divrem_1. The default is yes, since the few CISC chips where 3164 preinv is not good have defines saying so. */ 3165 #ifndef USE_PREINV_DIVREM_1 3166 #define USE_PREINV_DIVREM_1 1 3167 #endif 3168 3169 #if USE_PREINV_DIVREM_1 3170 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 3171 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift) 3172 #else 3173 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 3174 mpn_divrem_1 (qp, xsize, ap, size, d) 3175 #endif 3176 3177 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD 3178 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 3179 #endif 3180 3181 /* This selection may seem backwards. The reason mpn_mod_1 typically takes 3182 over for larger sizes is that it uses the mod_1_1 function. */ 3183 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \ 3184 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \ 3185 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \ 3186 : mpn_mod_1 (src, size, divisor)) 3187 3188 3189 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */ 3190 #define mpn_mod_34lsub1 __MPN(mod_34lsub1) 3191 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE; 3192 #endif 3193 3194 3195 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to 3196 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for 3197 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and 3198 modexact are faster at all sizes, so the defaults are 0. Those CPUs 3199 where this is not right have a tuned threshold. 
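/* An illustrative aside (not from the original header) on why divexact
   and modexact win: an odd divisor is invertible modulo B = 2^64, so
   exact division takes one multiply per limb and no hardware division.
   The names binv64 and divexact_odd are hypothetical; unsigned __int128
   is assumed; the seed and lifting steps are the ones described for
   binvert_limb below. */

#include <stdint.h>
#include <assert.h>

typedef unsigned __int128 u128;   /* assumed compiler extension */

/* Inverse of odd d mod 2^64: a 4-bit seed, then four Newton steps
   v = 2v - v*v*d, each doubling the correct low bits (4->8->16->32->64). */
static uint64_t
binv64 (uint64_t d)
{
  uint64_t v = (((d + 2) & 4) << 1) ^ d;   /* 4 correct bits */
  v = 2 * v - v * v * d;
  v = 2 * v - v * v * d;
  v = 2 * v - v * v * d;
  v = 2 * v - v * v * d;
  return v;
}

/* Exact division {up,n} / d, low limb first, for odd d and exact
   quotients only: multiply by the inverse, then push the high half of
   q*d into the next limb as a borrow (Jebelean's scheme). */
static void
divexact_odd (uint64_t *qp, const uint64_t *up, int n, uint64_t d)
{
  uint64_t v = binv64 (d), c = 0;
  int i;
  for (i = 0; i < n; i++)
    {
      uint64_t s = up[i] - c;
      c = up[i] < c;                       /* borrow from the subtract */
      qp[i] = s * v;                       /* qp[i]*d == s mod 2^64 */
      c += (uint64_t) (((u128) qp[i] * d) >> 64);
    }
}

int
main (void)
{
  uint64_t u[2] = { 3, 3 }, q[2];          /* 3 * (2^64 + 1) */
  divexact_odd (q, u, 2, 3);
  assert (q[0] == 1 && q[1] == 1 && binv64 (3) * 3 == 1);
  return 0;
}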
*/ 3200 #ifndef DIVEXACT_1_THRESHOLD 3201 #define DIVEXACT_1_THRESHOLD 0 3202 #endif 3203 #ifndef BMOD_1_TO_MOD_1_THRESHOLD 3204 #define BMOD_1_TO_MOD_1_THRESHOLD 10 3205 #endif 3206 3207 #define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \ 3208 do { \ 3209 if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \ 3210 ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \ 3211 else \ 3212 { \ 3213 ASSERT (mpn_mod_1 (up, n, d) == 0); \ 3214 mpn_divexact_1 (rp, up, n, d); \ 3215 } \ 3216 } while (0) 3217 3218 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */ 3219 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd) 3220 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; 3221 #endif 3222 3223 #if HAVE_NATIVE_mpn_modexact_1_odd 3224 #define mpn_modexact_1_odd __MPN(modexact_1_odd) 3225 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; 3226 #else 3227 #define mpn_modexact_1_odd(src,size,divisor) \ 3228 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0)) 3229 #endif 3230 3231 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \ 3232 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \ 3233 ? mpn_modexact_1_odd (src, size, divisor) \ 3234 : mpn_mod_1 (src, size, divisor)) 3235 3236 /* binvert_limb() sets inv to the multiplicative inverse of n modulo 3237 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS. 3238 n must be odd (otherwise such an inverse doesn't exist). 3239 3240 This is not to be confused with invert_limb(), which is completely 3241 different. 3242 3243 The table lookup gives an inverse with the low 8 bits valid, and each 3244 multiply step doubles the number of bits. See Jebelean "An algorithm for 3245 exact division" end of section 4 (reference in gmp.texi). 3246 3247 Possible enhancement: Could use UHWtype until the last step, if half-size 3248 multiplies are faster (might help under _LONG_LONG_LIMB). 3249 3250 Alternative: As noted in Granlund and Montgomery "Division by Invariant 3251 Integers using Multiplication" (reference in gmp.texi), n itself gives a 3252 3-bit inverse immediately, and could be used instead of a table lookup. 3253 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into 3254 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */ 3255 3256 #define binvert_limb_table __gmp_binvert_limb_table 3257 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128]; 3258 3259 #define binvert_limb(inv,n) \ 3260 do { \ 3261 mp_limb_t __n = (n); \ 3262 mp_limb_t __inv; \ 3263 ASSERT ((__n & 1) == 1); \ 3264 \ 3265 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \ 3266 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \ 3267 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \ 3268 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \ 3269 \ 3270 if (GMP_NUMB_BITS > 64) \ 3271 { \ 3272 int __invbits = 64; \ 3273 do { \ 3274 __inv = 2 * __inv - __inv * __inv * __n; \ 3275 __invbits *= 2; \ 3276 } while (__invbits < GMP_NUMB_BITS); \ 3277 } \ 3278 \ 3279 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \ 3280 (inv) = __inv & GMP_NUMB_MASK; \ 3281 } while (0) 3282 #define modlimb_invert binvert_limb /* backward compatibility */ 3283 3284 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS. 3285 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. 
3286 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd 3287 we need to start from GMP_NUMB_MAX>>1. */ 3288 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1) 3289 3290 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3). 3291 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */ 3292 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1) 3293 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT) 3294 3295 3296 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs. 3297 3298 It's not clear whether this is the best way to do this calculation. 3299 Anything congruent to -a would be fine for the one limb congruence 3300 tests. */ 3301 3302 #define NEG_MOD(r, a, d) \ 3303 do { \ 3304 ASSERT ((d) != 0); \ 3305 ASSERT_LIMB (a); \ 3306 ASSERT_LIMB (d); \ 3307 \ 3308 if ((a) <= (d)) \ 3309 { \ 3310 /* small a is reasonably likely */ \ 3311 (r) = (d) - (a); \ 3312 } \ 3313 else \ 3314 { \ 3315 unsigned __twos; \ 3316 mp_limb_t __dnorm; \ 3317 count_leading_zeros (__twos, d); \ 3318 __twos -= GMP_NAIL_BITS; \ 3319 __dnorm = (d) << __twos; \ 3320 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \ 3321 } \ 3322 \ 3323 ASSERT_LIMB (r); \ 3324 } while (0) 3325 3326 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */ 3327 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1) 3328 3329 3330 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or 3331 to 0 if there's an even number. "n" should be an unsigned long and "p" 3332 an int. */ 3333 3334 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 3335 #define ULONG_PARITY(p, n) \ 3336 do { \ 3337 int __p; \ 3338 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \ 3339 (p) = __p & 1; \ 3340 } while (0) 3341 #endif 3342 3343 /* Cray intrinsic _popcnt. */ 3344 #ifdef _CRAY 3345 #define ULONG_PARITY(p, n) \ 3346 do { \ 3347 (p) = _popcnt (n) & 1; \ 3348 } while (0) 3349 #endif 3350 3351 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3352 && ! defined (NO_ASM) && defined (__ia64) 3353 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend 3354 to a 64 bit unsigned long long for popcnt */ 3355 #define ULONG_PARITY(p, n) \ 3356 do { \ 3357 unsigned long long __n = (unsigned long) (n); \ 3358 int __p; \ 3359 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \ 3360 (p) = __p & 1; \ 3361 } while (0) 3362 #endif 3363 3364 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3365 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86 3366 #if __GMP_GNUC_PREREQ (3,1) 3367 #define __GMP_qm "=Qm" 3368 #define __GMP_q "=Q" 3369 #else 3370 #define __GMP_qm "=qm" 3371 #define __GMP_q "=q" 3372 #endif 3373 #define ULONG_PARITY(p, n) \ 3374 do { \ 3375 char __p; \ 3376 unsigned long __n = (n); \ 3377 __n ^= (__n >> 16); \ 3378 __asm__ ("xorb %h1, %b1\n\t" \ 3379 "setpo %0" \ 3380 : __GMP_qm (__p), __GMP_q (__n) \ 3381 : "1" (__n)); \ 3382 (p) = __p; \ 3383 } while (0) 3384 #endif 3385 3386 #if ! defined (ULONG_PARITY) 3387 #define ULONG_PARITY(p, n) \ 3388 do { \ 3389 unsigned long __n = (n); \ 3390 int __p = 0; \ 3391 do \ 3392 { \ 3393 __p ^= 0x96696996L >> (__n & 0x1F); \ 3394 __n >>= 5; \ 3395 } \ 3396 while (__n != 0); \ 3397 \ 3398 (p) = __p & 1; \ 3399 } while (0) 3400 #endif 3401 3402 3403 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. 
gcc (as of 3404 version 3.1 at least) doesn't seem to know how to generate rlwimi for 3405 anything other than bit-fields, so use "asm". */ 3406 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3407 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32 3408 #define BSWAP_LIMB(dst, src) \ 3409 do { \ 3410 mp_limb_t __bswapl_src = (src); \ 3411 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \ 3412 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \ 3413 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \ 3414 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \ 3415 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 3nd high */ \ 3416 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \ 3417 (dst) = __tmp1 | __tmp2; /* whole */ \ 3418 } while (0) 3419 #endif 3420 3421 /* bswap is available on i486 and up and is fast. A combination rorw $8 / 3422 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux 3423 kernel with xchgb instead of rorw), but this is not done here, because 3424 i386 means generic x86 and mixing word and dword operations will cause 3425 partial register stalls on P6 chips. */ 3426 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3427 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \ 3428 && GMP_LIMB_BITS == 32 3429 #define BSWAP_LIMB(dst, src) \ 3430 do { \ 3431 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \ 3432 } while (0) 3433 #endif 3434 3435 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3436 && defined (__amd64__) && GMP_LIMB_BITS == 64 3437 #define BSWAP_LIMB(dst, src) \ 3438 do { \ 3439 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \ 3440 } while (0) 3441 #endif 3442 3443 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3444 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64 3445 #define BSWAP_LIMB(dst, src) \ 3446 do { \ 3447 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \ 3448 } while (0) 3449 #endif 3450 3451 /* As per glibc. */ 3452 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3453 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32 3454 #define BSWAP_LIMB(dst, src) \ 3455 do { \ 3456 mp_limb_t __bswapl_src = (src); \ 3457 __asm__ ("ror%.w %#8, %0\n\t" \ 3458 "swap %0\n\t" \ 3459 "ror%.w %#8, %0" \ 3460 : "=d" (dst) \ 3461 : "0" (__bswapl_src)); \ 3462 } while (0) 3463 #endif 3464 3465 #if ! defined (BSWAP_LIMB) 3466 #if GMP_LIMB_BITS == 8 3467 #define BSWAP_LIMB(dst, src) \ 3468 do { (dst) = (src); } while (0) 3469 #endif 3470 #if GMP_LIMB_BITS == 16 3471 #define BSWAP_LIMB(dst, src) \ 3472 do { \ 3473 (dst) = ((src) << 8) + ((src) >> 8); \ 3474 } while (0) 3475 #endif 3476 #if GMP_LIMB_BITS == 32 3477 #define BSWAP_LIMB(dst, src) \ 3478 do { \ 3479 (dst) = \ 3480 ((src) << 24) \ 3481 + (((src) & 0xFF00) << 8) \ 3482 + (((src) >> 8) & 0xFF00) \ 3483 + ((src) >> 24); \ 3484 } while (0) 3485 #endif 3486 #if GMP_LIMB_BITS == 64 3487 #define BSWAP_LIMB(dst, src) \ 3488 do { \ 3489 (dst) = \ 3490 ((src) << 56) \ 3491 + (((src) & 0xFF00) << 40) \ 3492 + (((src) & 0xFF0000) << 24) \ 3493 + (((src) & 0xFF000000) << 8) \ 3494 + (((src) >> 8) & 0xFF000000) \ 3495 + (((src) >> 24) & 0xFF0000) \ 3496 + (((src) >> 40) & 0xFF00) \ 3497 + ((src) >> 56); \ 3498 } while (0) 3499 #endif 3500 #endif 3501 3502 #if ! 
defined (BSWAP_LIMB) 3503 #define BSWAP_LIMB(dst, src) \ 3504 do { \ 3505 mp_limb_t __bswapl_src = (src); \ 3506 mp_limb_t __dstl = 0; \ 3507 int __i; \ 3508 for (__i = 0; __i < GMP_LIMB_BYTES; __i++) \ 3509 { \ 3510 __dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \ 3511 __bswapl_src >>= 8; \ 3512 } \ 3513 (dst) = __dstl; \ 3514 } while (0) 3515 #endif 3516 3517 3518 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to 3519 those we know are fast. */ 3520 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3521 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \ 3522 && (HAVE_HOST_CPU_powerpc604 \ 3523 || HAVE_HOST_CPU_powerpc604e \ 3524 || HAVE_HOST_CPU_powerpc750 \ 3525 || HAVE_HOST_CPU_powerpc7400) 3526 #define BSWAP_LIMB_FETCH(limb, src) \ 3527 do { \ 3528 mp_srcptr __blf_src = (src); \ 3529 mp_limb_t __limb; \ 3530 __asm__ ("lwbrx %0, 0, %1" \ 3531 : "=r" (__limb) \ 3532 : "r" (__blf_src), \ 3533 "m" (*__blf_src)); \ 3534 (limb) = __limb; \ 3535 } while (0) 3536 #endif 3537 3538 #if ! defined (BSWAP_LIMB_FETCH) 3539 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src)) 3540 #endif 3541 3542 3543 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we 3544 know are fast. FIXME: Is this necessary? */ 3545 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3546 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \ 3547 && (HAVE_HOST_CPU_powerpc604 \ 3548 || HAVE_HOST_CPU_powerpc604e \ 3549 || HAVE_HOST_CPU_powerpc750 \ 3550 || HAVE_HOST_CPU_powerpc7400) 3551 #define BSWAP_LIMB_STORE(dst, limb) \ 3552 do { \ 3553 mp_ptr __dst = (dst); \ 3554 mp_limb_t __limb = (limb); \ 3555 __asm__ ("stwbrx %1, 0, %2" \ 3556 : "=m" (*__dst) \ 3557 : "r" (__limb), \ 3558 "r" (__dst)); \ 3559 } while (0) 3560 #endif 3561 3562 #if ! defined (BSWAP_LIMB_STORE) 3563 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb) 3564 #endif 3565 3566 3567 /* Byte swap limbs from {src,size} and store at {dst,size}. */ 3568 #define MPN_BSWAP(dst, src, size) \ 3569 do { \ 3570 mp_ptr __dst = (dst); \ 3571 mp_srcptr __src = (src); \ 3572 mp_size_t __size = (size); \ 3573 mp_size_t __i; \ 3574 ASSERT ((size) >= 0); \ 3575 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \ 3576 CRAY_Pragma ("_CRI ivdep"); \ 3577 for (__i = 0; __i < __size; __i++) \ 3578 { \ 3579 BSWAP_LIMB_FETCH (*__dst, __src); \ 3580 __dst++; \ 3581 __src++; \ 3582 } \ 3583 } while (0) 3584 3585 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */ 3586 #define MPN_BSWAP_REVERSE(dst, src, size) \ 3587 do { \ 3588 mp_ptr __dst = (dst); \ 3589 mp_size_t __size = (size); \ 3590 mp_srcptr __src = (src) + __size - 1; \ 3591 mp_size_t __i; \ 3592 ASSERT ((size) >= 0); \ 3593 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ 3594 CRAY_Pragma ("_CRI ivdep"); \ 3595 for (__i = 0; __i < __size; __i++) \ 3596 { \ 3597 BSWAP_LIMB_FETCH (*__dst, __src); \ 3598 __dst++; \ 3599 __src--; \ 3600 } \ 3601 } while (0) 3602 3603 3604 /* No processor claiming to be SPARC v9 compliant seems to 3605 implement the POPC instruction. Disable pattern for now. */ 3606 #if 0 3607 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64 3608 #define popc_limb(result, input) \ 3609 do { \ 3610 DItype __res; \ 3611 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \ 3612 } while (0) 3613 #endif 3614 #endif 3615 3616 #if defined (__GNUC__) && !
defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 3617 #define popc_limb(result, input) \ 3618 do { \ 3619 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \ 3620 } while (0) 3621 #endif 3622 3623 /* Cray intrinsic. */ 3624 #ifdef _CRAY 3625 #define popc_limb(result, input) \ 3626 do { \ 3627 (result) = _popcnt (input); \ 3628 } while (0) 3629 #endif 3630 3631 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3632 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64 3633 #define popc_limb(result, input) \ 3634 do { \ 3635 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \ 3636 } while (0) 3637 #endif 3638 3639 /* Cool population count of an mp_limb_t. 3640 You have to figure out how this works, We won't tell you! 3641 3642 The constants could also be expressed as: 3643 0x55... = [2^N / 3] = [(2^N-1)/3] 3644 0x33... = [2^N / 5] = [(2^N-1)/5] 3645 0x0f... = [2^N / 17] = [(2^N-1)/17] 3646 (N is GMP_LIMB_BITS, [] denotes truncation.) */ 3647 3648 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8 3649 #define popc_limb(result, input) \ 3650 do { \ 3651 mp_limb_t __x = (input); \ 3652 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3653 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3654 __x = ((__x >> 4) + __x); \ 3655 (result) = __x & 0x0f; \ 3656 } while (0) 3657 #endif 3658 3659 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16 3660 #define popc_limb(result, input) \ 3661 do { \ 3662 mp_limb_t __x = (input); \ 3663 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3664 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3665 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3666 __x = ((__x >> 8) + __x); \ 3667 (result) = __x & 0xff; \ 3668 } while (0) 3669 #endif 3670 3671 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32 3672 #define popc_limb(result, input) \ 3673 do { \ 3674 mp_limb_t __x = (input); \ 3675 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3676 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3677 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3678 __x = ((__x >> 8) + __x); \ 3679 __x = ((__x >> 16) + __x); \ 3680 (result) = __x & 0xff; \ 3681 } while (0) 3682 #endif 3683 3684 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64 3685 #define popc_limb(result, input) \ 3686 do { \ 3687 mp_limb_t __x = (input); \ 3688 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3689 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3690 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3691 __x = ((__x >> 8) + __x); \ 3692 __x = ((__x >> 16) + __x); \ 3693 __x = ((__x >> 32) + __x); \ 3694 (result) = __x & 0xff; \ 3695 } while (0) 3696 #endif 3697 3698 3699 /* Define stuff for longlong.h. */ 3700 #if HAVE_ATTRIBUTE_MODE 3701 typedef unsigned int UQItype __attribute__ ((mode (QI))); 3702 typedef int SItype __attribute__ ((mode (SI))); 3703 typedef unsigned int USItype __attribute__ ((mode (SI))); 3704 typedef int DItype __attribute__ ((mode (DI))); 3705 typedef unsigned int UDItype __attribute__ ((mode (DI))); 3706 #else 3707 typedef unsigned char UQItype; 3708 typedef long SItype; 3709 typedef unsigned long USItype; 3710 #if HAVE_LONG_LONG 3711 typedef long long int DItype; 3712 typedef unsigned long long int UDItype; 3713 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. 
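/* An aside on the generic ULONG_PARITY and popc_limb masks above, since
   the header declines to explain them: a standalone 64-bit sketch (the
   name popc64 is illustrative) with the mechanism spelled out. */

#include <stdint.h>
#include <assert.h>

static unsigned
popc64 (uint64_t x)
{
  /* Each step sums adjacent fields in place, so after step k every
     2^k-bit field holds the population count of its own bits. */
  x -= (x >> 1) & (UINT64_MAX / 3);                            /* 2-bit sums */
  x = ((x >> 2) & (UINT64_MAX / 5)) + (x & (UINT64_MAX / 5));  /* 4-bit sums */
  x = ((x >> 4) + x) & (UINT64_MAX / 17);                      /* 8-bit sums */
  x += x >> 8;                    /* from here carries can't occur, */
  x += x >> 16;                   /* so no more masking is needed   */
  x += x >> 32;
  return (unsigned) (x & 0xff);   /* a count <= 64 fits in the low byte */
}

int
main (void)
{
  assert (popc64 (0) == 0);
  assert (popc64 (~(uint64_t) 0) == 64);
  assert (popc64 (0x96696996) == 16);   /* the parity fallback's table word */
  return 0;
}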
*/ 3714 typedef long int DItype; 3715 typedef unsigned long int UDItype; 3716 #endif 3717 #endif 3718 3719 typedef mp_limb_t UWtype; 3720 typedef unsigned int UHWtype; 3721 #define W_TYPE_SIZE GMP_LIMB_BITS 3722 3723 /* Define ieee_double_extract and _GMP_IEEE_FLOATS. 3724 3725 Bit field packing is "implementation defined" according to C99, which 3726 leaves us at the compiler's mercy here. For some systems packing is 3727 defined in the ABI (eg. x86). In any case so far it seems universal that 3728 little endian systems pack from low to high, and big endian from high to 3729 low within the given type. 3730 3731 Within the fields we rely on the integer endianness being the same as the 3732 float endianness, this is true everywhere we know of and it'd be a fairly 3733 strange system that did anything else. */ 3734 3735 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED 3736 #define _GMP_IEEE_FLOATS 1 3737 union ieee_double_extract 3738 { 3739 struct 3740 { 3741 gmp_uint_least32_t manh:20; 3742 gmp_uint_least32_t exp:11; 3743 gmp_uint_least32_t sig:1; 3744 gmp_uint_least32_t manl:32; 3745 } s; 3746 double d; 3747 }; 3748 #endif 3749 3750 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 3751 #define _GMP_IEEE_FLOATS 1 3752 union ieee_double_extract 3753 { 3754 struct 3755 { 3756 gmp_uint_least32_t manl:32; 3757 gmp_uint_least32_t manh:20; 3758 gmp_uint_least32_t exp:11; 3759 gmp_uint_least32_t sig:1; 3760 } s; 3761 double d; 3762 }; 3763 #endif 3764 3765 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN 3766 #define _GMP_IEEE_FLOATS 1 3767 union ieee_double_extract 3768 { 3769 struct 3770 { 3771 gmp_uint_least32_t sig:1; 3772 gmp_uint_least32_t exp:11; 3773 gmp_uint_least32_t manh:20; 3774 gmp_uint_least32_t manl:32; 3775 } s; 3776 double d; 3777 }; 3778 #endif 3779 3780 #if HAVE_DOUBLE_VAX_D 3781 union double_extract 3782 { 3783 struct 3784 { 3785 gmp_uint_least32_t man3:7; /* highest 7 bits */ 3786 gmp_uint_least32_t exp:8; /* excess-128 exponent */ 3787 gmp_uint_least32_t sig:1; 3788 gmp_uint_least32_t man2:16; 3789 gmp_uint_least32_t man1:16; 3790 gmp_uint_least32_t man0:16; /* lowest 16 bits */ 3791 } s; 3792 double d; 3793 }; 3794 #endif 3795 3796 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers 3797 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */ 3798 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2))) 3799 /* Maximum number of limbs it will take to store any `double'. 3800 We assume doubles have 53 mantissa bits. */ 3801 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1) 3802 3803 __GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double); 3804 3805 #define mpn_get_d __gmpn_get_d 3806 __GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE; 3807 3808 3809 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes 3810 a_inf if x is an infinity. Both are considered unlikely values, for 3811 branch prediction. 
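/* Not from the original header: where ieee_double_extract relies on
   bit-field packing, the same three fields can be recovered on any IEEE
   binary64 host by copying the double into a uint64_t, which sidesteps
   the packing question entirely. Names here are illustrative. */

#include <stdint.h>
#include <string.h>
#include <assert.h>

static void
double_fields (double d, unsigned *sign, unsigned *biased_exp,
               uint64_t *mantissa)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);                    /* same object representation */
  *sign = (unsigned) (u >> 63);                 /* sign bit */
  *biased_exp = (unsigned) (u >> 52) & 0x7FF;   /* 11-bit biased exponent */
  *mantissa = u & (((uint64_t) 1 << 52) - 1);   /* 52-bit mantissa */
}

int
main (void)
{
  unsigned sign, biased_exp;
  uint64_t mantissa;
  double_fields (1.0, &sign, &biased_exp, &mantissa);
  assert (sign == 0 && biased_exp == 0x3FF && mantissa == 0);  /* bias 1023 */
  return 0;
}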
*/ 3812 3813 #if _GMP_IEEE_FLOATS 3814 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3815 do { \ 3816 union ieee_double_extract u; \ 3817 u.d = (x); \ 3818 if (UNLIKELY (u.s.exp == 0x7FF)) \ 3819 { \ 3820 if (u.s.manl == 0 && u.s.manh == 0) \ 3821 { a_inf; } \ 3822 else \ 3823 { a_nan; } \ 3824 } \ 3825 } while (0) 3826 #endif 3827 3828 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP 3829 /* no nans or infs in these formats */ 3830 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3831 do { } while (0) 3832 #endif 3833 3834 #ifndef DOUBLE_NAN_INF_ACTION 3835 /* Unknown format, try something generic. 3836 NaN should be "unordered", so x!=x. 3837 Inf should be bigger than DBL_MAX. */ 3838 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3839 do { \ 3840 { \ 3841 if (UNLIKELY ((x) != (x))) \ 3842 { a_nan; } \ 3843 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \ 3844 { a_inf; } \ 3845 } \ 3846 } while (0) 3847 #endif 3848 3849 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles 3850 in the coprocessor, which means a bigger exponent range than normal, and 3851 depending on the rounding mode, a bigger mantissa than normal. (See 3852 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches 3853 "d" through memory to force any rounding and overflows to occur. 3854 3855 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm 3856 registers, where there's no such extra precision and no need for the 3857 FORCE_DOUBLE. We don't bother to detect this since the present uses for 3858 FORCE_DOUBLE are only in test programs and default generic C code. 3859 3860 Not quite sure that an "automatic volatile" will use memory, but it does 3861 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since 3862 apparently matching operands like "0" are only allowed on a register 3863 output. gcc 3.4 warns about this, though in fact it and past versions 3864 seem to put the operand through memory as hoped. 
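/* The format-agnostic fallback above, recast as a plain function for
   clarity (illustrative, and like the macro it assumes IEEE-style
   semantics: NaN is the only value with x != x, and only infinities lie
   beyond +/-DBL_MAX; -ffast-math style options break the x != x test). */

#include <float.h>
#include <assert.h>

static int
double_kind (double x)    /* 0 finite, 1 NaN, 2 infinity */
{
  if (x != x)
    return 1;
  if (x > DBL_MAX || x < -DBL_MAX)
    return 2;
  return 0;
}

int
main (void)
{
  assert (double_kind (0.0) == 0);
  assert (double_kind (1.0 / DBL_MIN / DBL_MIN) == 2);   /* overflows to inf */
  return 0;
}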
*/ 3865 3866 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \ 3867 || defined (__amd64__)) 3868 #define FORCE_DOUBLE(d) \ 3869 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0) 3870 #else 3871 #define FORCE_DOUBLE(d) do { } while (0) 3872 #endif 3873 3874 3875 __GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[]; 3876 3877 __GMP_DECLSPEC extern int __gmp_junk; 3878 __GMP_DECLSPEC extern const int __gmp_0; 3879 __GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN; 3880 __GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN; 3881 __GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN; 3882 __GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN; 3883 #define GMP_ERROR(code) __gmp_exception (code) 3884 #define DIVIDE_BY_ZERO __gmp_divide_by_zero () 3885 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative () 3886 3887 #if defined _LONG_LONG_LIMB 3888 #define CNST_LIMB(C) ((mp_limb_t) C##LL) 3889 #else /* not _LONG_LONG_LIMB */ 3890 #define CNST_LIMB(C) ((mp_limb_t) C##L) 3891 #endif /* _LONG_LONG_LIMB */ 3892 3893 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */ 3894 #if GMP_NUMB_BITS == 2 3895 #define PP 0x3 /* 3 */ 3896 #define PP_FIRST_OMITTED 5 3897 #endif 3898 #if GMP_NUMB_BITS == 4 3899 #define PP 0xF /* 3 x 5 */ 3900 #define PP_FIRST_OMITTED 7 3901 #endif 3902 #if GMP_NUMB_BITS == 8 3903 #define PP 0x69 /* 3 x 5 x 7 */ 3904 #define PP_FIRST_OMITTED 11 3905 #endif 3906 #if GMP_NUMB_BITS == 16 3907 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */ 3908 #define PP_FIRST_OMITTED 17 3909 #endif 3910 #if GMP_NUMB_BITS == 32 3911 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */ 3912 #define PP_INVERTED 0x53E5645CL 3913 #define PP_FIRST_OMITTED 31 3914 #endif 3915 #if GMP_NUMB_BITS == 64 3916 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */ 3917 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B) 3918 #define PP_FIRST_OMITTED 59 3919 #endif 3920 #ifndef PP_FIRST_OMITTED 3921 #define PP_FIRST_OMITTED 3 3922 #endif 3923 3924 /* BIT1 means a result value in bit 1 (second least significant bit), with a 3925 zero bit representing +1 and a one bit representing -1. Bits other than 3926 bit 1 are garbage. These are meant to be kept in "int"s, and casts are 3927 used to ensure the expressions are "int"s even if a and/or b might be 3928 other types. 3929 3930 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base 3931 and their speed is important. Expressions are used rather than 3932 conditionals to accumulate sign changes, which effectively means XORs 3933 instead of conditional JUMPs. */ 3934 3935 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */ 3936 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1)) 3937 3938 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */ 3939 #define JACOBI_U0(a) ((a) == 1) 3940 3941 /* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and 3942 come up with a better name. 
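/* An illustrative rendering of the bit 1 convention (not from the
   header), using the helpers defined below in plain-function form.
   Sign flips accumulate branch-free by XORing bit 1 values, which is
   what the *_BIT1 macros are for. */

#include <assert.h>

/* Bit 1 carries the sign: 0 there means +1, 1 there means -1. */
static int
bit1_to_pn (int bit1)
{
  return 1 - (bit1 & 2);
}

/* (2/b) for odd b is (-1)^((b^2-1)/8): +1 for b == 1,7 mod 8 and -1 for
   b == 3,5 mod 8. Bit 1 of (b>>1)^b computes exactly that, as in
   JACOBI_TWO_U_BIT1. */
static int
two_over_b (unsigned b)
{
  return bit1_to_pn ((int) ((b >> 1) ^ b));
}

/* Reciprocity: (a/b)(b/a) = (-1)^((a-1)(b-1)/4) for odd a,b, i.e. -1 only
   when both are 3 mod 4; that is bit 1 of a & b, as in
   JACOBI_RECIP_UU_BIT1. */
static int
recip_flip (unsigned a, unsigned b)
{
  return bit1_to_pn ((int) (a & b));
}

int
main (void)
{
  assert (two_over_b (7) == 1 && two_over_b (5) == -1);
  assert (recip_flip (3, 3) == -1 && recip_flip (5, 3) == 1);
  return 0;
}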
*/ 3943 3944 /* (a/0), with a given by low and size; 3945 is 1 if a=+/-1, 0 otherwise */ 3946 #define JACOBI_LS0(alow,asize) \ 3947 (((asize) == 1 || (asize) == -1) && (alow) == 1) 3948 3949 /* (a/0), with a an mpz_t; 3950 fetch of low limb always valid, even if size is zero */ 3951 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a)) 3952 3953 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */ 3954 #define JACOBI_0U(b) ((b) == 1) 3955 3956 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */ 3957 #define JACOBI_0S(b) ((b) == 1 || (b) == -1) 3958 3959 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */ 3960 #define JACOBI_0LS(blow,bsize) \ 3961 (((bsize) == 1 || (bsize) == -1) && (blow) == 1) 3962 3963 /* Convert a bit1 to +1 or -1. */ 3964 #define JACOBI_BIT1_TO_PN(result_bit1) \ 3965 (1 - ((int) (result_bit1) & 2)) 3966 3967 /* (2/b), with b unsigned and odd; 3968 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and 3969 hence obtained from (b>>1)^b */ 3970 #define JACOBI_TWO_U_BIT1(b) \ 3971 ((int) (((b) >> 1) ^ (b))) 3972 3973 /* (2/b)^twos, with b unsigned and odd */ 3974 #define JACOBI_TWOS_U_BIT1(twos, b) \ 3975 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b)) 3976 3977 /* (2/b)^twos, with b unsigned and odd */ 3978 #define JACOBI_TWOS_U(twos, b) \ 3979 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b))) 3980 3981 /* (-1/b), with b odd (signed or unsigned); 3982 is (-1)^((b-1)/2) */ 3983 #define JACOBI_N1B_BIT1(b) \ 3984 ((int) (b)) 3985 3986 /* (a/b) effect due to sign of a: signed/unsigned, b odd; 3987 is (-1/b) if a<0, or +1 if a>=0 */ 3988 #define JACOBI_ASGN_SU_BIT1(a, b) \ 3989 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b)) 3990 3991 /* (a/b) effect due to sign of b: signed/signed; 3992 is -1 if a and b both negative, +1 otherwise */ 3993 #define JACOBI_BSGN_SS_BIT1(a, b) \ 3994 ((((a)<0) & ((b)<0)) << 1) 3995 3996 /* (a/b) effect due to sign of b: signed/mpz; 3997 is -1 if a and b both negative, +1 otherwise */ 3998 #define JACOBI_BSGN_SZ_BIT1(a, b) \ 3999 JACOBI_BSGN_SS_BIT1 (a, SIZ(b)) 4000 4001 /* (a/b) effect due to sign of b: mpz/signed; 4002 is -1 if a and b both negative, +1 otherwise */ 4003 #define JACOBI_BSGN_ZS_BIT1(a, b) \ 4004 JACOBI_BSGN_SZ_BIT1 (b, a) 4005 4006 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd; 4007 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if 4008 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd 4009 because this is used in a couple of places with only bit 1 of a or b 4010 valid. */ 4011 #define JACOBI_RECIP_UU_BIT1(a, b) \ 4012 ((int) ((a) & (b))) 4013 4014 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and 4015 decrementing b_size. b_low should be b_ptr[0] on entry, and will be 4016 updated for the new b_ptr. result_bit1 is updated according to the 4017 factors of 2 stripped, as per (a/2). */ 4018 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \ 4019 do { \ 4020 ASSERT ((b_size) >= 1); \ 4021 ASSERT ((b_low) == (b_ptr)[0]); \ 4022 \ 4023 while (UNLIKELY ((b_low) == 0)) \ 4024 { \ 4025 (b_size)--; \ 4026 ASSERT ((b_size) >= 1); \ 4027 (b_ptr)++; \ 4028 (b_low) = *(b_ptr); \ 4029 \ 4030 ASSERT (((a) & 1) != 0); \ 4031 if ((GMP_NUMB_BITS % 2) == 1) \ 4032 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \ 4033 } \ 4034 } while (0) 4035 4036 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or 4037 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and 4038 unsigned. 
modexact_1_odd effectively calculates -a mod b, and 4039 result_bit1 is adjusted for the factor of -1. 4040 4041 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and 4042 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what 4043 factor to introduce into result_bit1, so for that case use mpn_mod_1 4044 unconditionally. 4045 4046 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used 4047 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result, 4048 or not skip a divide step, or something. */ 4049 4050 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \ 4051 do { \ 4052 mp_srcptr __a_ptr = (a_ptr); \ 4053 mp_size_t __a_size = (a_size); \ 4054 mp_limb_t __b = (b); \ 4055 \ 4056 ASSERT (__a_size >= 1); \ 4057 ASSERT (__b & 1); \ 4058 \ 4059 if ((GMP_NUMB_BITS % 2) != 0 \ 4060 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \ 4061 { \ 4062 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \ 4063 } \ 4064 else \ 4065 { \ 4066 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \ 4067 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \ 4068 } \ 4069 } while (0) 4070 4071 /* State for the Jacobi computation using Lehmer. */ 4072 #define jacobi_table __gmp_jacobi_table 4073 __GMP_DECLSPEC extern const unsigned char jacobi_table[208]; 4074 4075 /* Bit layout for the initial state. b must be odd. 4076 4077 3 2 1 0 4078 +--+--+--+--+ 4079 |a1|a0|b1| s| 4080 +--+--+--+--+ 4081 4082 */ 4083 static inline unsigned 4084 mpn_jacobi_init (unsigned a, unsigned b, unsigned s) 4085 { 4086 ASSERT (b & 1); 4087 ASSERT (s <= 1); 4088 return ((a & 3) << 2) + (b & 2) + s; 4089 } 4090 4091 static inline int 4092 mpn_jacobi_finish (unsigned bits) 4093 { 4094 /* (a, b) = (1,0) or (0,1) */ 4095 ASSERT ( (bits & 14) == 0); 4096 4097 return 1-2*(bits & 1); 4098 } 4099 4100 static inline unsigned 4101 mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q) 4102 { 4103 /* FIXME: Could halve table size by not including the e bit in the 4104 * index, and instead xor when updating. Then the lookup would be 4105 * like 4106 * 4107 * bits ^= table[((bits & 30) << 2) + (denominator << 2) + q]; 4108 */ 4109 4110 ASSERT (bits < 26); 4111 ASSERT (denominator < 2); 4112 ASSERT (q < 4); 4113 4114 /* For almost all calls, denominator is constant and quite often q 4115 is constant too. So use addition rather than or, so the compiler 4116 can put the constant part into the offset of an indexed 4117 addressing instruction. 4118 4119 With constant denominator, the below table lookup is compiled to 4120 4121 C Constant q = 1, constant denominator = 1 4122 movzbl table+5(%eax,8), %eax 4123 4124 or 4125 4126 C q in %edx, constant denominator = 1 4127 movzbl table+4(%edx,%eax,8), %eax 4128 4129 One could maintain the state preshifted 3 bits, to save a shift 4130 here, but at least on x86, that's no real saving.
4131 */ 4132 return bits = jacobi_table[(bits << 3) + (denominator << 2) + q]; 4133 } 4134 4135 /* Matrix multiplication */ 4136 #define mpn_matrix22_mul __MPN(matrix22_mul) 4137 __GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr); 4138 #define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen) 4139 __GMP_DECLSPEC void mpn_matrix22_mul_strassen (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr); 4140 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch) 4141 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 4142 4143 #ifndef MATRIX22_STRASSEN_THRESHOLD 4144 #define MATRIX22_STRASSEN_THRESHOLD 30 4145 #endif 4146 4147 /* HGCD definitions */ 4148 4149 /* Extract one numb, shifting count bits left 4150 ________ ________ 4151 |___xh___||___xl___| 4152 |____r____| 4153 >count < 4154 4155 The count includes any nail bits, so it should work fine if count 4156 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of 4157 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS. 4158 4159 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for 4160 those calls where the count high bits of xh may be non-zero. 4161 */ 4162 4163 #define MPN_EXTRACT_NUMB(count, xh, xl) \ 4164 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \ 4165 ((xl) >> (GMP_LIMB_BITS - (count)))) 4166 4167 4168 /* The matrix non-negative M = (u, u'; v,v') keeps track of the 4169 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller 4170 than a, b. The determinant must always be one, so that M has an 4171 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1 4172 bits. 
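/* MPN_EXTRACT_NUMB above, specialized to 64-bit limbs with no nails (an
   illustrative standalone form, before the matrix structures below): */

#include <stdint.h>
#include <assert.h>

/* The top 64 bits of the 128-bit window (xh,xl) after shifting left by
   count; requires 0 < count < 64, e.g. count obtained from
   count_leading_zeros of the most significant limb. */
static uint64_t
extract_numb (unsigned count, uint64_t xh, uint64_t xl)
{
  assert (count > 0 && count < 64);
  return (xh << count) | (xl >> (64 - count));
}

int
main (void)
{
  /* (xh,xl) = (1, 2^63) is binary "11" followed by 63 zeros; shifting by
     the 63 leading zeros of xh yields the top 64 bits, 2^63 + 2^62. */
  assert (extract_numb (63, 1, (uint64_t) 1 << 63)
          == (((uint64_t) 1 << 63) | ((uint64_t) 1 << 62)));
  return 0;
}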
*/ 4173 struct hgcd_matrix1 4174 { 4175 mp_limb_t u[2][2]; 4176 }; 4177 4178 #define mpn_hgcd2 __MPN (hgcd2) 4179 __GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *); 4180 4181 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector) 4182 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t); 4183 4184 #define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector) 4185 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t); 4186 4187 #define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi) 4188 __GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *); 4189 4190 struct hgcd_matrix 4191 { 4192 mp_size_t alloc; /* for sanity checking only */ 4193 mp_size_t n; 4194 mp_ptr p[2][2]; 4195 }; 4196 4197 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1)) 4198 4199 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init) 4200 __GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr); 4201 4202 #define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q) 4203 __GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr); 4204 4205 #define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1) 4206 __GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr); 4207 4208 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul) 4209 __GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr); 4210 4211 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust) 4212 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr); 4213 4214 #define mpn_hgcd_step __MPN(hgcd_step) 4215 __GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4216 4217 #define mpn_hgcd_reduce __MPN(hgcd_reduce) 4218 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr); 4219 4220 #define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch) 4221 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 4222 4223 #define mpn_hgcd_itch __MPN (hgcd_itch) 4224 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST; 4225 4226 #define mpn_hgcd __MPN (hgcd) 4227 __GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4228 4229 #define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch) 4230 __GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST; 4231 4232 #define mpn_hgcd_appr __MPN (hgcd_appr) 4233 __GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4234 4235 #define mpn_hgcd_jacobi __MPN (hgcd_jacobi) 4236 __GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr); 4237 4238 typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int); 4239 4240 /* Needs storage for the quotient */ 4241 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n) 4242 4243 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step) 4244 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr); 4245 4246 struct gcdext_ctx 4247 { 4248 /* Result parameters. 
*/ 4249 mp_ptr gp; 4250 mp_size_t gn; 4251 mp_ptr up; 4252 mp_size_t *usize; 4253 4254 /* Cofactors updated in each step. */ 4255 mp_size_t un; 4256 mp_ptr u0, u1, tp; 4257 }; 4258 4259 #define mpn_gcdext_hook __MPN (gcdext_hook) 4260 gcd_subdiv_step_hook mpn_gcdext_hook; 4261 4262 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3) 4263 4264 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n) 4265 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr); 4266 4267 /* 4*(an + 1) + 4*(bn + 1) + an */ 4268 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8) 4269 4270 #ifndef HGCD_THRESHOLD 4271 #define HGCD_THRESHOLD 400 4272 #endif 4273 4274 #ifndef HGCD_APPR_THRESHOLD 4275 #define HGCD_APPR_THRESHOLD 400 4276 #endif 4277 4278 #ifndef HGCD_REDUCE_THRESHOLD 4279 #define HGCD_REDUCE_THRESHOLD 1000 4280 #endif 4281 4282 #ifndef GCD_DC_THRESHOLD 4283 #define GCD_DC_THRESHOLD 1000 4284 #endif 4285 4286 #ifndef GCDEXT_DC_THRESHOLD 4287 #define GCDEXT_DC_THRESHOLD 600 4288 #endif 4289 4290 /* Definitions for mpn_set_str and mpn_get_str */ 4291 struct powers 4292 { 4293 mp_ptr p; /* actual power value */ 4294 mp_size_t n; /* # of limbs at p */ 4295 mp_size_t shift; /* weight of lowest limb, in limb base B */ 4296 size_t digits_in_base; /* number of corresponding digits */ 4297 int base; 4298 }; 4299 typedef struct powers powers_t; 4300 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS) 4301 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS) 4302 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) 4303 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS) 4304 4305 #define mpn_dc_set_str __MPN(dc_set_str) 4306 __GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr); 4307 #define mpn_bc_set_str __MPN(bc_set_str) 4308 __GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int); 4309 #define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab) 4310 __GMP_DECLSPEC void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int); 4311 4312 4313 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole 4314 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb, 4315 hence giving back the user's size in bits rounded up. Notice that 4316 converting prec->bits->prec gives an unchanged value. */ 4317 #define __GMPF_BITS_TO_PREC(n) \ 4318 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS)) 4319 #define __GMPF_PREC_TO_BITS(n) \ 4320 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS) 4321 4322 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision; 4323 4324 /* Compute the number of base-b digits corresponding to nlimbs limbs, rounding 4325 down. */ 4326 #define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \ 4327 do { \ 4328 mp_limb_t _ph, _dummy; \ 4329 umul_ppmm (_ph, _dummy, \ 4330 mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\ 4331 res = _ph; \ 4332 } while (0) 4333 4334 /* Compute the number of limbs corresponding to ndigits base-b digits, rounding 4335 up. */ 4336 #define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \ 4337 do { \ 4338 mp_limb_t _ph, _dummy; \ 4339 umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \ 4340 res = 8 * _ph / GMP_NUMB_BITS + 2; \ 4341 } while (0) 4342 4343 4344 /* Set n to the number of significant digits an mpf of the given _mp_prec 4345 field, in the given base. 
This is a rounded up value, designed to ensure 4346 there's enough digits to reproduce all the guaranteed part of the value. 4347 4348 There are prec many limbs, but the high might be only "1" so forget it 4349 and just count prec-1 limbs into chars. +1 rounds that upwards, and a 4350 further +1 is because the limbs usually won't fall on digit boundaries. 4351 4352 FIXME: If base is a power of 2 and the bits per digit divides 4353 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for 4354 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */ 4355 4356 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \ 4357 do { \ 4358 size_t rawn; \ 4359 ASSERT (base >= 2 && base < numberof (mp_bases)); \ 4360 DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \ 4361 n = rawn + 2; \ 4362 } while (0) 4363 4364 4365 /* Decimal point string, from the current C locale. Needs <langinfo.h> for 4366 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get 4367 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under 4368 their respective #if HAVE_FOO_H. 4369 4370 GLIBC recommends nl_langinfo because getting only one facet can be 4371 faster, apparently. */ 4372 4373 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */ 4374 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT) 4375 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT)) 4376 #endif 4377 /* RADIXCHAR is deprecated, still in unix98 or some such. */ 4378 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT) 4379 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR)) 4380 #endif 4381 /* localeconv is slower since it returns all locale stuff */ 4382 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT) 4383 #define GMP_DECIMAL_POINT (localeconv()->decimal_point) 4384 #endif 4385 #if ! 
defined (GMP_DECIMAL_POINT) 4386 #define GMP_DECIMAL_POINT (".") 4387 #endif 4388 4389 4390 #define DOPRNT_CONV_FIXED 1 4391 #define DOPRNT_CONV_SCIENTIFIC 2 4392 #define DOPRNT_CONV_GENERAL 3 4393 4394 #define DOPRNT_JUSTIFY_NONE 0 4395 #define DOPRNT_JUSTIFY_LEFT 1 4396 #define DOPRNT_JUSTIFY_RIGHT 2 4397 #define DOPRNT_JUSTIFY_INTERNAL 3 4398 4399 #define DOPRNT_SHOWBASE_YES 1 4400 #define DOPRNT_SHOWBASE_NO 2 4401 #define DOPRNT_SHOWBASE_NONZERO 3 4402 4403 struct doprnt_params_t { 4404 int base; /* negative for upper case */ 4405 int conv; /* choices above */ 4406 const char *expfmt; /* exponent format */ 4407 int exptimes4; /* exponent multiply by 4 */ 4408 char fill; /* character */ 4409 int justify; /* choices above */ 4410 int prec; /* prec field, or -1 for all digits */ 4411 int showbase; /* choices above */ 4412 int showpoint; /* if radix point always shown */ 4413 int showtrailing; /* if trailing zeros wanted */ 4414 char sign; /* '+', ' ', or '\0' */ 4415 int width; /* width field */ 4416 }; 4417 4418 #if _GMP_H_HAVE_VA_LIST 4419 4420 typedef int (*doprnt_format_t) (void *, const char *, va_list); 4421 typedef int (*doprnt_memory_t) (void *, const char *, size_t); 4422 typedef int (*doprnt_reps_t) (void *, int, int); 4423 typedef int (*doprnt_final_t) (void *); 4424 4425 struct doprnt_funs_t { 4426 doprnt_format_t format; 4427 doprnt_memory_t memory; 4428 doprnt_reps_t reps; 4429 doprnt_final_t final; /* NULL if not required */ 4430 }; 4431 4432 extern const struct doprnt_funs_t __gmp_fprintf_funs; 4433 extern const struct doprnt_funs_t __gmp_sprintf_funs; 4434 extern const struct doprnt_funs_t __gmp_snprintf_funs; 4435 extern const struct doprnt_funs_t __gmp_obstack_printf_funs; 4436 extern const struct doprnt_funs_t __gmp_ostream_funs; 4437 4438 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first 4439 "size" of these have been written. "alloc > size" is maintained, so 4440 there's room to store a '\0' at the end. "result" is where the 4441 application wants the final block pointer. */ 4442 struct gmp_asprintf_t { 4443 char **result; 4444 char *buf; 4445 size_t size; 4446 size_t alloc; 4447 }; 4448 4449 #define GMP_ASPRINTF_T_INIT(d, output) \ 4450 do { \ 4451 (d).result = (output); \ 4452 (d).alloc = 256; \ 4453 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \ 4454 (d).size = 0; \ 4455 } while (0) 4456 4457 /* If a realloc is necessary, use twice the size actually required, so as to 4458 avoid repeated small reallocs. */ 4459 #define GMP_ASPRINTF_T_NEED(d, n) \ 4460 do { \ 4461 size_t alloc, newsize, newalloc; \ 4462 ASSERT ((d)->alloc >= (d)->size + 1); \ 4463 \ 4464 alloc = (d)->alloc; \ 4465 newsize = (d)->size + (n); \ 4466 if (alloc <= newsize) \ 4467 { \ 4468 newalloc = 2*newsize; \ 4469 (d)->alloc = newalloc; \ 4470 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \ 4471 alloc, newalloc, char); \ 4472 } \ 4473 } while (0) 4474 4475 __GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t); 4476 __GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int); 4477 __GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *); 4478 4479 /* buf is where to write the next output, and size is how much space is left 4480 there. If the application passed size==0 then that's what we'll have 4481 here, and nothing at all should be written. 
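/* A standalone sketch of the gmp_asprintf_t buffer discipline above
   (illustrative names, error handling omitted): keep alloc > size so a
   '\0' always fits, and on growth take twice what is needed so repeated
   small writes cost amortized O(1) reallocs. */

#include <stdlib.h>
#include <string.h>
#include <assert.h>

struct outbuf { char *buf; size_t size, alloc; };

static void
outbuf_init (struct outbuf *d)
{
  d->alloc = 256;
  d->buf = malloc (d->alloc);
  d->size = 0;
}

static void
outbuf_write (struct outbuf *d, const char *p, size_t n)
{
  if (d->alloc <= d->size + n)
    {
      d->alloc = 2 * (d->size + n);            /* double what's required */
      d->buf = realloc (d->buf, d->alloc);
    }
  memcpy (d->buf + d->size, p, n);
  d->size += n;
}

int
main (void)
{
  struct outbuf d;
  outbuf_init (&d);
  outbuf_write (&d, "hello ", 6);
  outbuf_write (&d, "world", 5);
  d.buf[d.size] = '\0';           /* alloc > size guarantees the room */
  assert (strcmp (d.buf, "hello world") == 0);
  free (d.buf);
  return 0;
}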
*/ 4482 struct gmp_snprintf_t { 4483 char *buf; 4484 size_t size; 4485 }; 4486 4487 /* Add the bytes printed by the call to the total retval, or bail out on an 4488 error. */ 4489 #define DOPRNT_ACCUMULATE(call) \ 4490 do { \ 4491 int __ret; \ 4492 __ret = call; \ 4493 if (__ret == -1) \ 4494 goto error; \ 4495 retval += __ret; \ 4496 } while (0) 4497 #define DOPRNT_ACCUMULATE_FUN(fun, params) \ 4498 do { \ 4499 ASSERT ((fun) != NULL); \ 4500 DOPRNT_ACCUMULATE ((*(fun)) params); \ 4501 } while (0) 4502 4503 #define DOPRNT_FORMAT(fmt, ap) \ 4504 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap)) 4505 #define DOPRNT_MEMORY(ptr, len) \ 4506 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len)) 4507 #define DOPRNT_REPS(c, n) \ 4508 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n)) 4509 4510 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str)) 4511 4512 #define DOPRNT_REPS_MAYBE(c, n) \ 4513 do { \ 4514 if ((n) != 0) \ 4515 DOPRNT_REPS (c, n); \ 4516 } while (0) 4517 #define DOPRNT_MEMORY_MAYBE(ptr, len) \ 4518 do { \ 4519 if ((len) != 0) \ 4520 DOPRNT_MEMORY (ptr, len); \ 4521 } while (0) 4522 4523 __GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list); 4524 __GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *); 4525 4526 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2 4527 __GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr); 4528 4529 __GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list); 4530 #endif /* _GMP_H_HAVE_VA_LIST */ 4531 4532 4533 typedef int (*gmp_doscan_scan_t) (void *, const char *, ...); 4534 typedef void *(*gmp_doscan_step_t) (void *, int); 4535 typedef int (*gmp_doscan_get_t) (void *); 4536 typedef int (*gmp_doscan_unget_t) (int, void *); 4537 4538 struct gmp_doscan_funs_t { 4539 gmp_doscan_scan_t scan; 4540 gmp_doscan_step_t step; 4541 gmp_doscan_get_t get; 4542 gmp_doscan_unget_t unget; 4543 }; 4544 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs; 4545 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs; 4546 4547 #if _GMP_H_HAVE_VA_LIST 4548 __GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list); 4549 #endif 4550 4551 4552 /* For testing and debugging. 
*/ 4553 #define MPZ_CHECK_FORMAT(z) \ 4554 do { \ 4555 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \ 4556 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \ 4557 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \ 4558 } while (0) 4559 4560 #define MPQ_CHECK_FORMAT(q) \ 4561 do { \ 4562 MPZ_CHECK_FORMAT (mpq_numref (q)); \ 4563 MPZ_CHECK_FORMAT (mpq_denref (q)); \ 4564 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \ 4565 \ 4566 if (SIZ(mpq_numref(q)) == 0) \ 4567 { \ 4568 /* should have zero as 0/1 */ \ 4569 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \ 4570 && PTR(mpq_denref(q))[0] == 1); \ 4571 } \ 4572 else \ 4573 { \ 4574 /* should have no common factors */ \ 4575 mpz_t g; \ 4576 mpz_init (g); \ 4577 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \ 4578 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \ 4579 mpz_clear (g); \ 4580 } \ 4581 } while (0) 4582 4583 #define MPF_CHECK_FORMAT(f) \ 4584 do { \ 4585 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \ 4586 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \ 4587 if (SIZ(f) == 0) \ 4588 ASSERT_ALWAYS (EXP(f) == 0); \ 4589 if (SIZ(f) != 0) \ 4590 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \ 4591 } while (0) 4592 4593 4594 /* Enhancement: The "mod" and "gcd_1" functions below could have 4595 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on 4596 function pointers, only actual functions. It probably doesn't make much 4597 difference to the gmp code, since hopefully we arrange calls so there's 4598 no great need for the compiler to move things around. */ 4599 4600 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64) 4601 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST 4602 in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4. Be sure to update 4603 those when changing here. 
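/* The canonical form that MPQ_CHECK_FORMAT above insists on can also be
   verified through the public API; a small sketch (illustrative name,
   assumes <gmp.h>): the denominator is positive, zero is stored as 0/1,
   and gcd(num,den) == 1 covers both the zero and nonzero cases, since
   gcd(0,den) == den. */

#include <gmp.h>
#include <assert.h>

static void
check_canonical (const mpq_t q)
{
  mpz_t g;
  assert (mpz_sgn (mpq_denref (q)) > 0);
  mpz_init (g);
  mpz_gcd (g, mpq_numref (q), mpq_denref (q));
  assert (mpz_cmp_ui (g, 1) == 0);
  mpz_clear (g);
}

int
main (void)
{
  mpq_t q;
  mpq_init (q);
  mpq_set_si (q, 6, 4);
  mpq_canonicalize (q);     /* 6/4 -> 3/2 */
  check_canonical (q);
  mpq_clear (q);
  return 0;
}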
*/ 4604 struct cpuvec_t { 4605 DECL_add_n ((*add_n)); 4606 DECL_addlsh1_n ((*addlsh1_n)); 4607 DECL_addlsh2_n ((*addlsh2_n)); 4608 DECL_addmul_1 ((*addmul_1)); 4609 DECL_addmul_2 ((*addmul_2)); 4610 DECL_bdiv_dbm1c ((*bdiv_dbm1c)); 4611 DECL_cnd_add_n ((*cnd_add_n)); 4612 DECL_cnd_sub_n ((*cnd_sub_n)); 4613 DECL_com ((*com)); 4614 DECL_copyd ((*copyd)); 4615 DECL_copyi ((*copyi)); 4616 DECL_divexact_1 ((*divexact_1)); 4617 DECL_divrem_1 ((*divrem_1)); 4618 DECL_gcd_1 ((*gcd_1)); 4619 DECL_lshift ((*lshift)); 4620 DECL_lshiftc ((*lshiftc)); 4621 DECL_mod_1 ((*mod_1)); 4622 DECL_mod_1_1p ((*mod_1_1p)); 4623 DECL_mod_1_1p_cps ((*mod_1_1p_cps)); 4624 DECL_mod_1s_2p ((*mod_1s_2p)); 4625 DECL_mod_1s_2p_cps ((*mod_1s_2p_cps)); 4626 DECL_mod_1s_4p ((*mod_1s_4p)); 4627 DECL_mod_1s_4p_cps ((*mod_1s_4p_cps)); 4628 DECL_mod_34lsub1 ((*mod_34lsub1)); 4629 DECL_modexact_1c_odd ((*modexact_1c_odd)); 4630 DECL_mul_1 ((*mul_1)); 4631 DECL_mul_basecase ((*mul_basecase)); 4632 DECL_mullo_basecase ((*mullo_basecase)); 4633 DECL_preinv_divrem_1 ((*preinv_divrem_1)); 4634 DECL_preinv_mod_1 ((*preinv_mod_1)); 4635 DECL_redc_1 ((*redc_1)); 4636 DECL_redc_2 ((*redc_2)); 4637 DECL_rshift ((*rshift)); 4638 DECL_sqr_basecase ((*sqr_basecase)); 4639 DECL_sub_n ((*sub_n)); 4640 DECL_sublsh1_n ((*sublsh1_n)); 4641 DECL_submul_1 ((*submul_1)); 4642 mp_size_t mul_toom22_threshold; 4643 mp_size_t mul_toom33_threshold; 4644 mp_size_t sqr_toom2_threshold; 4645 mp_size_t sqr_toom3_threshold; 4646 mp_size_t bmod_1_to_mod_1_threshold; 4647 }; 4648 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec; 4649 __GMP_DECLSPEC extern int __gmpn_cpuvec_initialized; 4650 #endif /* x86 fat binary */ 4651 4652 __GMP_DECLSPEC void __gmpn_cpuvec_init (void); 4653 4654 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init() 4655 if that hasn't yet been done (to establish the right values). */ 4656 #define CPUVEC_THRESHOLD(field) \ 4657 ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \ 4658 __gmpn_cpuvec.field) 4659 4660 4661 #if HAVE_NATIVE_mpn_add_nc 4662 #define mpn_add_nc __MPN(add_nc) 4663 __GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 4664 #else 4665 static inline 4666 mp_limb_t 4667 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4668 { 4669 mp_limb_t co; 4670 co = mpn_add_n (rp, up, vp, n); 4671 co += mpn_add_1 (rp, rp, n, ci); 4672 return co; 4673 } 4674 #endif 4675 4676 #if HAVE_NATIVE_mpn_sub_nc 4677 #define mpn_sub_nc __MPN(sub_nc) 4678 __GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 4679 #else 4680 static inline mp_limb_t 4681 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4682 { 4683 mp_limb_t co; 4684 co = mpn_sub_n (rp, up, vp, n); 4685 co += mpn_sub_1 (rp, rp, n, ci); 4686 return co; 4687 } 4688 #endif 4689 4690 #if TUNE_PROGRAM_BUILD 4691 /* Some extras wanted when recompiling some .c files for use by the tune 4692 program. Not part of a normal build. 4693 4694 It's necessary to keep these thresholds as #defines (just to an 4695 identically named variable), since various defaults are established based 4696 on #ifdef in the .c files. For some this is not so (the defaults are 4697 instead established above), but all are done this way for consistency. 
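/* The CPUVEC_THRESHOLD pattern above in miniature (illustrative, not
   from the header): a comma expression runs the one-time init if needed
   and then yields the field, so callers can use it anywhere an
   expression is allowed. */

#include <stddef.h>
#include <assert.h>

struct vec { size_t mul_threshold; };
static struct vec the_vec;
static int the_vec_initialized = 0;

static void
vec_init (void)
{
  the_vec.mul_threshold = 30;   /* would be chosen per-CPU at run time */
  the_vec_initialized = 1;
}

#define VEC_THRESHOLD(field) \
  ((the_vec_initialized ? 0 : (vec_init (), 0)), the_vec.field)

int
main (void)
{
  assert (VEC_THRESHOLD (mul_threshold) == 30);
  return 0;
}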
*/ 4698 4699 #undef MUL_TOOM22_THRESHOLD 4700 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold 4701 extern mp_size_t mul_toom22_threshold; 4702 4703 #undef MUL_TOOM33_THRESHOLD 4704 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold 4705 extern mp_size_t mul_toom33_threshold; 4706 4707 #undef MUL_TOOM44_THRESHOLD 4708 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold 4709 extern mp_size_t mul_toom44_threshold; 4710 4711 #undef MUL_TOOM6H_THRESHOLD 4712 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold 4713 extern mp_size_t mul_toom6h_threshold; 4714 4715 #undef MUL_TOOM8H_THRESHOLD 4716 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold 4717 extern mp_size_t mul_toom8h_threshold; 4718 4719 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD 4720 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold 4721 extern mp_size_t mul_toom32_to_toom43_threshold; 4722 4723 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD 4724 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold 4725 extern mp_size_t mul_toom32_to_toom53_threshold; 4726 4727 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD 4728 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold 4729 extern mp_size_t mul_toom42_to_toom53_threshold; 4730 4731 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD 4732 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold 4733 extern mp_size_t mul_toom42_to_toom63_threshold; 4734 4735 #undef MUL_TOOM43_TO_TOOM54_THRESHOLD 4736 #define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold 4737 extern mp_size_t mul_toom43_to_toom54_threshold; 4738 4739 #undef MUL_FFT_THRESHOLD 4740 #define MUL_FFT_THRESHOLD mul_fft_threshold 4741 extern mp_size_t mul_fft_threshold; 4742 4743 #undef MUL_FFT_MODF_THRESHOLD 4744 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold 4745 extern mp_size_t mul_fft_modf_threshold; 4746 4747 #undef MUL_FFT_TABLE 4748 #define MUL_FFT_TABLE { 0 } 4749 4750 #undef MUL_FFT_TABLE3 4751 #define MUL_FFT_TABLE3 { {0,0} } 4752 4753 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should 4754 remain as zero (always use it). */ 4755 #if !
/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
   remain as zero (always use it).  */
#if ! HAVE_NATIVE_mpn_sqr_basecase
#undef SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
extern mp_size_t sqr_basecase_threshold;
#endif

#if TUNE_PROGRAM_BUILD_SQR
#undef SQR_TOOM2_THRESHOLD
#define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC
#else
#undef SQR_TOOM2_THRESHOLD
#define SQR_TOOM2_THRESHOLD sqr_toom2_threshold
extern mp_size_t sqr_toom2_threshold;
#endif

#undef SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
extern mp_size_t sqr_toom3_threshold;

#undef SQR_TOOM4_THRESHOLD
#define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
extern mp_size_t sqr_toom4_threshold;

#undef SQR_TOOM6_THRESHOLD
#define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
extern mp_size_t sqr_toom6_threshold;

#undef SQR_TOOM8_THRESHOLD
#define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
extern mp_size_t sqr_toom8_threshold;

#undef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD sqr_fft_threshold
extern mp_size_t sqr_fft_threshold;

#undef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
extern mp_size_t sqr_fft_modf_threshold;

#undef SQR_FFT_TABLE
#define SQR_FFT_TABLE { 0 }

#undef SQR_FFT_TABLE3
#define SQR_FFT_TABLE3 { {0,0} }

#undef MULLO_BASECASE_THRESHOLD
#define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
extern mp_size_t mullo_basecase_threshold;

#undef MULLO_DC_THRESHOLD
#define MULLO_DC_THRESHOLD mullo_dc_threshold
extern mp_size_t mullo_dc_threshold;

#undef MULLO_MUL_N_THRESHOLD
#define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
extern mp_size_t mullo_mul_n_threshold;

#undef SQRLO_BASECASE_THRESHOLD
#define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold
extern mp_size_t sqrlo_basecase_threshold;

#undef SQRLO_DC_THRESHOLD
#define SQRLO_DC_THRESHOLD sqrlo_dc_threshold
extern mp_size_t sqrlo_dc_threshold;

#undef SQRLO_SQR_THRESHOLD
#define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold
extern mp_size_t sqrlo_sqr_threshold;

#undef MULMID_TOOM42_THRESHOLD
#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
extern mp_size_t mulmid_toom42_threshold;

#undef DIV_QR_2_PI2_THRESHOLD
#define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold
extern mp_size_t div_qr_2_pi2_threshold;

#undef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
extern mp_size_t dc_div_qr_threshold;

#undef DC_DIVAPPR_Q_THRESHOLD
#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
extern mp_size_t dc_divappr_q_threshold;

#undef DC_BDIV_Q_THRESHOLD
#define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
extern mp_size_t dc_bdiv_q_threshold;

#undef DC_BDIV_QR_THRESHOLD
#define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
extern mp_size_t dc_bdiv_qr_threshold;

#undef MU_DIV_QR_THRESHOLD
#define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
extern mp_size_t mu_div_qr_threshold;

#undef MU_DIVAPPR_Q_THRESHOLD
#define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
extern mp_size_t mu_divappr_q_threshold;

#undef MUPI_DIV_QR_THRESHOLD
#define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
extern mp_size_t mupi_div_qr_threshold;
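/* Hedged sketch (simplified; not the actual selection code, which lives in
   the mpn division routines): the general shape in which the division
   thresholds above are consumed, with dn the divisor size.  */
#if 0
  if (dn < DC_DIV_QR_THRESHOLD)
    ... schoolbook (sbpi1) division ...
  else if (dn < MU_DIV_QR_THRESHOLD)
    ... divide-and-conquer (dcpi1) division ...
  else
    ... block-wise (mu) division using a precomputed inverse ...
#endif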
#undef MU_BDIV_QR_THRESHOLD
#define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
extern mp_size_t mu_bdiv_qr_threshold;

#undef MU_BDIV_Q_THRESHOLD
#define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
extern mp_size_t mu_bdiv_q_threshold;

#undef INV_MULMOD_BNM1_THRESHOLD
#define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
extern mp_size_t inv_mulmod_bnm1_threshold;

#undef INV_NEWTON_THRESHOLD
#define INV_NEWTON_THRESHOLD inv_newton_threshold
extern mp_size_t inv_newton_threshold;

#undef INV_APPR_THRESHOLD
#define INV_APPR_THRESHOLD inv_appr_threshold
extern mp_size_t inv_appr_threshold;

#undef BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD binv_newton_threshold
extern mp_size_t binv_newton_threshold;

#undef REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
extern mp_size_t redc_1_to_redc_2_threshold;

#undef REDC_2_TO_REDC_N_THRESHOLD
#define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
extern mp_size_t redc_2_to_redc_n_threshold;

#undef REDC_1_TO_REDC_N_THRESHOLD
#define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
extern mp_size_t redc_1_to_redc_n_threshold;

#undef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
extern mp_size_t matrix22_strassen_threshold;

#undef HGCD_THRESHOLD
#define HGCD_THRESHOLD hgcd_threshold
extern mp_size_t hgcd_threshold;

#undef HGCD_APPR_THRESHOLD
#define HGCD_APPR_THRESHOLD hgcd_appr_threshold
extern mp_size_t hgcd_appr_threshold;

#undef HGCD_REDUCE_THRESHOLD
#define HGCD_REDUCE_THRESHOLD hgcd_reduce_threshold
extern mp_size_t hgcd_reduce_threshold;

#undef GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD gcd_dc_threshold
extern mp_size_t gcd_dc_threshold;

#undef GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
extern mp_size_t gcdext_dc_threshold;

#undef DIV_QR_1N_PI1_METHOD
#define DIV_QR_1N_PI1_METHOD div_qr_1n_pi1_method
extern int div_qr_1n_pi1_method;

#undef DIV_QR_1_NORM_THRESHOLD
#define DIV_QR_1_NORM_THRESHOLD div_qr_1_norm_threshold
extern mp_size_t div_qr_1_norm_threshold;

#undef DIV_QR_1_UNNORM_THRESHOLD
#define DIV_QR_1_UNNORM_THRESHOLD div_qr_1_unnorm_threshold
extern mp_size_t div_qr_1_unnorm_threshold;

#undef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
extern mp_size_t divrem_1_norm_threshold;

#undef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
extern mp_size_t divrem_1_unnorm_threshold;

#undef MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
extern mp_size_t mod_1_norm_threshold;

#undef MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
extern mp_size_t mod_1_unnorm_threshold;

#undef MOD_1_1P_METHOD
#define MOD_1_1P_METHOD mod_1_1p_method
extern int mod_1_1p_method;

#undef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
extern mp_size_t mod_1n_to_mod_1_1_threshold;

#undef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
extern mp_size_t mod_1u_to_mod_1_1_threshold;
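/* Note: unlike the mp_size_t thresholds, the *_METHOD values above are
   small integer selectors between alternative implementations, hence plain
   int.  In a normal build they drive a compile-time choice along the lines
   of (sketch only; see mpn/generic/mod_1_1.c for the real code):

     #if MOD_1_1P_METHOD == 1
     ... first variant of mpn_mod_1_1p ...
     #else
     ... second variant ...
     #endif

   The tune program instead measures both variants and reports which one to
   select.  */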
#undef MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
extern mp_size_t mod_1_1_to_mod_1_2_threshold;

#undef MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
extern mp_size_t mod_1_2_to_mod_1_4_threshold;

#undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
extern mp_size_t preinv_mod_1_to_mod_1_threshold;

#if ! UDIV_PREINV_ALWAYS
#undef DIVREM_2_THRESHOLD
#define DIVREM_2_THRESHOLD divrem_2_threshold
extern mp_size_t divrem_2_threshold;
#endif

#undef MULMOD_BNM1_THRESHOLD
#define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
extern mp_size_t mulmod_bnm1_threshold;

#undef SQRMOD_BNM1_THRESHOLD
#define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
extern mp_size_t sqrmod_bnm1_threshold;

#undef GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD get_str_dc_threshold
extern mp_size_t get_str_dc_threshold;

#undef GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
extern mp_size_t get_str_precompute_threshold;

#undef SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD set_str_dc_threshold
extern mp_size_t set_str_dc_threshold;

#undef SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
extern mp_size_t set_str_precompute_threshold;

#undef FAC_ODD_THRESHOLD
#define FAC_ODD_THRESHOLD fac_odd_threshold
extern mp_size_t fac_odd_threshold;

#undef FAC_DSC_THRESHOLD
#define FAC_DSC_THRESHOLD fac_dsc_threshold
extern mp_size_t fac_dsc_threshold;

#undef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
#define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];

/* Sizes the tune program tests up to, used in a couple of recompilations.  */
#undef MUL_TOOM22_THRESHOLD_LIMIT
#undef MUL_TOOM33_THRESHOLD_LIMIT
#undef MULLO_BASECASE_THRESHOLD_LIMIT
#undef SQRLO_BASECASE_THRESHOLD_LIMIT
#undef SQRLO_DC_THRESHOLD_LIMIT
#undef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM2_MAX_GENERIC           200
#define MUL_TOOM22_THRESHOLD_LIMIT      700
#define MUL_TOOM33_THRESHOLD_LIMIT      700
#define SQR_TOOM3_THRESHOLD_LIMIT       400
#define MUL_TOOM44_THRESHOLD_LIMIT     1000
#define SQR_TOOM4_THRESHOLD_LIMIT      1000
#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
#define SQR_TOOM6_THRESHOLD_LIMIT      1100
#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
#define SQR_TOOM8_THRESHOLD_LIMIT      1200
#define MULLO_BASECASE_THRESHOLD_LIMIT  200
#define SQRLO_BASECASE_THRESHOLD_LIMIT  200
#define SQRLO_DC_THRESHOLD_LIMIT        400
#define GET_STR_THRESHOLD_LIMIT         150
#define FAC_DSC_THRESHOLD_LIMIT        2048

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif
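/* Reference note for the tune-build tables above (hedged): the leading [2]
   dimension of mpn_fft_table and mpn_fft_table3 separates the
   multiplication tables from the squaring tables, and FFT_TABLE3_SIZE just
   reserves generous room while measuring; the sizes finally chosen end up
   in the MUL_FFT_TABLE3/SQR_FFT_TABLE3 defines of a gmp-mparam.h.  */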
/* FIXME: Make these itch functions less conservative.  Also consider making
   them dependent on just 'an', and compute the allocation directly from 'an'
   instead of via n.  */

/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
   k is the smallest k such that
     ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
   which implies that
     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
*/
#define mpn_toom22_mul_itch(an, bn) \
  (2 * ((an) + GMP_NUMB_BITS))
#define mpn_toom2_sqr_itch(an) \
  (2 * ((an) + GMP_NUMB_BITS))

/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
*/
#define mpn_toom33_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom3_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)

/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
*/
#define mpn_toom44_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom4_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)

#define mpn_toom6_sqr_itch(n) \
  (((n) - SQR_TOOM6_THRESHOLD)*2 + \
   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))

#define MUL_TOOM6H_MIN \
  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ? \
   MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
#define mpn_toom6_mul_n_itch(n) \
  (((n) - MUL_TOOM6H_MIN)*2 + \
   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6, \
       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))

static inline mp_size_t
mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
  mp_size_t estimatedN;
  estimatedN = (an + bn) / (size_t) 10 + 1;
  return mpn_toom6_mul_n_itch (estimatedN * 6);
}

#define mpn_toom8_sqr_itch(n) \
  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))

#define MUL_TOOM8H_MIN \
  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ? \
   MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
#define mpn_toom8_mul_n_itch(n) \
  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) + \
   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6, \
       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))

static inline mp_size_t
mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
  mp_size_t estimatedN;
  estimatedN = (an + bn) / (size_t) 14 + 1;
  return mpn_toom8_mul_n_itch (estimatedN * 8);
}
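/* Usage sketch for the itch macros/functions in this section (illustrative;
   the callers in mpn/generic follow this pattern, with pp/ap/an/bp/bn
   standing for the usual product/operand arguments):  */
#if 0
  TMP_DECL;
  TMP_MARK;
  {
    /* Allocate exactly the scratch the routine says it needs, then pass
       it in; the itch value is an upper bound, so this is always safe.  */
    mp_ptr scratch = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (an, bn));
    mpn_toom22_mul (pp, ap, an, bp, bn, scratch);
  }
  TMP_FREE;
#endif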
static inline mp_size_t
mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
  mp_size_t itch = 2 * n + 1;

  return itch;
}

static inline mp_size_t
mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
  return 6 * n + 3;
}

static inline mp_size_t
mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);

  return 6 * n + 4;
}

static inline mp_size_t
mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
  return 6 * n + 4;
}

static inline mp_size_t
mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
  return 10 * n + 10;
}

static inline mp_size_t
mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
  return 10 * n + 10;
}

static inline mp_size_t
mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
  return 9 * n + 3;
}

static inline mp_size_t
mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
  return 9 * n + 3;
}

/* let S(n) = space required for input size n,
   then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).  */
#define mpn_toom42_mulmid_itch(n) \
  (3 * (n) + GMP_NUMB_BITS)

#if 0
#define mpn_fft_mul mpn_mul_fft_full
#else
#define mpn_fft_mul mpn_nussbaumer_mul
#endif

#ifdef __cplusplus

/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown.
   The len field is needed by the destructor, and can be used by anyone else
   to avoid a second strlen pass over the data.

   Since our input is a C string, using strlen is correct.  Perhaps it'd be
   more C++-ish style to use std::char_traits<char>::length, but char_traits
   isn't available in gcc 2.95.4.  */

class gmp_allocated_string {
 public:
  char *str;
  size_t len;
  gmp_allocated_string(char *arg)
  {
    str = arg;
    len = std::strlen (str);
  }
  ~gmp_allocated_string()
  {
    (*__gmp_free_func) (str, len+1);
  }
};

std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;

#endif /* __cplusplus */

#endif /* __GMP_IMPL_H__ */