/* Source: github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/scanf/doscan.c */

/* __gmp_doscan -- formatted input internals.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2001-2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.
*/ 34 35 #define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */ 36 37 #include "config.h" /* needed for the HAVE_, could also move gmp incls */ 38 39 #include <stdarg.h> 40 #include <ctype.h> 41 #include <stddef.h> /* for ptrdiff_t */ 42 #include <stdio.h> 43 #include <stdlib.h> /* for strtol */ 44 #include <string.h> 45 46 #if HAVE_LANGINFO_H 47 #include <langinfo.h> /* for nl_langinfo */ 48 #endif 49 50 #if HAVE_LOCALE_H 51 #include <locale.h> /* for localeconv */ 52 #endif 53 54 #if HAVE_INTTYPES_H 55 # include <inttypes.h> /* for intmax_t */ 56 #else 57 # if HAVE_STDINT_H 58 # include <stdint.h> 59 # endif 60 #endif 61 62 #if HAVE_SYS_TYPES_H 63 #include <sys/types.h> /* for quad_t */ 64 #endif 65 66 #include "gmp.h" 67 #include "gmp-impl.h" 68 69 70 /* Change this to "#define TRACE(x) x" for some traces. */ 71 #define TRACE(x) 72 73 74 /* General: 75 76 It's necessary to parse up the format string to recognise the GMP 77 extra types F, Q and Z. Other types and conversions are passed 78 across to the standard sscanf or fscanf via funs->scan, for ease of 79 implementation. This is essential in the case of something like glibc 80 %p where the pointer format isn't actually documented. 81 82 Because funs->scan doesn't get the whole input it can't put the right 83 values in for %n, so that's handled in __gmp_doscan. Neither sscanf 84 nor fscanf directly indicate how many characters were read, so an 85 extra %n is appended to each run for that. For fscanf this merely 86 supports our %n output, but for sscanf it lets funs->step move us 87 along the input string. 88 89 Whitespace and literal matches in the format string, including %%, 90 are handled directly within __gmp_doscan. This is reasonably 91 efficient, and avoids some suspicious behaviour observed in various 92 system libc's. 
       GLIBC 2.2.4 for instance returns 0 on

           sscanf(" ", " x")
       or
           sscanf(" ", " x%d",&n)

       whereas we think they should return EOF, since end-of-string is
       reached when a match of "x" is required.

       For standard % conversions, funs->scan is called once for each
       conversion.  If we had vfscanf and vsscanf and could rely on their
       fixed text matching behaviour then we could call them with multiple
       consecutive standard conversions.  But plain fscanf and sscanf work
       fine, and parsing one field at a time shouldn't be too much of a
       slowdown.

   gmpscan:

       gmpscan reads a gmp type.  It's only used from one place, but is a
       separate subroutine to avoid a big chunk of complicated code in the
       middle of __gmp_doscan.  Within gmpscan a couple of loopbacks make it
       possible to share code for parsing integers, rationals and floats.

       In gmpscan normally one char of lookahead is maintained, but when width
       is reached that stops, on the principle that an fgetc/ungetc of a char
       past where we're told to stop would be undesirable.  "chars" is how many
       characters have been read so far, including the current c.  When
       chars==width and another character is desired then a jump is done to the
       "convert" stage.  c is invalid and mustn't be unget'ed in this case;
       chars is set to width+1 to indicate that.

       gmpscan normally returns the number of characters read.  -1 means an
       invalid field, -2 means EOF reached before any matching characters
       were read.

       For hex floats, the mantissa part is passed to mpf_set_str, then the
       exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier
       than teaching mpf_set_str about an exponent factor (ie. 2) differing
       from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and
       mpf_div_2exp will preserve the application requested precision, so
       nothing in that respect is lost by making this a two-step process.

   Matching and errors:

       C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
       string which is a match for the appropriate type, or a prefix of a
       match.  With that done, if it's only a prefix then the result is a
       matching failure, ie. invalid input.

       This rule seems fairly clear, but doesn't seem to be universally
       applied in system C libraries.  Even GLIBC doesn't seem to get it
       right, insofar as it seems to accept some apparently invalid forms.
       Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
       standard would suggest a non-empty sequence of digits should be
       required after an "0x".

       A footnote to 7.19.6.2 para 17 notes how this input item reading can
       mean inputs acceptable to strtol are not acceptable to fscanf.  We
       think this confirms our reading of "0x" as invalid.

       Clearly gmp_sscanf could backtrack to a longest input which was a
       valid match for a given item, but this is not done, since C99 says
       sscanf is identical to fscanf, so we make gmp_sscanf identical to
       gmp_fscanf.

   Types:

       C99 says "ll" is for long long, and "L" is for long double floats.
       Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This
       doesn't affect us directly, since both are passed through to plain
       scanf.  It seems wisest not to try to enforce the C99 rule.  This is
       consistent with what we said before, though whether it actually
       worked was always up to the C library.

   Alternatives:

       Consideration was given to using separate code for gmp_fscanf and
       gmp_sscanf.  The sscanf case could zip across a string doing literal
       matches or recognising digits in gmpscan, rather than making a
       function call funs->get per character.  The fscanf could use getc
       rather than fgetc too, which might help those systems where getc is a
       macro or otherwise inlined.
But none of this scanning and converting 174 will be particularly fast, so the two are done together to keep it a 175 little simpler for now. 176 177 Various multibyte string issues are not addressed, for a start C99 178 scanf says the format string is multibyte. Since we pass %c, %s and 179 %[ to the system scanf, they might do multibyte reads already, but 180 it's another matter whether or not that can be used, since our digit 181 and whitespace parsing is only unibyte. The plan is to quietly 182 ignore multibyte locales for now. This is not as bad as it sounds, 183 since GMP is presumably used mostly on numbers, which can be 184 perfectly adequately treated in plain ASCII. 185 186 */ 187 188 189 struct gmp_doscan_params_t { 190 int base; 191 int ignore; 192 char type; 193 int width; 194 }; 195 196 197 #define GET(c) \ 198 do { \ 199 ASSERT (chars <= width); \ 200 chars++; \ 201 if (chars > width) \ 202 goto convert; \ 203 (c) = (*funs->get) (data); \ 204 } while (0) 205 206 /* store into "s", extending if necessary */ 207 #define STORE(c) \ 208 do { \ 209 ASSERT (s_upto <= s_alloc); \ 210 if (s_upto >= s_alloc) \ 211 { \ 212 size_t s_alloc_new = s_alloc + S_ALLOC_STEP; \ 213 s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \ 214 s_alloc = s_alloc_new; \ 215 } \ 216 s[s_upto++] = c; \ 217 } while (0) 218 219 #define S_ALLOC_STEP 512 220 221 static int 222 gmpscan (const struct gmp_doscan_funs_t *funs, void *data, 223 const struct gmp_doscan_params_t *p, void *dst) 224 { 225 int chars, c, base, first, width, seen_point, seen_digit, hexfloat; 226 size_t s_upto, s_alloc, hexexp; 227 char *s; 228 int invalid = 0; 229 230 TRACE (printf ("gmpscan\n")); 231 232 ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z'); 233 234 c = (*funs->get) (data); 235 if (c == EOF) 236 return -2; 237 238 chars = 1; 239 first = 1; 240 seen_point = 0; 241 width = (p->width == 0 ? 
INT_MAX-1 : p->width); 242 base = p->base; 243 s_alloc = S_ALLOC_STEP; 244 s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char); 245 s_upto = 0; 246 hexfloat = 0; 247 hexexp = 0; 248 249 another: 250 seen_digit = 0; 251 if (c == '-') 252 { 253 STORE (c); 254 goto get_for_sign; 255 } 256 else if (c == '+') 257 { 258 /* don't store '+', it's not accepted by mpz_set_str etc */ 259 get_for_sign: 260 GET (c); 261 } 262 263 if (base == 0) 264 { 265 base = 10; /* decimal if no base indicator */ 266 if (c == '0') 267 { 268 seen_digit = 1; /* 0 alone is a valid number */ 269 if (p->type != 'F') 270 base = 8; /* leading 0 is octal, for non-floats */ 271 STORE (c); 272 GET (c); 273 if (c == 'x' || c == 'X') 274 { 275 base = 16; 276 seen_digit = 0; /* must have digits after an 0x */ 277 if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */ 278 hexfloat = 1; 279 else 280 STORE (c); 281 GET (c); 282 } 283 } 284 } 285 286 digits: 287 for (;;) 288 { 289 if (base == 16) 290 { 291 if (! isxdigit (c)) 292 break; 293 } 294 else 295 { 296 if (! isdigit (c)) 297 break; 298 if (base == 8 && (c == '8' || c == '9')) 299 break; 300 } 301 302 seen_digit = 1; 303 STORE (c); 304 GET (c); 305 } 306 307 if (first) 308 { 309 /* decimal point */ 310 if (p->type == 'F' && ! seen_point) 311 { 312 /* For a multi-character decimal point, if the first character is 313 present then all of it must be, otherwise the input is 314 considered invalid. */ 315 const char *point = GMP_DECIMAL_POINT; 316 int pc = (unsigned char) *point++; 317 if (c == pc) 318 { 319 for (;;) 320 { 321 STORE (c); 322 GET (c); 323 pc = (unsigned char) *point++; 324 if (pc == '\0') 325 break; 326 if (c != pc) 327 goto set_invalid; 328 } 329 seen_point = 1; 330 goto digits; 331 } 332 } 333 334 /* exponent */ 335 if (p->type == 'F') 336 { 337 if (hexfloat && (c == 'p' || c == 'P')) 338 { 339 hexexp = s_upto; /* exponent location */ 340 base = 10; /* exponent in decimal */ 341 goto exponent; 342 } 343 else if (! 
hexfloat && (c == 'e' || c == 'E')) 344 { 345 exponent: 346 /* must have at least one digit in the mantissa, just an exponent 347 is not good enough */ 348 if (! seen_digit) 349 goto set_invalid; 350 351 do_second: 352 first = 0; 353 STORE (c); 354 GET (c); 355 goto another; 356 } 357 } 358 359 /* denominator */ 360 if (p->type == 'Q' && c == '/') 361 { 362 /* must have at least one digit in the numerator */ 363 if (! seen_digit) 364 goto set_invalid; 365 366 /* now look for at least one digit in the denominator */ 367 seen_digit = 0; 368 369 /* allow the base to be redetermined for "%i" */ 370 base = p->base; 371 goto do_second; 372 } 373 } 374 375 convert: 376 if (! seen_digit) 377 { 378 set_invalid: 379 invalid = 1; 380 goto done; 381 } 382 383 if (! p->ignore) 384 { 385 STORE ('\0'); 386 TRACE (printf (" convert \"%s\"\n", s)); 387 388 /* We ought to have parsed out a valid string above, so just test 389 mpz_set_str etc with an ASSERT. */ 390 switch (p->type) { 391 case 'F': 392 { 393 mpf_ptr f = (mpf_ptr) dst; 394 if (hexexp != 0) 395 s[hexexp] = '\0'; 396 ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 
16 : 10)); 397 if (hexexp != 0) 398 { 399 char *dummy; 400 long exp; 401 exp = strtol (s + hexexp + 1, &dummy, 10); 402 if (exp >= 0) 403 mpf_mul_2exp (f, f, (unsigned long) exp); 404 else 405 mpf_div_2exp (f, f, - (unsigned long) exp); 406 } 407 } 408 break; 409 case 'Q': 410 ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base)); 411 break; 412 case 'Z': 413 ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base)); 414 break; 415 default: 416 ASSERT (0); 417 /*FALLTHRU*/ 418 break; 419 } 420 } 421 422 done: 423 ASSERT (chars <= width+1); 424 if (chars != width+1) 425 { 426 (*funs->unget) (c, data); 427 TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1)); 428 } 429 chars--; 430 431 (*__gmp_free_func) (s, s_alloc); 432 433 if (invalid) 434 { 435 TRACE (printf (" invalid\n")); 436 return -1; 437 } 438 439 TRACE (printf (" return %d chars (cf width %d)\n", chars, width)); 440 return chars; 441 } 442 443 444 /* Read and discard whitespace, if any. Return number of chars skipped. 
445 Whitespace skipping never provokes the EOF return from __gmp_doscan, so 446 it's not necessary to watch for EOF from funs->get, */ 447 static int 448 skip_white (const struct gmp_doscan_funs_t *funs, void *data) 449 { 450 int c; 451 int ret = 0; 452 453 do 454 { 455 c = (funs->get) (data); 456 ret++; 457 } 458 while (isspace (c)); 459 460 (funs->unget) (c, data); 461 ret--; 462 463 TRACE (printf (" skip white %d\n", ret)); 464 return ret; 465 } 466 467 468 int 469 __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data, 470 const char *orig_fmt, va_list orig_ap) 471 { 472 struct gmp_doscan_params_t param; 473 va_list ap; 474 char *alloc_fmt; 475 const char *fmt, *this_fmt, *end_fmt; 476 size_t orig_fmt_len, alloc_fmt_size, len; 477 int new_fields, new_chars; 478 char fchar; 479 int fields = 0; 480 int chars = 0; 481 482 TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt); 483 if (funs->scan == (gmp_doscan_scan_t) sscanf) 484 printf (" s=\"%s\"\n", * (const char **) data)); 485 486 /* Don't modify orig_ap, if va_list is actually an array and hence call by 487 reference. It could be argued that it'd be more efficient to leave 488 callers to make a copy if they care, but doing so here is going to be a 489 very small part of the total work, and we may as well keep applications 490 out of trouble. */ 491 va_copy (ap, orig_ap); 492 493 /* Parts of the format string are going to be copied so that a " %n" can 494 be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be 495 needed if fmt consists of a single "%" specifier, but otherwise is an 496 overestimate. We're not going to be very fast here, so use 497 __gmp_allocate_func rather than TMP_ALLOC. 
*/ 498 orig_fmt_len = strlen (orig_fmt); 499 alloc_fmt_size = orig_fmt_len + 4; 500 alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char); 501 502 fmt = orig_fmt; 503 end_fmt = orig_fmt + orig_fmt_len; 504 505 for (;;) 506 { 507 next: 508 fchar = *fmt++; 509 510 if (fchar == '\0') 511 break; 512 513 if (isspace (fchar)) 514 { 515 chars += skip_white (funs, data); 516 continue; 517 } 518 519 if (fchar != '%') 520 { 521 int c; 522 literal: 523 c = (funs->get) (data); 524 if (c != fchar) 525 { 526 (funs->unget) (c, data); 527 if (c == EOF) 528 { 529 eof_no_match: 530 if (fields == 0) 531 fields = EOF; 532 } 533 goto done; 534 } 535 chars++; 536 continue; 537 } 538 539 param.type = '\0'; 540 param.base = 0; /* for e,f,g,i */ 541 param.ignore = 0; 542 param.width = 0; 543 544 this_fmt = fmt-1; 545 TRACE (printf (" this_fmt \"%s\"\n", this_fmt)); 546 547 for (;;) 548 { 549 ASSERT (fmt <= end_fmt); 550 551 fchar = *fmt++; 552 switch (fchar) { 553 554 case '\0': /* unterminated % sequence */ 555 ASSERT (0); 556 goto done; 557 558 case '%': /* literal % */ 559 goto literal; 560 561 case '[': /* character range */ 562 fchar = *fmt++; 563 if (fchar == '^') 564 fchar = *fmt++; 565 /* ']' allowed as the first char (possibly after '^') */ 566 if (fchar == ']') 567 fchar = *fmt++; 568 for (;;) 569 { 570 ASSERT (fmt <= end_fmt); 571 if (fchar == '\0') 572 { 573 /* unterminated % sequence */ 574 ASSERT (0); 575 goto done; 576 } 577 if (fchar == ']') 578 break; 579 fchar = *fmt++; 580 } 581 /*FALLTHRU*/ 582 case 'c': /* characters */ 583 case 's': /* string of non-whitespace */ 584 case 'p': /* pointer */ 585 libc_type: 586 len = fmt - this_fmt; 587 memcpy (alloc_fmt, this_fmt, len); 588 alloc_fmt[len++] = '%'; 589 alloc_fmt[len++] = 'n'; 590 alloc_fmt[len] = '\0'; 591 592 TRACE (printf (" scan \"%s\"\n", alloc_fmt); 593 if (funs->scan == (gmp_doscan_scan_t) sscanf) 594 printf (" s=\"%s\"\n", * (const char **) data)); 595 596 new_chars = -1; 597 if (param.ignore) 598 { 599 
new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL); 600 ASSERT (new_fields == 0 || new_fields == EOF); 601 } 602 else 603 { 604 void *arg = va_arg (ap, void *); 605 new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars); 606 ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF); 607 608 if (new_fields == 0) 609 goto done; /* invalid input */ 610 611 if (new_fields == 1) 612 ASSERT (new_chars != -1); 613 } 614 TRACE (printf (" new_fields %d new_chars %d\n", 615 new_fields, new_chars)); 616 617 if (new_fields == -1) 618 goto eof_no_match; /* EOF before anything matched */ 619 620 /* Under param.ignore, when new_fields==0 we don't know if 621 it's a successful match or an invalid field. new_chars 622 won't have been assigned if it was an invalid field. */ 623 if (new_chars == -1) 624 goto done; /* invalid input */ 625 626 chars += new_chars; 627 (*funs->step) (data, new_chars); 628 629 increment_fields: 630 if (! param.ignore) 631 fields++; 632 goto next; 633 634 case 'd': /* decimal */ 635 case 'u': /* decimal */ 636 param.base = 10; 637 goto numeric; 638 639 case 'e': /* float */ 640 case 'E': /* float */ 641 case 'f': /* float */ 642 case 'g': /* float */ 643 case 'G': /* float */ 644 case 'i': /* integer with base marker */ 645 numeric: 646 if (param.type != 'F' && param.type != 'Q' && param.type != 'Z') 647 goto libc_type; 648 649 chars += skip_white (funs, data); 650 651 new_chars = gmpscan (funs, data, ¶m, 652 param.ignore ? 
NULL : va_arg (ap, void*)); 653 if (new_chars == -2) 654 goto eof_no_match; 655 if (new_chars == -1) 656 goto done; 657 658 ASSERT (new_chars >= 0); 659 chars += new_chars; 660 goto increment_fields; 661 662 case 'a': /* glibc allocate string */ 663 case '\'': /* glibc digit groupings */ 664 break; 665 666 case 'F': /* mpf_t */ 667 case 'j': /* intmax_t */ 668 case 'L': /* long long */ 669 case 'q': /* quad_t */ 670 case 'Q': /* mpq_t */ 671 case 't': /* ptrdiff_t */ 672 case 'z': /* size_t */ 673 case 'Z': /* mpz_t */ 674 set_type: 675 param.type = fchar; 676 break; 677 678 case 'h': /* short or char */ 679 if (param.type != 'h') 680 goto set_type; 681 param.type = 'H'; /* internal code for "hh" */ 682 break; 683 684 goto numeric; 685 686 case 'l': /* long, long long, double or long double */ 687 if (param.type != 'l') 688 goto set_type; 689 param.type = 'L'; /* "ll" means "L" */ 690 break; 691 692 case 'n': 693 if (! param.ignore) 694 { 695 void *p; 696 p = va_arg (ap, void *); 697 TRACE (printf (" store %%n to %p\n", p)); 698 switch (param.type) { 699 case '\0': * (int *) p = chars; break; 700 case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break; 701 case 'H': * (char *) p = chars; break; 702 case 'h': * (short *) p = chars; break; 703 #if HAVE_INTMAX_T 704 case 'j': * (intmax_t *) p = chars; break; 705 #else 706 case 'j': ASSERT_FAIL (intmax_t not available); break; 707 #endif 708 case 'l': * (long *) p = chars; break; 709 #if HAVE_QUAD_T && HAVE_LONG_LONG 710 case 'q': 711 ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long)); 712 /*FALLTHRU*/ 713 #else 714 case 'q': ASSERT_FAIL (quad_t not available); break; 715 #endif 716 #if HAVE_LONG_LONG 717 case 'L': * (long long *) p = chars; break; 718 #else 719 case 'L': ASSERT_FAIL (long long not available); break; 720 #endif 721 case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break; 722 #if HAVE_PTRDIFF_T 723 case 't': * (ptrdiff_t *) p = chars; break; 724 #else 725 case 't': ASSERT_FAIL (ptrdiff_t not 
available); break; 726 #endif 727 case 'z': * (size_t *) p = chars; break; 728 case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break; 729 default: ASSERT (0); break; 730 } 731 } 732 goto next; 733 734 case 'o': 735 param.base = 8; 736 goto numeric; 737 738 case 'x': 739 case 'X': 740 param.base = 16; 741 goto numeric; 742 743 case '0': case '1': case '2': case '3': case '4': 744 case '5': case '6': case '7': case '8': case '9': 745 param.width = 0; 746 do { 747 param.width = param.width * 10 + (fchar-'0'); 748 fchar = *fmt++; 749 } while (isdigit (fchar)); 750 fmt--; /* unget the non-digit */ 751 break; 752 753 case '*': 754 param.ignore = 1; 755 break; 756 757 default: 758 /* something invalid in a % sequence */ 759 ASSERT (0); 760 goto next; 761 } 762 } 763 } 764 765 done: 766 (*__gmp_free_func) (alloc_fmt, alloc_fmt_size); 767 return fields; 768 }