github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/strconv/atof.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strconv 6 7 // decimal to binary floating point conversion. 8 // Algorithm: 9 // 1) Store input in multiprecision decimal. 10 // 2) Multiply/divide decimal by powers of two until in range [0.5, 1) 11 // 3) Multiply by 2^precision and round to get mantissa. 12 13 import "math" 14 15 var optimize = true // set to false to force slow-path conversions for testing 16 17 // commonPrefixLenIgnoreCase returns the length of the common 18 // prefix of s and prefix, with the character case of s ignored. 19 // The prefix argument must be all lower-case. 20 func commonPrefixLenIgnoreCase(s, prefix string) int { 21 n := len(prefix) 22 if n > len(s) { 23 n = len(s) 24 } 25 for i := 0; i < n; i++ { 26 c := s[i] 27 if 'A' <= c && c <= 'Z' { 28 c += 'a' - 'A' 29 } 30 if c != prefix[i] { 31 return i 32 } 33 } 34 return n 35 } 36 37 // special returns the floating-point value for the special, 38 // possibly signed floating-point representations inf, infinity, 39 // and NaN. The result is ok if a prefix of s contains one 40 // of these representations and n is the length of that prefix. 41 // The character case is ignored. 42 func special(s string) (f float64, n int, ok bool) { 43 if len(s) == 0 { 44 return 0, 0, false 45 } 46 sign := 1 47 nsign := 0 48 switch s[0] { 49 case '+', '-': 50 if s[0] == '-' { 51 sign = -1 52 } 53 nsign = 1 54 s = s[1:] 55 fallthrough 56 case 'i', 'I': 57 n := commonPrefixLenIgnoreCase(s, "infinity") 58 // Anything longer than "inf" is ok, but if we 59 // don't have "infinity", only consume "inf". 60 if 3 < n && n < 8 { 61 n = 3 62 } 63 if n == 3 || n == 8 { 64 return math.Inf(sign), nsign + n, true 65 } 66 case 'n', 'N': 67 if commonPrefixLenIgnoreCase(s, "nan") == 3 { 68 return math.NaN(), 3, true 69 } 70 } 71 return 0, 0, false 72 } 73 74 func (b *decimal) set(s string) (ok bool) { 75 i := 0 76 b.neg = false 77 b.trunc = false 78 79 // optional sign 80 if i >= len(s) { 81 return 82 } 83 switch { 84 case s[i] == '+': 85 i++ 86 case s[i] == '-': 87 b.neg = true 88 i++ 89 } 90 91 // digits 92 sawdot := false 93 sawdigits := false 94 for ; i < len(s); i++ { 95 switch { 96 case s[i] == '_': 97 // readFloat already checked underscores 98 continue 99 case s[i] == '.': 100 if sawdot { 101 return 102 } 103 sawdot = true 104 b.dp = b.nd 105 continue 106 107 case '0' <= s[i] && s[i] <= '9': 108 sawdigits = true 109 if s[i] == '0' && b.nd == 0 { // ignore leading zeros 110 b.dp-- 111 continue 112 } 113 if b.nd < len(b.d) { 114 b.d[b.nd] = s[i] 115 b.nd++ 116 } else if s[i] != '0' { 117 b.trunc = true 118 } 119 continue 120 } 121 break 122 } 123 if !sawdigits { 124 return 125 } 126 if !sawdot { 127 b.dp = b.nd 128 } 129 130 // optional exponent moves decimal point. 131 // if we read a very large, very long number, 132 // just be sure to move the decimal point by 133 // a lot (say, 100000). it doesn't matter if it's 134 // not the exact number. 135 if i < len(s) && lower(s[i]) == 'e' { 136 i++ 137 if i >= len(s) { 138 return 139 } 140 esign := 1 141 if s[i] == '+' { 142 i++ 143 } else if s[i] == '-' { 144 i++ 145 esign = -1 146 } 147 if i >= len(s) || s[i] < '0' || s[i] > '9' { 148 return 149 } 150 e := 0 151 for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { 152 if s[i] == '_' { 153 // readFloat already checked underscores 154 continue 155 } 156 if e < 10000 { 157 e = e*10 + int(s[i]) - '0' 158 } 159 } 160 b.dp += e * esign 161 } 162 163 if i != len(s) { 164 return 165 } 166 167 ok = true 168 return 169 } 170 171 // readFloat reads a decimal or hexadecimal mantissa and exponent from a float 172 // string representation in s; the number may be followed by other characters. 173 // readFloat reports the number of bytes consumed (i), and whether the number 174 // is valid (ok). 175 func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) { 176 underscores := false 177 178 // optional sign 179 if i >= len(s) { 180 return 181 } 182 switch { 183 case s[i] == '+': 184 i++ 185 case s[i] == '-': 186 neg = true 187 i++ 188 } 189 190 // digits 191 base := uint64(10) 192 maxMantDigits := 19 // 10^19 fits in uint64 193 expChar := byte('e') 194 if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' { 195 base = 16 196 maxMantDigits = 16 // 16^16 fits in uint64 197 i += 2 198 expChar = 'p' 199 hex = true 200 } 201 sawdot := false 202 sawdigits := false 203 nd := 0 204 ndMant := 0 205 dp := 0 206 loop: 207 for ; i < len(s); i++ { 208 switch c := s[i]; true { 209 case c == '_': 210 underscores = true 211 continue 212 213 case c == '.': 214 if sawdot { 215 break loop 216 } 217 sawdot = true 218 dp = nd 219 continue 220 221 case '0' <= c && c <= '9': 222 sawdigits = true 223 if c == '0' && nd == 0 { // ignore leading zeros 224 dp-- 225 continue 226 } 227 nd++ 228 if ndMant < maxMantDigits { 229 mantissa *= base 230 mantissa += uint64(c - '0') 231 ndMant++ 232 } else if c != '0' { 233 trunc = true 234 } 235 continue 236 237 case base == 16 && 'a' <= lower(c) && lower(c) <= 'f': 238 sawdigits = true 239 nd++ 240 if ndMant < maxMantDigits { 241 mantissa *= 16 242 mantissa += uint64(lower(c) - 'a' + 10) 243 ndMant++ 244 } else { 245 trunc = true 246 } 247 continue 248 } 249 break 250 } 251 if !sawdigits { 252 return 253 } 254 if !sawdot { 255 dp = nd 256 } 257 258 if base == 16 { 259 dp *= 4 260 ndMant *= 4 261 } 262 263 // optional exponent moves decimal point. 264 // if we read a very large, very long number, 265 // just be sure to move the decimal point by 266 // a lot (say, 100000). it doesn't matter if it's 267 // not the exact number. 268 if i < len(s) && lower(s[i]) == expChar { 269 i++ 270 if i >= len(s) { 271 return 272 } 273 esign := 1 274 if s[i] == '+' { 275 i++ 276 } else if s[i] == '-' { 277 i++ 278 esign = -1 279 } 280 if i >= len(s) || s[i] < '0' || s[i] > '9' { 281 return 282 } 283 e := 0 284 for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ { 285 if s[i] == '_' { 286 underscores = true 287 continue 288 } 289 if e < 10000 { 290 e = e*10 + int(s[i]) - '0' 291 } 292 } 293 dp += e * esign 294 } else if base == 16 { 295 // Must have exponent. 296 return 297 } 298 299 if mantissa != 0 { 300 exp = dp - ndMant 301 } 302 303 if underscores && !underscoreOK(s[:i]) { 304 return 305 } 306 307 ok = true 308 return 309 } 310 311 // decimal power of ten to binary power of two. 312 var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26} 313 314 func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) { 315 var exp int 316 var mant uint64 317 318 // Zero is always a special case. 319 if d.nd == 0 { 320 mant = 0 321 exp = flt.bias 322 goto out 323 } 324 325 // Obvious overflow/underflow. 326 // These bounds are for 64-bit floats. 327 // Will have to change if we want to support 80-bit floats in the future. 328 if d.dp > 310 { 329 goto overflow 330 } 331 if d.dp < -330 { 332 // zero 333 mant = 0 334 exp = flt.bias 335 goto out 336 } 337 338 // Scale by powers of two until in range [0.5, 1.0) 339 exp = 0 340 for d.dp > 0 { 341 var n int 342 if d.dp >= len(powtab) { 343 n = 27 344 } else { 345 n = powtab[d.dp] 346 } 347 d.Shift(-n) 348 exp += n 349 } 350 for d.dp < 0 || d.dp == 0 && d.d[0] < '5' { 351 var n int 352 if -d.dp >= len(powtab) { 353 n = 27 354 } else { 355 n = powtab[-d.dp] 356 } 357 d.Shift(n) 358 exp -= n 359 } 360 361 // Our range is [0.5,1) but floating point range is [1,2). 362 exp-- 363 364 // Minimum representable exponent is flt.bias+1. 365 // If the exponent is smaller, move it up and 366 // adjust d accordingly. 367 if exp < flt.bias+1 { 368 n := flt.bias + 1 - exp 369 d.Shift(-n) 370 exp += n 371 } 372 373 if exp-flt.bias >= 1<<flt.expbits-1 { 374 goto overflow 375 } 376 377 // Extract 1+flt.mantbits bits. 378 d.Shift(int(1 + flt.mantbits)) 379 mant = d.RoundedInteger() 380 381 // Rounding might have added a bit; shift down. 382 if mant == 2<<flt.mantbits { 383 mant >>= 1 384 exp++ 385 if exp-flt.bias >= 1<<flt.expbits-1 { 386 goto overflow 387 } 388 } 389 390 // Denormalized? 391 if mant&(1<<flt.mantbits) == 0 { 392 exp = flt.bias 393 } 394 goto out 395 396 overflow: 397 // ±Inf 398 mant = 0 399 exp = 1<<flt.expbits - 1 + flt.bias 400 overflow = true 401 402 out: 403 // Assemble bits. 404 bits := mant & (uint64(1)<<flt.mantbits - 1) 405 bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits 406 if d.neg { 407 bits |= 1 << flt.mantbits << flt.expbits 408 } 409 return bits, overflow 410 } 411 412 // Exact powers of 10. 413 var float64pow10 = []float64{ 414 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 415 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 416 1e20, 1e21, 1e22, 417 } 418 var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10} 419 420 // If possible to convert decimal representation to 64-bit float f exactly, 421 // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits. 422 // Three common cases: 423 // 424 // value is exact integer 425 // value is exact integer * exact power of ten 426 // value is exact integer / exact power of ten 427 // 428 // These all produce potentially inexact but correctly rounded answers. 429 func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) { 430 if mantissa>>float64info.mantbits != 0 { 431 return 432 } 433 f = float64(mantissa) 434 if neg { 435 f = -f 436 } 437 switch { 438 case exp == 0: 439 // an integer. 440 return f, true 441 // Exact integers are <= 10^15. 442 // Exact powers of ten are <= 10^22. 443 case exp > 0 && exp <= 15+22: // int * 10^k 444 // If exponent is big but number of digits is not, 445 // can move a few zeros into the integer part. 446 if exp > 22 { 447 f *= float64pow10[exp-22] 448 exp = 22 449 } 450 if f > 1e15 || f < -1e15 { 451 // the exponent was really too large. 452 return 453 } 454 return f * float64pow10[exp], true 455 case exp < 0 && exp >= -22: // int / 10^k 456 return f / float64pow10[-exp], true 457 } 458 return 459 } 460 461 // If possible to compute mantissa*10^exp to 32-bit float f exactly, 462 // entirely in floating-point math, do so, avoiding the machinery above. 463 func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) { 464 if mantissa>>float32info.mantbits != 0 { 465 return 466 } 467 f = float32(mantissa) 468 if neg { 469 f = -f 470 } 471 switch { 472 case exp == 0: 473 return f, true 474 // Exact integers are <= 10^7. 475 // Exact powers of ten are <= 10^10. 476 case exp > 0 && exp <= 7+10: // int * 10^k 477 // If exponent is big but number of digits is not, 478 // can move a few zeros into the integer part. 479 if exp > 10 { 480 f *= float32pow10[exp-10] 481 exp = 10 482 } 483 if f > 1e7 || f < -1e7 { 484 // the exponent was really too large. 485 return 486 } 487 return f * float32pow10[exp], true 488 case exp < 0 && exp >= -10: // int / 10^k 489 return f / float32pow10[-exp], true 490 } 491 return 492 } 493 494 // atofHex converts the hex floating-point string s 495 // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info) 496 // and returns it as a float64. 497 // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative). 498 // If trunc is true, trailing non-zero bits have been omitted from the mantissa. 499 func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) { 500 maxExp := 1<<flt.expbits + flt.bias - 2 501 minExp := flt.bias + 1 502 exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits. 503 504 // Shift mantissa and exponent to bring representation into float range. 505 // Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits. 506 // For rounding, we need two more, where the bottom bit represents 507 // whether that bit or any later bit was non-zero. 508 // (If the mantissa has already lost non-zero bits, trunc is true, 509 // and we OR in a 1 below after shifting left appropriately.) 510 for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 { 511 mantissa <<= 1 512 exp-- 513 } 514 if trunc { 515 mantissa |= 1 516 } 517 for mantissa>>(1+flt.mantbits+2) != 0 { 518 mantissa = mantissa>>1 | mantissa&1 519 exp++ 520 } 521 522 // If exponent is too negative, 523 // denormalize in hopes of making it representable. 524 // (The -2 is for the rounding bits.) 525 for mantissa > 1 && exp < minExp-2 { 526 mantissa = mantissa>>1 | mantissa&1 527 exp++ 528 } 529 530 // Round using two bottom bits. 531 round := mantissa & 3 532 mantissa >>= 2 533 round |= mantissa & 1 // round to even (round up if mantissa is odd) 534 exp += 2 535 if round == 3 { 536 mantissa++ 537 if mantissa == 1<<(1+flt.mantbits) { 538 mantissa >>= 1 539 exp++ 540 } 541 } 542 543 if mantissa>>flt.mantbits == 0 { // Denormal or zero. 544 exp = flt.bias 545 } 546 var err error 547 if exp > maxExp { // infinity and range error 548 mantissa = 1 << flt.mantbits 549 exp = maxExp + 1 550 err = rangeError(fnParseFloat, s) 551 } 552 553 bits := mantissa & (1<<flt.mantbits - 1) 554 bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits 555 if neg { 556 bits |= 1 << flt.mantbits << flt.expbits 557 } 558 if flt == &float32info { 559 return float64(math.Float32frombits(uint32(bits))), err 560 } 561 return math.Float64frombits(bits), err 562 } 563 564 const fnParseFloat = "ParseFloat" 565 566 func atof32(s string) (f float32, n int, err error) { 567 if val, n, ok := special(s); ok { 568 return float32(val), n, nil 569 } 570 571 mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) 572 if !ok { 573 return 0, n, syntaxError(fnParseFloat, s) 574 } 575 576 if hex { 577 f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc) 578 return float32(f), n, err 579 } 580 581 if optimize { 582 // Try pure floating-point arithmetic conversion, and if that fails, 583 // the Eisel-Lemire algorithm. 584 if !trunc { 585 if f, ok := atof32exact(mantissa, exp, neg); ok { 586 return f, n, nil 587 } 588 } 589 f, ok := eiselLemire32(mantissa, exp, neg) 590 if ok { 591 if !trunc { 592 return f, n, nil 593 } 594 // Even if the mantissa was truncated, we may 595 // have found the correct result. Confirm by 596 // converting the upper mantissa bound. 597 fUp, ok := eiselLemire32(mantissa+1, exp, neg) 598 if ok && f == fUp { 599 return f, n, nil 600 } 601 } 602 } 603 604 // Slow fallback. 605 var d decimal 606 if !d.set(s[:n]) { 607 return 0, n, syntaxError(fnParseFloat, s) 608 } 609 b, ovf := d.floatBits(&float32info) 610 f = math.Float32frombits(uint32(b)) 611 if ovf { 612 err = rangeError(fnParseFloat, s) 613 } 614 return f, n, err 615 } 616 617 func atof64(s string) (f float64, n int, err error) { 618 if val, n, ok := special(s); ok { 619 return val, n, nil 620 } 621 622 mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) 623 if !ok { 624 return 0, n, syntaxError(fnParseFloat, s) 625 } 626 627 if hex { 628 f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc) 629 return f, n, err 630 } 631 632 if optimize { 633 // Try pure floating-point arithmetic conversion, and if that fails, 634 // the Eisel-Lemire algorithm. 635 if !trunc { 636 if f, ok := atof64exact(mantissa, exp, neg); ok { 637 return f, n, nil 638 } 639 } 640 f, ok := eiselLemire64(mantissa, exp, neg) 641 if ok { 642 if !trunc { 643 return f, n, nil 644 } 645 // Even if the mantissa was truncated, we may 646 // have found the correct result. Confirm by 647 // converting the upper mantissa bound. 648 fUp, ok := eiselLemire64(mantissa+1, exp, neg) 649 if ok && f == fUp { 650 return f, n, nil 651 } 652 } 653 } 654 655 // Slow fallback. 656 var d decimal 657 if !d.set(s[:n]) { 658 return 0, n, syntaxError(fnParseFloat, s) 659 } 660 b, ovf := d.floatBits(&float64info) 661 f = math.Float64frombits(b) 662 if ovf { 663 err = rangeError(fnParseFloat, s) 664 } 665 return f, n, err 666 } 667 668 // ParseFloat converts the string s to a floating-point number 669 // with the precision specified by bitSize: 32 for float32, or 64 for float64. 670 // When bitSize=32, the result still has type float64, but it will be 671 // convertible to float32 without changing its value. 672 // 673 // ParseFloat accepts decimal and hexadecimal floating-point numbers 674 // as defined by the Go syntax for [floating-point literals]. 675 // If s is well-formed and near a valid floating-point number, 676 // ParseFloat returns the nearest floating-point number rounded 677 // using IEEE754 unbiased rounding. 678 // (Parsing a hexadecimal floating-point value only rounds when 679 // there are more bits in the hexadecimal representation than 680 // will fit in the mantissa.) 681 // 682 // The errors that ParseFloat returns have concrete type *NumError 683 // and include err.Num = s. 684 // 685 // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax. 686 // 687 // If s is syntactically well-formed but is more than 1/2 ULP 688 // away from the largest floating point number of the given size, 689 // ParseFloat returns f = ±Inf, err.Err = ErrRange. 690 // 691 // ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity" 692 // as their respective special floating point values. It ignores case when matching. 693 // 694 // [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals 695 func ParseFloat(s string, bitSize int) (float64, error) { 696 f, n, err := parseFloatPrefix(s, bitSize) 697 if n != len(s) && (err == nil || err.(*NumError).Err != ErrSyntax) { 698 return 0, syntaxError(fnParseFloat, s) 699 } 700 return f, err 701 } 702 703 func parseFloatPrefix(s string, bitSize int) (float64, int, error) { 704 if bitSize == 32 { 705 f, n, err := atof32(s) 706 return float64(f), n, err 707 } 708 return atof64(s) 709 }