github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/strconv/atof.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  // decimal to binary floating point conversion.
     8  // Algorithm:
     9  //   1) Store input in multiprecision decimal.
    10  //   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
    11  //   3) Multiply by 2^precision and round to get mantissa.
    12  
    13  import "github.com/x04/go/src/math"
    14  
// optimize enables the fast-path conversions attempted by atof32 and atof64.
// Set it to false to force the slow-path conversions for testing.
var optimize = true
    16  
    17  func equalIgnoreCase(s1, s2 string) bool {
    18  	if len(s1) != len(s2) {
    19  		return false
    20  	}
    21  	for i := 0; i < len(s1); i++ {
    22  		c1 := s1[i]
    23  		if 'A' <= c1 && c1 <= 'Z' {
    24  			c1 += 'a' - 'A'
    25  		}
    26  		c2 := s2[i]
    27  		if 'A' <= c2 && c2 <= 'Z' {
    28  			c2 += 'a' - 'A'
    29  		}
    30  		if c1 != c2 {
    31  			return false
    32  		}
    33  	}
    34  	return true
    35  }
    36  
    37  func special(s string) (f float64, ok bool) {
    38  	if len(s) == 0 {
    39  		return
    40  	}
    41  	switch s[0] {
    42  	default:
    43  		return
    44  	case '+':
    45  		if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") {
    46  			return math.Inf(1), true
    47  		}
    48  	case '-':
    49  		if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") {
    50  			return math.Inf(-1), true
    51  		}
    52  	case 'n', 'N':
    53  		if equalIgnoreCase(s, "nan") {
    54  			return math.NaN(), true
    55  		}
    56  	case 'i', 'I':
    57  		if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") {
    58  			return math.Inf(1), true
    59  		}
    60  	}
    61  	return
    62  }
    63  
    64  func (b *decimal) set(s string) (ok bool) {
    65  	i := 0
    66  	b.neg = false
    67  	b.trunc = false
    68  
    69  	// optional sign
    70  	if i >= len(s) {
    71  		return
    72  	}
    73  	switch {
    74  	case s[i] == '+':
    75  		i++
    76  	case s[i] == '-':
    77  		b.neg = true
    78  		i++
    79  	}
    80  
    81  	// digits
    82  	sawdot := false
    83  	sawdigits := false
    84  	for ; i < len(s); i++ {
    85  		switch {
    86  		case s[i] == '_':
    87  			// readFloat already checked underscores
    88  			continue
    89  		case s[i] == '.':
    90  			if sawdot {
    91  				return
    92  			}
    93  			sawdot = true
    94  			b.dp = b.nd
    95  			continue
    96  
    97  		case '0' <= s[i] && s[i] <= '9':
    98  			sawdigits = true
    99  			if s[i] == '0' && b.nd == 0 {	// ignore leading zeros
   100  				b.dp--
   101  				continue
   102  			}
   103  			if b.nd < len(b.d) {
   104  				b.d[b.nd] = s[i]
   105  				b.nd++
   106  			} else if s[i] != '0' {
   107  				b.trunc = true
   108  			}
   109  			continue
   110  		}
   111  		break
   112  	}
   113  	if !sawdigits {
   114  		return
   115  	}
   116  	if !sawdot {
   117  		b.dp = b.nd
   118  	}
   119  
   120  	// optional exponent moves decimal point.
   121  	// if we read a very large, very long number,
   122  	// just be sure to move the decimal point by
   123  	// a lot (say, 100000).  it doesn't matter if it's
   124  	// not the exact number.
   125  	if i < len(s) && lower(s[i]) == 'e' {
   126  		i++
   127  		if i >= len(s) {
   128  			return
   129  		}
   130  		esign := 1
   131  		if s[i] == '+' {
   132  			i++
   133  		} else if s[i] == '-' {
   134  			i++
   135  			esign = -1
   136  		}
   137  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   138  			return
   139  		}
   140  		e := 0
   141  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   142  			if s[i] == '_' {
   143  				// readFloat already checked underscores
   144  				continue
   145  			}
   146  			if e < 10000 {
   147  				e = e*10 + int(s[i]) - '0'
   148  			}
   149  		}
   150  		b.dp += e * esign
   151  	}
   152  
   153  	if i != len(s) {
   154  		return
   155  	}
   156  
   157  	ok = true
   158  	return
   159  }
   160  
   161  // readFloat reads a decimal mantissa and exponent from a float
   162  // string representation. It returns ok==false if the number
   163  // is invalid.
   164  func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
   165  	i := 0
   166  	underscores := false
   167  
   168  	// optional sign
   169  	if i >= len(s) {
   170  		return
   171  	}
   172  	switch {
   173  	case s[i] == '+':
   174  		i++
   175  	case s[i] == '-':
   176  		neg = true
   177  		i++
   178  	}
   179  
   180  	// digits
   181  	base := uint64(10)
   182  	maxMantDigits := 19	// 10^19 fits in uint64
   183  	expChar := byte('e')
   184  	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
   185  		base = 16
   186  		maxMantDigits = 16	// 16^16 fits in uint64
   187  		i += 2
   188  		expChar = 'p'
   189  		hex = true
   190  	}
   191  	sawdot := false
   192  	sawdigits := false
   193  	nd := 0
   194  	ndMant := 0
   195  	dp := 0
   196  	for ; i < len(s); i++ {
   197  		switch c := s[i]; true {
   198  		case c == '_':
   199  			underscores = true
   200  			continue
   201  
   202  		case c == '.':
   203  			if sawdot {
   204  				return
   205  			}
   206  			sawdot = true
   207  			dp = nd
   208  			continue
   209  
   210  		case '0' <= c && c <= '9':
   211  			sawdigits = true
   212  			if c == '0' && nd == 0 {	// ignore leading zeros
   213  				dp--
   214  				continue
   215  			}
   216  			nd++
   217  			if ndMant < maxMantDigits {
   218  				mantissa *= base
   219  				mantissa += uint64(c - '0')
   220  				ndMant++
   221  			} else if c != '0' {
   222  				trunc = true
   223  			}
   224  			continue
   225  
   226  		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
   227  			sawdigits = true
   228  			nd++
   229  			if ndMant < maxMantDigits {
   230  				mantissa *= 16
   231  				mantissa += uint64(lower(c) - 'a' + 10)
   232  				ndMant++
   233  			} else {
   234  				trunc = true
   235  			}
   236  			continue
   237  		}
   238  		break
   239  	}
   240  	if !sawdigits {
   241  		return
   242  	}
   243  	if !sawdot {
   244  		dp = nd
   245  	}
   246  
   247  	if base == 16 {
   248  		dp *= 4
   249  		ndMant *= 4
   250  	}
   251  
   252  	// optional exponent moves decimal point.
   253  	// if we read a very large, very long number,
   254  	// just be sure to move the decimal point by
   255  	// a lot (say, 100000).  it doesn't matter if it's
   256  	// not the exact number.
   257  	if i < len(s) && lower(s[i]) == expChar {
   258  		i++
   259  		if i >= len(s) {
   260  			return
   261  		}
   262  		esign := 1
   263  		if s[i] == '+' {
   264  			i++
   265  		} else if s[i] == '-' {
   266  			i++
   267  			esign = -1
   268  		}
   269  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   270  			return
   271  		}
   272  		e := 0
   273  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   274  			if s[i] == '_' {
   275  				underscores = true
   276  				continue
   277  			}
   278  			if e < 10000 {
   279  				e = e*10 + int(s[i]) - '0'
   280  			}
   281  		}
   282  		dp += e * esign
   283  	} else if base == 16 {
   284  		// Must have exponent.
   285  		return
   286  	}
   287  
   288  	if i != len(s) {
   289  		return
   290  	}
   291  
   292  	if mantissa != 0 {
   293  		exp = dp - ndMant
   294  	}
   295  
   296  	if underscores && !underscoreOK(s) {
   297  		return
   298  	}
   299  
   300  	ok = true
   301  	return
   302  }
   303  
   304  // decimal power of ten to binary power of two.
   305  var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   306  
   307  func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
   308  	var exp int
   309  	var mant uint64
   310  
   311  	// Zero is always a special case.
   312  	if d.nd == 0 {
   313  		mant = 0
   314  		exp = flt.bias
   315  		goto out
   316  	}
   317  
   318  	// Obvious overflow/underflow.
   319  	// These bounds are for 64-bit floats.
   320  	// Will have to change if we want to support 80-bit floats in the future.
   321  	if d.dp > 310 {
   322  		goto overflow
   323  	}
   324  	if d.dp < -330 {
   325  		// zero
   326  		mant = 0
   327  		exp = flt.bias
   328  		goto out
   329  	}
   330  
   331  	// Scale by powers of two until in range [0.5, 1.0)
   332  	exp = 0
   333  	for d.dp > 0 {
   334  		var n int
   335  		if d.dp >= len(powtab) {
   336  			n = 27
   337  		} else {
   338  			n = powtab[d.dp]
   339  		}
   340  		d.Shift(-n)
   341  		exp += n
   342  	}
   343  	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
   344  		var n int
   345  		if -d.dp >= len(powtab) {
   346  			n = 27
   347  		} else {
   348  			n = powtab[-d.dp]
   349  		}
   350  		d.Shift(n)
   351  		exp -= n
   352  	}
   353  
   354  	// Our range is [0.5,1) but floating point range is [1,2).
   355  	exp--
   356  
   357  	// Minimum representable exponent is flt.bias+1.
   358  	// If the exponent is smaller, move it up and
   359  	// adjust d accordingly.
   360  	if exp < flt.bias+1 {
   361  		n := flt.bias + 1 - exp
   362  		d.Shift(-n)
   363  		exp += n
   364  	}
   365  
   366  	if exp-flt.bias >= 1<<flt.expbits-1 {
   367  		goto overflow
   368  	}
   369  
   370  	// Extract 1+flt.mantbits bits.
   371  	d.Shift(int(1 + flt.mantbits))
   372  	mant = d.RoundedInteger()
   373  
   374  	// Rounding might have added a bit; shift down.
   375  	if mant == 2<<flt.mantbits {
   376  		mant >>= 1
   377  		exp++
   378  		if exp-flt.bias >= 1<<flt.expbits-1 {
   379  			goto overflow
   380  		}
   381  	}
   382  
   383  	// Denormalized?
   384  	if mant&(1<<flt.mantbits) == 0 {
   385  		exp = flt.bias
   386  	}
   387  	goto out
   388  
   389  overflow:
   390  	// ±Inf
   391  	mant = 0
   392  	exp = 1<<flt.expbits - 1 + flt.bias
   393  	overflow = true
   394  
   395  out:
   396  	// Assemble bits.
   397  	bits := mant & (uint64(1)<<flt.mantbits - 1)
   398  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   399  	if d.neg {
   400  		bits |= 1 << flt.mantbits << flt.expbits
   401  	}
   402  	return bits, overflow
   403  }
   404  
   405  // Exact powers of 10.
   406  var float64pow10 = []float64{
   407  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
   408  	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
   409  	1e20, 1e21, 1e22,
   410  }
   411  var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   412  
   413  // If possible to convert decimal representation to 64-bit float f exactly,
   414  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   415  // Three common cases:
   416  //	value is exact integer
   417  //	value is exact integer * exact power of ten
   418  //	value is exact integer / exact power of ten
   419  // These all produce potentially inexact but correctly rounded answers.
   420  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   421  	if mantissa>>float64info.mantbits != 0 {
   422  		return
   423  	}
   424  	f = float64(mantissa)
   425  	if neg {
   426  		f = -f
   427  	}
   428  	switch {
   429  	case exp == 0:
   430  		// an integer.
   431  		return f, true
   432  	// Exact integers are <= 10^15.
   433  	// Exact powers of ten are <= 10^22.
   434  	case exp > 0 && exp <= 15+22:	// int * 10^k
   435  		// If exponent is big but number of digits is not,
   436  		// can move a few zeros into the integer part.
   437  		if exp > 22 {
   438  			f *= float64pow10[exp-22]
   439  			exp = 22
   440  		}
   441  		if f > 1e15 || f < -1e15 {
   442  			// the exponent was really too large.
   443  			return
   444  		}
   445  		return f * float64pow10[exp], true
   446  	case exp < 0 && exp >= -22:	// int / 10^k
   447  		return f / float64pow10[-exp], true
   448  	}
   449  	return
   450  }
   451  
   452  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
   453  // entirely in floating-point math, do so, avoiding the machinery above.
   454  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   455  	if mantissa>>float32info.mantbits != 0 {
   456  		return
   457  	}
   458  	f = float32(mantissa)
   459  	if neg {
   460  		f = -f
   461  	}
   462  	switch {
   463  	case exp == 0:
   464  		return f, true
   465  	// Exact integers are <= 10^7.
   466  	// Exact powers of ten are <= 10^10.
   467  	case exp > 0 && exp <= 7+10:	// int * 10^k
   468  		// If exponent is big but number of digits is not,
   469  		// can move a few zeros into the integer part.
   470  		if exp > 10 {
   471  			f *= float32pow10[exp-10]
   472  			exp = 10
   473  		}
   474  		if f > 1e7 || f < -1e7 {
   475  			// the exponent was really too large.
   476  			return
   477  		}
   478  		return f * float32pow10[exp], true
   479  	case exp < 0 && exp >= -10:	// int / 10^k
   480  		return f / float32pow10[-exp], true
   481  	}
   482  	return
   483  }
   484  
   485  // atofHex converts the hex floating-point string s
   486  // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
   487  // and returns it as a float64.
   488  // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
   489  // If trunc is true, trailing non-zero bits have been omitted from the mantissa.
   490  func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
   491  	maxExp := 1<<flt.expbits + flt.bias - 2
   492  	minExp := flt.bias + 1
   493  	exp += int(flt.mantbits)	// mantissa now implicitly divided by 2^mantbits.
   494  
   495  	// Shift mantissa and exponent to bring representation into float range.
   496  	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
   497  	// For rounding, we need two more, where the bottom bit represents
   498  	// whether that bit or any later bit was non-zero.
   499  	// (If the mantissa has already lost non-zero bits, trunc is true,
   500  	// and we OR in a 1 below after shifting left appropriately.)
   501  	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
   502  		mantissa <<= 1
   503  		exp--
   504  	}
   505  	if trunc {
   506  		mantissa |= 1
   507  	}
   508  	for mantissa>>(1+flt.mantbits+2) != 0 {
   509  		mantissa = mantissa>>1 | mantissa&1
   510  		exp++
   511  	}
   512  
   513  	// If exponent is too negative,
   514  	// denormalize in hopes of making it representable.
   515  	// (The -2 is for the rounding bits.)
   516  	for mantissa > 1 && exp < minExp-2 {
   517  		mantissa = mantissa>>1 | mantissa&1
   518  		exp++
   519  	}
   520  
   521  	// Round using two bottom bits.
   522  	round := mantissa & 3
   523  	mantissa >>= 2
   524  	round |= mantissa & 1	// round to even (round up if mantissa is odd)
   525  	exp += 2
   526  	if round == 3 {
   527  		mantissa++
   528  		if mantissa == 1<<(1+flt.mantbits) {
   529  			mantissa >>= 1
   530  			exp++
   531  		}
   532  	}
   533  
   534  	if mantissa>>flt.mantbits == 0 {	// Denormal or zero.
   535  		exp = flt.bias
   536  	}
   537  	var err error
   538  	if exp > maxExp {	// infinity and range error
   539  		mantissa = 1 << flt.mantbits
   540  		exp = maxExp + 1
   541  		err = rangeError(fnParseFloat, s)
   542  	}
   543  
   544  	bits := mantissa & (1<<flt.mantbits - 1)
   545  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   546  	if neg {
   547  		bits |= 1 << flt.mantbits << flt.expbits
   548  	}
   549  	if flt == &float32info {
   550  		return float64(math.Float32frombits(uint32(bits))), err
   551  	}
   552  	return math.Float64frombits(bits), err
   553  }
   554  
// fnParseFloat is the function name passed to syntaxError and rangeError
// when the float parsers in this file report a failure.
const fnParseFloat = "ParseFloat"
   556  
   557  func atof32(s string) (f float32, err error) {
   558  	if val, ok := special(s); ok {
   559  		return float32(val), nil
   560  	}
   561  
   562  	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   563  	if !ok {
   564  		return 0, syntaxError(fnParseFloat, s)
   565  	}
   566  
   567  	if hex {
   568  		f, err := atofHex(s, &float32info, mantissa, exp, neg, trunc)
   569  		return float32(f), err
   570  	}
   571  
   572  	if optimize {
   573  		// Try pure floating-point arithmetic conversion.
   574  		if !trunc {
   575  			if f, ok := atof32exact(mantissa, exp, neg); ok {
   576  				return f, nil
   577  			}
   578  		}
   579  		// Try another fast path.
   580  		ext := new(extFloat)
   581  		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float32info); ok {
   582  			b, ovf := ext.floatBits(&float32info)
   583  			f = math.Float32frombits(uint32(b))
   584  			if ovf {
   585  				err = rangeError(fnParseFloat, s)
   586  			}
   587  			return f, err
   588  		}
   589  	}
   590  
   591  	// Slow fallback.
   592  	var d decimal
   593  	if !d.set(s) {
   594  		return 0, syntaxError(fnParseFloat, s)
   595  	}
   596  	b, ovf := d.floatBits(&float32info)
   597  	f = math.Float32frombits(uint32(b))
   598  	if ovf {
   599  		err = rangeError(fnParseFloat, s)
   600  	}
   601  	return f, err
   602  }
   603  
   604  func atof64(s string) (f float64, err error) {
   605  	if val, ok := special(s); ok {
   606  		return val, nil
   607  	}
   608  
   609  	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   610  	if !ok {
   611  		return 0, syntaxError(fnParseFloat, s)
   612  	}
   613  
   614  	if hex {
   615  		return atofHex(s, &float64info, mantissa, exp, neg, trunc)
   616  	}
   617  
   618  	if optimize {
   619  		// Try pure floating-point arithmetic conversion.
   620  		if !trunc {
   621  			if f, ok := atof64exact(mantissa, exp, neg); ok {
   622  				return f, nil
   623  			}
   624  		}
   625  		// Try another fast path.
   626  		ext := new(extFloat)
   627  		if ok := ext.AssignDecimal(mantissa, exp, neg, trunc, &float64info); ok {
   628  			b, ovf := ext.floatBits(&float64info)
   629  			f = math.Float64frombits(b)
   630  			if ovf {
   631  				err = rangeError(fnParseFloat, s)
   632  			}
   633  			return f, err
   634  		}
   635  	}
   636  
   637  	// Slow fallback.
   638  	var d decimal
   639  	if !d.set(s) {
   640  		return 0, syntaxError(fnParseFloat, s)
   641  	}
   642  	b, ovf := d.floatBits(&float64info)
   643  	f = math.Float64frombits(b)
   644  	if ovf {
   645  		err = rangeError(fnParseFloat, s)
   646  	}
   647  	return f, err
   648  }
   649  
   650  // ParseFloat converts the string s to a floating-point number
   651  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
   652  // When bitSize=32, the result still has type float64, but it will be
   653  // convertible to float32 without changing its value.
   654  //
   655  // ParseFloat accepts decimal and hexadecimal floating-point number syntax.
   656  // If s is well-formed and near a valid floating-point number,
   657  // ParseFloat returns the nearest floating-point number rounded
   658  // using IEEE754 unbiased rounding.
   659  // (Parsing a hexadecimal floating-point value only rounds when
   660  // there are more bits in the hexadecimal representation than
   661  // will fit in the mantissa.)
   662  //
   663  // The errors that ParseFloat returns have concrete type *NumError
   664  // and include err.Num = s.
   665  //
   666  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   667  //
   668  // If s is syntactically well-formed but is more than 1/2 ULP
   669  // away from the largest floating point number of the given size,
   670  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
   671  //
   672  // ParseFloat recognizes the strings "NaN", "+Inf", and "-Inf" as their
   673  // respective special floating point values. It ignores case when matching.
   674  func ParseFloat(s string, bitSize int) (float64, error) {
   675  	if bitSize == 32 {
   676  		f, err := atof32(s)
   677  		return float64(f), err
   678  	}
   679  	return atof64(s)
   680  }