github.com/jgbaldwinbrown/perf@v0.1.1/benchfmt/internal/bytesconv/atof.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bytesconv
     6  
     7  // decimal to binary floating point conversion.
     8  // Algorithm:
     9  //   1) Store input in multiprecision decimal.
    10  //   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
    11  //   3) Multiply by 2^precision and round to get mantissa.
    12  
    13  import "math"
    14  
    15  var optimize = true // set to false to force slow-path conversions for testing
    16  
    17  func equalIgnoreCase(s1 []byte, s2 string) bool {
    18  	if len(s1) != len(s2) {
    19  		return false
    20  	}
    21  	for i := 0; i < len(s1); i++ {
    22  		c1 := s1[i]
    23  		if 'A' <= c1 && c1 <= 'Z' {
    24  			c1 += 'a' - 'A'
    25  		}
    26  		c2 := s2[i]
    27  		if 'A' <= c2 && c2 <= 'Z' {
    28  			c2 += 'a' - 'A'
    29  		}
    30  		if c1 != c2 {
    31  			return false
    32  		}
    33  	}
    34  	return true
    35  }
    36  
    37  func special(s []byte) (f float64, ok bool) {
    38  	if len(s) == 0 {
    39  		return
    40  	}
    41  	switch s[0] {
    42  	default:
    43  		return
    44  	case '+':
    45  		if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") {
    46  			return math.Inf(1), true
    47  		}
    48  	case '-':
    49  		if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") {
    50  			return math.Inf(-1), true
    51  		}
    52  	case 'n', 'N':
    53  		if equalIgnoreCase(s, "nan") {
    54  			return math.NaN(), true
    55  		}
    56  	case 'i', 'I':
    57  		if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") {
    58  			return math.Inf(1), true
    59  		}
    60  	}
    61  	return
    62  }
    63  
    64  func (b *decimal) set(s []byte) (ok bool) {
    65  	i := 0
    66  	b.neg = false
    67  	b.trunc = false
    68  
    69  	// optional sign
    70  	if i >= len(s) {
    71  		return
    72  	}
    73  	switch {
    74  	case s[i] == '+':
    75  		i++
    76  	case s[i] == '-':
    77  		b.neg = true
    78  		i++
    79  	}
    80  
    81  	// digits
    82  	sawdot := false
    83  	sawdigits := false
    84  	for ; i < len(s); i++ {
    85  		switch {
    86  		case s[i] == '_':
    87  			// underscoreOK already called
    88  			continue
    89  		case s[i] == '.':
    90  			if sawdot {
    91  				return
    92  			}
    93  			sawdot = true
    94  			b.dp = b.nd
    95  			continue
    96  
    97  		case '0' <= s[i] && s[i] <= '9':
    98  			sawdigits = true
    99  			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
   100  				b.dp--
   101  				continue
   102  			}
   103  			if b.nd < len(b.d) {
   104  				b.d[b.nd] = s[i]
   105  				b.nd++
   106  			} else if s[i] != '0' {
   107  				b.trunc = true
   108  			}
   109  			continue
   110  		}
   111  		break
   112  	}
   113  	if !sawdigits {
   114  		return
   115  	}
   116  	if !sawdot {
   117  		b.dp = b.nd
   118  	}
   119  
   120  	// optional exponent moves decimal point.
   121  	// if we read a very large, very long number,
   122  	// just be sure to move the decimal point by
   123  	// a lot (say, 100000).  it doesn't matter if it's
   124  	// not the exact number.
   125  	if i < len(s) && lower(s[i]) == 'e' {
   126  		i++
   127  		if i >= len(s) {
   128  			return
   129  		}
   130  		esign := 1
   131  		if s[i] == '+' {
   132  			i++
   133  		} else if s[i] == '-' {
   134  			i++
   135  			esign = -1
   136  		}
   137  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   138  			return
   139  		}
   140  		e := 0
   141  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   142  			if s[i] == '_' {
   143  				// underscoreOK already called
   144  				continue
   145  			}
   146  			if e < 10000 {
   147  				e = e*10 + int(s[i]) - '0'
   148  			}
   149  		}
   150  		b.dp += e * esign
   151  	}
   152  
   153  	if i != len(s) {
   154  		return
   155  	}
   156  
   157  	ok = true
   158  	return
   159  }
   160  
   161  // readFloat reads a decimal mantissa and exponent from a float
   162  // string representation. It returns ok==false if the number could
   163  // not fit return types or is invalid.
   164  func readFloat(s []byte) (mantissa uint64, exp int, neg, trunc, hex, ok bool) {
   165  	i := 0
   166  
   167  	// optional sign
   168  	if i >= len(s) {
   169  		return
   170  	}
   171  	switch {
   172  	case s[i] == '+':
   173  		i++
   174  	case s[i] == '-':
   175  		neg = true
   176  		i++
   177  	}
   178  
   179  	// digits
   180  	base := uint64(10)
   181  	maxMantDigits := 19 // 10^19 fits in uint64
   182  	expChar := byte('e')
   183  	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
   184  		base = 16
   185  		maxMantDigits = 16 // 16^16 fits in uint64
   186  		i += 2
   187  		expChar = 'p'
   188  		hex = true
   189  	}
   190  	sawdot := false
   191  	sawdigits := false
   192  	nd := 0
   193  	ndMant := 0
   194  	dp := 0
   195  	for ; i < len(s); i++ {
   196  		switch c := s[i]; true {
   197  		case c == '_':
   198  			// underscoreOK already called
   199  			continue
   200  
   201  		case c == '.':
   202  			if sawdot {
   203  				return
   204  			}
   205  			sawdot = true
   206  			dp = nd
   207  			continue
   208  
   209  		case '0' <= c && c <= '9':
   210  			sawdigits = true
   211  			if c == '0' && nd == 0 { // ignore leading zeros
   212  				dp--
   213  				continue
   214  			}
   215  			nd++
   216  			if ndMant < maxMantDigits {
   217  				mantissa *= base
   218  				mantissa += uint64(c - '0')
   219  				ndMant++
   220  			} else if c != '0' {
   221  				trunc = true
   222  			}
   223  			continue
   224  
   225  		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
   226  			sawdigits = true
   227  			nd++
   228  			if ndMant < maxMantDigits {
   229  				mantissa *= 16
   230  				mantissa += uint64(lower(c) - 'a' + 10)
   231  				ndMant++
   232  			} else {
   233  				trunc = true
   234  			}
   235  			continue
   236  		}
   237  		break
   238  	}
   239  	if !sawdigits {
   240  		return
   241  	}
   242  	if !sawdot {
   243  		dp = nd
   244  	}
   245  
   246  	if base == 16 {
   247  		dp *= 4
   248  		ndMant *= 4
   249  	}
   250  
   251  	// optional exponent moves decimal point.
   252  	// if we read a very large, very long number,
   253  	// just be sure to move the decimal point by
   254  	// a lot (say, 100000).  it doesn't matter if it's
   255  	// not the exact number.
   256  	if i < len(s) && lower(s[i]) == expChar {
   257  		i++
   258  		if i >= len(s) {
   259  			return
   260  		}
   261  		esign := 1
   262  		if s[i] == '+' {
   263  			i++
   264  		} else if s[i] == '-' {
   265  			i++
   266  			esign = -1
   267  		}
   268  		if i >= len(s) || s[i] < '0' || s[i] > '9' {
   269  			return
   270  		}
   271  		e := 0
   272  		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
   273  			if s[i] == '_' {
   274  				// underscoreOK already called
   275  				continue
   276  			}
   277  			if e < 10000 {
   278  				e = e*10 + int(s[i]) - '0'
   279  			}
   280  		}
   281  		dp += e * esign
   282  	} else if base == 16 {
   283  		// Must have exponent.
   284  		return
   285  	}
   286  
   287  	if i != len(s) {
   288  		return
   289  	}
   290  
   291  	if mantissa != 0 {
   292  		exp = dp - ndMant
   293  	}
   294  	ok = true
   295  	return
   296  }
   297  
   298  // decimal power of ten to binary power of two.
   299  var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   300  
   301  func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
   302  	var exp int
   303  	var mant uint64
   304  
   305  	// Zero is always a special case.
   306  	if d.nd == 0 {
   307  		mant = 0
   308  		exp = flt.bias
   309  		goto out
   310  	}
   311  
   312  	// Obvious overflow/underflow.
   313  	// These bounds are for 64-bit floats.
   314  	// Will have to change if we want to support 80-bit floats in the future.
   315  	if d.dp > 310 {
   316  		goto overflow
   317  	}
   318  	if d.dp < -330 {
   319  		// zero
   320  		mant = 0
   321  		exp = flt.bias
   322  		goto out
   323  	}
   324  
   325  	// Scale by powers of two until in range [0.5, 1.0)
   326  	exp = 0
   327  	for d.dp > 0 {
   328  		var n int
   329  		if d.dp >= len(powtab) {
   330  			n = 27
   331  		} else {
   332  			n = powtab[d.dp]
   333  		}
   334  		d.Shift(-n)
   335  		exp += n
   336  	}
   337  	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
   338  		var n int
   339  		if -d.dp >= len(powtab) {
   340  			n = 27
   341  		} else {
   342  			n = powtab[-d.dp]
   343  		}
   344  		d.Shift(n)
   345  		exp -= n
   346  	}
   347  
   348  	// Our range is [0.5,1) but floating point range is [1,2).
   349  	exp--
   350  
   351  	// Minimum representable exponent is flt.bias+1.
   352  	// If the exponent is smaller, move it up and
   353  	// adjust d accordingly.
   354  	if exp < flt.bias+1 {
   355  		n := flt.bias + 1 - exp
   356  		d.Shift(-n)
   357  		exp += n
   358  	}
   359  
   360  	if exp-flt.bias >= 1<<flt.expbits-1 {
   361  		goto overflow
   362  	}
   363  
   364  	// Extract 1+flt.mantbits bits.
   365  	d.Shift(int(1 + flt.mantbits))
   366  	mant = d.RoundedInteger()
   367  
   368  	// Rounding might have added a bit; shift down.
   369  	if mant == 2<<flt.mantbits {
   370  		mant >>= 1
   371  		exp++
   372  		if exp-flt.bias >= 1<<flt.expbits-1 {
   373  			goto overflow
   374  		}
   375  	}
   376  
   377  	// Denormalized?
   378  	if mant&(1<<flt.mantbits) == 0 {
   379  		exp = flt.bias
   380  	}
   381  	goto out
   382  
   383  overflow:
   384  	// ±Inf
   385  	mant = 0
   386  	exp = 1<<flt.expbits - 1 + flt.bias
   387  	overflow = true
   388  
   389  out:
   390  	// Assemble bits.
   391  	bits := mant & (uint64(1)<<flt.mantbits - 1)
   392  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   393  	if d.neg {
   394  		bits |= 1 << flt.mantbits << flt.expbits
   395  	}
   396  	return bits, overflow
   397  }
   398  
   399  // Exact powers of 10.
   400  var float64pow10 = []float64{
   401  	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
   402  	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
   403  	1e20, 1e21, 1e22,
   404  }
   405  var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   406  
   407  // If possible to convert decimal representation to 64-bit float f exactly,
   408  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   409  // Three common cases:
   410  //
   411  //	value is exact integer
   412  //	value is exact integer * exact power of ten
   413  //	value is exact integer / exact power of ten
   414  //
   415  // These all produce potentially inexact but correctly rounded answers.
   416  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   417  	if mantissa>>float64info.mantbits != 0 {
   418  		return
   419  	}
   420  	f = float64(mantissa)
   421  	if neg {
   422  		f = -f
   423  	}
   424  	switch {
   425  	case exp == 0:
   426  		// an integer.
   427  		return f, true
   428  	// Exact integers are <= 10^15.
   429  	// Exact powers of ten are <= 10^22.
   430  	case exp > 0 && exp <= 15+22: // int * 10^k
   431  		// If exponent is big but number of digits is not,
   432  		// can move a few zeros into the integer part.
   433  		if exp > 22 {
   434  			f *= float64pow10[exp-22]
   435  			exp = 22
   436  		}
   437  		if f > 1e15 || f < -1e15 {
   438  			// the exponent was really too large.
   439  			return
   440  		}
   441  		return f * float64pow10[exp], true
   442  	case exp < 0 && exp >= -22: // int / 10^k
   443  		return f / float64pow10[-exp], true
   444  	}
   445  	return
   446  }
   447  
   448  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
   449  // entirely in floating-point math, do so, avoiding the machinery above.
   450  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   451  	if mantissa>>float32info.mantbits != 0 {
   452  		return
   453  	}
   454  	f = float32(mantissa)
   455  	if neg {
   456  		f = -f
   457  	}
   458  	switch {
   459  	case exp == 0:
   460  		return f, true
   461  	// Exact integers are <= 10^7.
   462  	// Exact powers of ten are <= 10^10.
   463  	case exp > 0 && exp <= 7+10: // int * 10^k
   464  		// If exponent is big but number of digits is not,
   465  		// can move a few zeros into the integer part.
   466  		if exp > 10 {
   467  			f *= float32pow10[exp-10]
   468  			exp = 10
   469  		}
   470  		if f > 1e7 || f < -1e7 {
   471  			// the exponent was really too large.
   472  			return
   473  		}
   474  		return f * float32pow10[exp], true
   475  	case exp < 0 && exp >= -10: // int / 10^k
   476  		return f / float32pow10[-exp], true
   477  	}
   478  	return
   479  }
   480  
   481  // atofHex converts the hex floating-point string s
   482  // to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
   483  // and returns it as a float64.
   484  // The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
   485  // If trunc is true, trailing non-zero bits have been omitted from the mantissa.
   486  func atofHex(s []byte, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
   487  	maxExp := 1<<flt.expbits + flt.bias - 2
   488  	minExp := flt.bias + 1
   489  	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.
   490  
   491  	// Shift mantissa and exponent to bring representation into float range.
   492  	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
   493  	// For rounding, we need two more, where the bottom bit represents
   494  	// whether that bit or any later bit was non-zero.
   495  	// (If the mantissa has already lost non-zero bits, trunc is true,
   496  	// and we OR in a 1 below after shifting left appropriately.)
   497  	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
   498  		mantissa <<= 1
   499  		exp--
   500  	}
   501  	if trunc {
   502  		mantissa |= 1
   503  	}
   504  	for mantissa>>(1+flt.mantbits+2) != 0 {
   505  		mantissa = mantissa>>1 | mantissa&1
   506  		exp++
   507  	}
   508  
   509  	// If exponent is too negative,
   510  	// denormalize in hopes of making it representable.
   511  	// (The -2 is for the rounding bits.)
   512  	for mantissa > 1 && exp < minExp-2 {
   513  		mantissa = mantissa>>1 | mantissa&1
   514  		exp++
   515  	}
   516  
   517  	// Round using two bottom bits.
   518  	round := mantissa & 3
   519  	mantissa >>= 2
   520  	round |= mantissa & 1 // round to even (round up if mantissa is odd)
   521  	exp += 2
   522  	if round == 3 {
   523  		mantissa++
   524  		if mantissa == 1<<(1+flt.mantbits) {
   525  			mantissa >>= 1
   526  			exp++
   527  		}
   528  	}
   529  
   530  	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
   531  		exp = flt.bias
   532  	}
   533  	var err error
   534  	if exp > maxExp { // infinity and range error
   535  		mantissa = 1 << flt.mantbits
   536  		exp = maxExp + 1
   537  		err = rangeError(fnParseFloat, s)
   538  	}
   539  
   540  	bits := mantissa & (1<<flt.mantbits - 1)
   541  	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
   542  	if neg {
   543  		bits |= 1 << flt.mantbits << flt.expbits
   544  	}
   545  	if flt == &float32info {
   546  		return float64(math.Float32frombits(uint32(bits))), err
   547  	}
   548  	return math.Float64frombits(bits), err
   549  }
   550  
   551  const fnParseFloat = "ParseFloat"
   552  
   553  func atof32(s []byte) (f float32, err error) {
   554  	if val, ok := special(s); ok {
   555  		return float32(val), nil
   556  	}
   557  
   558  	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   559  	if hex && ok {
   560  		f, err := atofHex(s, &float32info, mantissa, exp, neg, trunc)
   561  		return float32(f), err
   562  	}
   563  
   564  	if optimize && ok {
   565  		// Try pure floating-point arithmetic conversion.
   566  		if !trunc {
   567  			if f, ok := atof32exact(mantissa, exp, neg); ok {
   568  				return f, nil
   569  			}
   570  		}
   571  	}
   572  
   573  	// Slow fallback.
   574  	var d decimal
   575  	if !d.set(s) {
   576  		return 0, syntaxError(fnParseFloat, s)
   577  	}
   578  	b, ovf := d.floatBits(&float32info)
   579  	f = math.Float32frombits(uint32(b))
   580  	if ovf {
   581  		err = rangeError(fnParseFloat, s)
   582  	}
   583  	return f, err
   584  }
   585  
   586  func atof64(s []byte) (f float64, err error) {
   587  	if val, ok := special(s); ok {
   588  		return val, nil
   589  	}
   590  
   591  	mantissa, exp, neg, trunc, hex, ok := readFloat(s)
   592  	if hex && ok {
   593  		return atofHex(s, &float64info, mantissa, exp, neg, trunc)
   594  	}
   595  
   596  	if optimize && ok {
   597  		// Try pure floating-point arithmetic conversion.
   598  		if !trunc {
   599  			if f, ok := atof64exact(mantissa, exp, neg); ok {
   600  				return f, nil
   601  			}
   602  		}
   603  	}
   604  
   605  	// Slow fallback.
   606  	var d decimal
   607  	if !d.set(s) {
   608  		return 0, syntaxError(fnParseFloat, s)
   609  	}
   610  	b, ovf := d.floatBits(&float64info)
   611  	f = math.Float64frombits(b)
   612  	if ovf {
   613  		err = rangeError(fnParseFloat, s)
   614  	}
   615  	return f, err
   616  }
   617  
   618  // ParseFloat converts the string s to a floating-point number
   619  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
   620  // When bitSize=32, the result still has type float64, but it will be
   621  // convertible to float32 without changing its value.
   622  //
   623  // ParseFloat accepts decimal and hexadecimal floating-point number syntax.
   624  // If s is well-formed and near a valid floating-point number,
   625  // ParseFloat returns the nearest floating-point number rounded
   626  // using IEEE754 unbiased rounding.
   627  // (Parsing a hexadecimal floating-point value only rounds when
   628  // there are more bits in the hexadecimal representation than
   629  // will fit in the mantissa.)
   630  //
   631  // The errors that ParseFloat returns have concrete type *NumError
   632  // and include err.Num = s.
   633  //
   634  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   635  //
   636  // If s is syntactically well-formed but is more than 1/2 ULP
   637  // away from the largest floating point number of the given size,
   638  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
   639  //
   640  // ParseFloat recognizes the strings "NaN", "+Inf", and "-Inf" as their
   641  // respective special floating point values. It ignores case when matching.
   642  func ParseFloat(s []byte, bitSize int) (float64, error) {
   643  	if !underscoreOK(s) {
   644  		return 0, syntaxError(fnParseFloat, s)
   645  	}
   646  	if bitSize == 32 {
   647  		f, err := atof32(s)
   648  		return float64(f), err
   649  	}
   650  	return atof64(s)
   651  }