github.com/ugorji/go/codec@v1.2.13-0.20240307214044-07c54c229a5a/decimal.go (about)

     1  // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
     2  // Use of this source code is governed by a MIT license found in the LICENSE file.
     3  
     4  package codec
     5  
     6  import (
     7  	"math"
     8  	"strconv"
     9  )
    10  
    11  // Per go spec, floats are represented in memory as
    12  // IEEE single or double precision floating point values.
    13  //
    14  // We also looked at the source for stdlib math/modf.go,
    15  // reviewed https://github.com/chewxy/math32
    16  // and read wikipedia documents describing the formats.
    17  //
    18  // It became clear that we could easily look at the bits to determine
    19  // whether any fraction exists.
    20  
    21  func parseFloat32(b []byte) (f float32, err error) {
    22  	return parseFloat32_custom(b)
    23  }
    24  
    25  func parseFloat64(b []byte) (f float64, err error) {
    26  	return parseFloat64_custom(b)
    27  }
    28  
    29  func parseFloat32_strconv(b []byte) (f float32, err error) {
    30  	f64, err := strconv.ParseFloat(stringView(b), 32)
    31  	f = float32(f64)
    32  	return
    33  }
    34  
    35  func parseFloat64_strconv(b []byte) (f float64, err error) {
    36  	return strconv.ParseFloat(stringView(b), 64)
    37  }
    38  
    39  // ------ parseFloat custom below --------
    40  
    41  // JSON really supports decimal numbers in base 10 notation, with exponent support.
    42  //
    43  // We assume the following:
    44  //   - a lot of floating point numbers in json files will have defined precision
    45  //     (in terms of number of digits after decimal point), etc.
    46  //   - these (referenced above) can be written in exact format.
    47  //
    48  // strconv.ParseFloat has some unnecessary overhead which we can do without
    49  // for the common case:
    50  //
    51  //    - expensive char-by-char check to see if underscores are in right place
    52  //    - testing for and skipping underscores
    53  //    - check if the string matches ignorecase +/- inf, +/- infinity, nan
    54  //    - support for base 16 (0xFFFF...)
    55  //
    56  // The functions below will try a fast-path for floats which can be decoded
    57  // without any loss of precision, meaning they:
    58  //
//    - have a mantissa that fits within the significand bits of a 32-bit or 64-bit float
//    - have a decimal exponent that stays within the range of exact powers of 10
//    - involve no truncation (any extra digits are all trailing zeros)
    62  //
    63  // To figure out what the values are for maxMantDigits, use this idea below:
    64  //
// 2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of float32)
// 2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
// 2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of float64)
// 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
    69  //
    70  // Note: we only allow for up to what can comfortably fit into the significand
    71  // ignoring the exponent, and we only try to parse iff significand fits.
    72  
const (
	// Upper bounds checked by parseFloat64_reader / parseFloat32_reader on the
	// intermediate product when a decimal exponent larger than the biggest
	// exact power of ten is applied in two multiplication steps. If the
	// intermediate value exceeds the bound, the fast path is abandoned.
	fMaxMultiplierForExactPow10_64 = 1e15
	fMaxMultiplierForExactPow10_32 = 1e7

	// fUint64Cutoff is the smallest value that overflows a uint64
	// when multiplied by fBase (10).
	fUint64Cutoff = (1<<64-1)/10 + 1
	// fUint32Cutoff = (1<<32-1)/10 + 1

	// fBase is the numeric base of the text being parsed.
	fBase = 10
)
    82  
// Named powers of one thousand (10^3 through 10^18), each defined in terms of
// the previous one. They are used to spell out the uint64pow10 table.
const (
	thousand    = 1000
	million     = thousand * thousand
	billion     = thousand * million
	trillion    = thousand * billion
	quadrillion = thousand * trillion
	quintillion = thousand * quadrillion
)
    91  
// Exact powers of 10.
//
// uint64pow10[k] holds 10^k as a uint64 for k in [0, 19];
// 10^19 is the largest power of ten that fits in a uint64.
var uint64pow10 = [...]uint64{
	1, 10, 100,
	1 * thousand, 10 * thousand, 100 * thousand,
	1 * million, 10 * million, 100 * million,
	1 * billion, 10 * billion, 100 * billion,
	1 * trillion, 10 * trillion, 100 * trillion,
	1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
	1 * quintillion, 10 * quintillion,
}
// float64pow10[k] holds 10^k as a float64 for k in [0, 22].
// The upper index matches fi64.exactPow10.
var float64pow10 = [...]float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}
// float32pow10[k] holds 10^k as a float32 for k in [0, 10].
// The upper index matches fi32.exactPow10.
var float32pow10 = [...]float32{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
}
   110  
// floatinfo describes the limits of a numeric target type. readFloat uses it
// to decide whether a decimal string can be handled by the exact fast path.
type floatinfo struct {
	// mantbits is the number of significand bits. When non-zero, readFloat
	// rejects (as hardexp) any mantissa that has bits set above this width.
	// Zero disables that check (used for the uint64 target).
	mantbits uint8

	// expbits uint8 // (unused)
	// bias    int16 // (unused)
	// is32bit bool // (unused)

	exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)

	exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0)

	// maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32

	// mantCutoffIsUint64Cutoff selects the accumulation path in readFloat
	// that detects uint64 wrap-around when adding a digit.
	mantCutoffIsUint64Cutoff bool

	// mantCutoff is the threshold at which readFloat stops folding further
	// digits into the mantissa.
	mantCutoff uint64
}
   128  
// Limits for each supported target. The positional fields are, in order:
// mantbits, exactPow10, exactInts, mantCutoffIsUint64Cutoff, mantCutoff.

// fi32 and fi64 describe float32 and float64 respectively.
var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1}
var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1}

// fi64u describes a uint64 target: no significand-width check, and the
// mantissa may grow up to the uint64 multiplication cutoff.
var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff}
   133  
   134  func noFrac64(fbits uint64) bool {
   135  	if fbits == 0 {
   136  		return true
   137  	}
   138  
   139  	exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias
   140  	// clear top 12+e bits, the integer part; if the rest is 0, then no fraction.
   141  	return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part
   142  }
   143  
   144  func noFrac32(fbits uint32) bool {
   145  	if fbits == 0 {
   146  		return true
   147  	}
   148  
   149  	exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias
   150  	// clear top 9+e bits, the integer part; if the rest is 0, then no fraction.
   151  	return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part
   152  }
   153  
   154  func strconvParseErr(b []byte, fn string) error {
   155  	return &strconv.NumError{
   156  		Func: fn,
   157  		Err:  strconv.ErrSyntax,
   158  		Num:  string(b),
   159  	}
   160  }
   161  
   162  func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
   163  	f = float32(r.mantissa)
   164  	if r.exp == 0 {
   165  	} else if r.exp < 0 { // int / 10^k
   166  		f /= float32pow10[uint8(-r.exp)]
   167  	} else { // exp > 0
   168  		if r.exp > fi32.exactPow10 {
   169  			f *= float32pow10[r.exp-fi32.exactPow10]
   170  			if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range
   171  				fail = true
   172  				return // ok = false
   173  			}
   174  			f *= float32pow10[fi32.exactPow10]
   175  		} else {
   176  			f *= float32pow10[uint8(r.exp)]
   177  		}
   178  	}
   179  	if r.neg {
   180  		f = -f
   181  	}
   182  	return
   183  }
   184  
   185  func parseFloat32_custom(b []byte) (f float32, err error) {
   186  	r := readFloat(b, fi32)
   187  	if r.bad {
   188  		return 0, strconvParseErr(b, "ParseFloat")
   189  	}
   190  	if r.ok {
   191  		f, r.bad = parseFloat32_reader(r)
   192  		if !r.bad {
   193  			return
   194  		}
   195  	}
   196  	return parseFloat32_strconv(b)
   197  }
   198  
   199  func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
   200  	f = float64(r.mantissa)
   201  	if r.exp == 0 {
   202  	} else if r.exp < 0 { // int / 10^k
   203  		f /= float64pow10[-uint8(r.exp)]
   204  	} else { // exp > 0
   205  		if r.exp > fi64.exactPow10 {
   206  			f *= float64pow10[r.exp-fi64.exactPow10]
   207  			if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range
   208  				fail = true
   209  				return
   210  			}
   211  			f *= float64pow10[fi64.exactPow10]
   212  		} else {
   213  			f *= float64pow10[uint8(r.exp)]
   214  		}
   215  	}
   216  	if r.neg {
   217  		f = -f
   218  	}
   219  	return
   220  }
   221  
   222  func parseFloat64_custom(b []byte) (f float64, err error) {
   223  	r := readFloat(b, fi64)
   224  	if r.bad {
   225  		return 0, strconvParseErr(b, "ParseFloat")
   226  	}
   227  	if r.ok {
   228  		f, r.bad = parseFloat64_reader(r)
   229  		if !r.bad {
   230  			return
   231  		}
   232  	}
   233  	return parseFloat64_strconv(b)
   234  }
   235  
   236  func parseUint64_simple(b []byte) (n uint64, ok bool) {
   237  	if len(b) > 1 && b[0] == '0' { // punt on numbers with leading zeros
   238  		return
   239  	}
   240  
   241  	var i int
   242  	var n1 uint64
   243  	var c uint8
   244  LOOP:
   245  	if i < len(b) {
   246  		c = b[i]
   247  		// unsigned integers don't overflow well on multiplication, so check cutoff here
   248  		// e.g. (maxUint64-5)*10 doesn't overflow well ...
   249  		// if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' {
   250  		if n >= fUint64Cutoff || c < '0' || c > '9' {
   251  			return
   252  		} else if c == '0' {
   253  			n *= fBase
   254  		} else {
   255  			n1 = n
   256  			n = n*fBase + uint64(c-'0')
   257  			if n < n1 {
   258  				return
   259  			}
   260  		}
   261  		i++
   262  		goto LOOP
   263  	}
   264  	ok = true
   265  	return
   266  }
   267  
   268  func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
   269  	f = r.mantissa
   270  	if r.exp == 0 {
   271  	} else if r.exp < 0 { // int / 10^k
   272  		if f%uint64pow10[uint8(-r.exp)] != 0 {
   273  			fail = true
   274  		} else {
   275  			f /= uint64pow10[uint8(-r.exp)]
   276  		}
   277  	} else { // exp > 0
   278  		f *= uint64pow10[uint8(r.exp)]
   279  	}
   280  	return
   281  }
   282  
   283  func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) {
   284  	if len(b) == 0 {
   285  		ok = true
   286  		return
   287  	}
   288  	if b[0] == '-' {
   289  		if len(b) == 1 {
   290  			return
   291  		}
   292  		neg = true
   293  		b = b[1:]
   294  	}
   295  
   296  	u, ok = parseUint64_simple(b)
   297  	if ok {
   298  		return
   299  	}
   300  
   301  	r := readFloat(b, fi64u)
   302  	if r.ok {
   303  		var fail bool
   304  		u, fail = parseUint64_reader(r)
   305  		if fail {
   306  			f, err := parseFloat64(b)
   307  			if err != nil {
   308  				return
   309  			}
   310  			if !noFrac64(math.Float64bits(f)) {
   311  				return
   312  			}
   313  			u = uint64(f)
   314  		}
   315  		ok = true
   316  		return
   317  	}
   318  	return
   319  }
   320  
   321  // parseNumber will return an integer if only composed of [-]?[0-9]+
   322  // Else it will return a float.
   323  func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
   324  	var ok, neg bool
   325  	var f uint64
   326  
   327  	if len(b) == 0 {
   328  		return
   329  	}
   330  
   331  	if b[0] == '-' {
   332  		neg = true
   333  		f, ok = parseUint64_simple(b[1:])
   334  	} else {
   335  		f, ok = parseUint64_simple(b)
   336  	}
   337  
   338  	if ok {
   339  		if neg {
   340  			z.v = valueTypeInt
   341  			if chkOvf.Uint2Int(f, neg) {
   342  				return strconvParseErr(b, "ParseInt")
   343  			}
   344  			z.i = -int64(f)
   345  		} else if preferSignedInt {
   346  			z.v = valueTypeInt
   347  			if chkOvf.Uint2Int(f, neg) {
   348  				return strconvParseErr(b, "ParseInt")
   349  			}
   350  			z.i = int64(f)
   351  		} else {
   352  			z.v = valueTypeUint
   353  			z.u = f
   354  		}
   355  		return
   356  	}
   357  
   358  	z.v = valueTypeFloat
   359  	z.f, err = parseFloat64_custom(b)
   360  	return
   361  }
   362  
// readFloatResult is what readFloat produces from scanning a decimal string.
//
// When ok is true the scanned value is mantissa * 10^exp, negated if neg is
// set. When ok is false the remaining flags say why the fast path does not
// apply; mantissa and exp should then not be interpreted.
type readFloatResult struct {
	mantissa uint64 // decimal digits folded into an integer
	exp      int8   // power of ten to scale mantissa by
	neg      bool   // input started with '-'
	trunc    bool   // a non-zero digit could not be folded into mantissa
	bad      bool   // bad decimal string
	hardexp  bool   // exponent is hard to handle (> 2 digits, etc)
	ok       bool   // scan succeeded and the value fits the fast path
	// sawdot   bool
	// sawexp   bool
	//_ [2]bool // padding
}
   375  
   376  func readFloat(s []byte, y floatinfo) (r readFloatResult) {
   377  	var i uint // uint, so that we eliminate bounds checking
   378  	var slen = uint(len(s))
   379  	if slen == 0 {
   380  		// read an empty string as the zero value
   381  		// r.bad = true
   382  		r.ok = true
   383  		return
   384  	}
   385  
   386  	if s[0] == '-' {
   387  		r.neg = true
   388  		i++
   389  	}
   390  
   391  	// considered punting early if string has length > maxMantDigits, but doesn't account
   392  	// for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
   393  
   394  	var nd, ndMant, dp int8
   395  	var sawdot, sawexp bool
   396  	var xu uint64
   397  
   398  	if i+1 < slen && s[i] == '0' {
   399  		switch s[i+1] {
   400  		case '.', 'e', 'E':
   401  			// ok
   402  		default:
   403  			r.bad = true
   404  			return
   405  		}
   406  	}
   407  
   408  LOOP:
   409  	for ; i < slen; i++ {
   410  		switch s[i] {
   411  		case '.':
   412  			if sawdot {
   413  				r.bad = true
   414  				return
   415  			}
   416  			sawdot = true
   417  			dp = nd
   418  		case 'e', 'E':
   419  			sawexp = true
   420  			break LOOP
   421  		case '0':
   422  			if nd == 0 {
   423  				dp--
   424  				continue LOOP
   425  			}
   426  			nd++
   427  			if r.mantissa < y.mantCutoff {
   428  				r.mantissa *= fBase
   429  				ndMant++
   430  			}
   431  		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
   432  			nd++
   433  			if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff {
   434  				r.mantissa *= fBase
   435  				xu = r.mantissa + uint64(s[i]-'0')
   436  				if xu < r.mantissa {
   437  					r.trunc = true
   438  					return
   439  				}
   440  				r.mantissa = xu
   441  			} else if r.mantissa < y.mantCutoff {
   442  				// mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
   443  				r.mantissa = r.mantissa*fBase + uint64(s[i]-'0')
   444  			} else {
   445  				r.trunc = true
   446  				return
   447  			}
   448  			ndMant++
   449  		default:
   450  			r.bad = true
   451  			return
   452  		}
   453  	}
   454  
   455  	if !sawdot {
   456  		dp = nd
   457  	}
   458  
   459  	if sawexp {
   460  		i++
   461  		if i < slen {
   462  			var eneg bool
   463  			if s[i] == '+' {
   464  				i++
   465  			} else if s[i] == '-' {
   466  				i++
   467  				eneg = true
   468  			}
   469  			if i < slen {
   470  				// for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
   471  				// exit quick if exponent is more than 2 digits.
   472  				if i+2 < slen {
   473  					r.hardexp = true
   474  					return
   475  				}
   476  				var e int8
   477  				if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
   478  					r.bad = true
   479  					return
   480  				}
   481  				e = int8(s[i] - '0')
   482  				i++
   483  				if i < slen {
   484  					if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
   485  						r.bad = true
   486  						return
   487  					}
   488  					e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
   489  					i++
   490  				}
   491  				if eneg {
   492  					dp -= e
   493  				} else {
   494  					dp += e
   495  				}
   496  			}
   497  		}
   498  	}
   499  
   500  	if r.mantissa != 0 {
   501  		r.exp = dp - ndMant
   502  		// do not set ok=true for cases we cannot handle
   503  		if r.exp < -y.exactPow10 ||
   504  			r.exp > y.exactInts+y.exactPow10 ||
   505  			(y.mantbits != 0 && r.mantissa>>y.mantbits != 0) {
   506  			r.hardexp = true
   507  			return
   508  		}
   509  	}
   510  
   511  	r.ok = true
   512  	return
   513  }