github.com/patricebensoussan/go/codec@v1.2.99/decimal.go (about)

     1  // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved.
     2  // Use of this source code is governed by a MIT license found in the LICENSE file.
     3  
     4  package codec
     5  
     6  import (
     7  	"math"
     8  	"strconv"
     9  )
    10  
    11  // Per go spec, floats are represented in memory as
    12  // IEEE single or double precision floating point values.
    13  //
    14  // We also looked at the source for stdlib math/modf.go,
    15  // reviewed https://github.com/chewxy/math32
    16  // and read wikipedia documents describing the formats.
    17  //
    18  // It became clear that we could easily look at the bits to determine
    19  // whether any fraction exists.
    20  
    21  func parseFloat32(b []byte) (f float32, err error) {
    22  	return parseFloat32_custom(b)
    23  }
    24  
    25  func parseFloat64(b []byte) (f float64, err error) {
    26  	return parseFloat64_custom(b)
    27  }
    28  
    29  func parseFloat32_strconv(b []byte) (f float32, err error) {
    30  	f64, err := strconv.ParseFloat(stringView(b), 32)
    31  	f = float32(f64)
    32  	return
    33  }
    34  
    35  func parseFloat64_strconv(b []byte) (f float64, err error) {
    36  	return strconv.ParseFloat(stringView(b), 64)
    37  }
    38  
    39  // ------ parseFloat custom below --------
    40  
    41  // JSON really supports decimal numbers in base 10 notation, with exponent support.
    42  //
    43  // We assume the following:
    44  //   - a lot of floating point numbers in json files will have defined precision
    45  //     (in terms of number of digits after decimal point), etc.
    46  //   - these (referenced above) can be written in exact format.
    47  //
    48  // strconv.ParseFloat has some unnecessary overhead which we can do without
    49  // for the common case:
    50  //
    51  //    - expensive char-by-char check to see if underscores are in right place
    52  //    - testing for and skipping underscores
    53  //    - check if the string matches ignorecase +/- inf, +/- infinity, nan
    54  //    - support for base 16 (0xFFFF...)
    55  //
// The functions below try a fast path for floats which can be decoded
// without any loss of precision, meaning that:
//
//    - the mantissa fits within the significand bits of the 32-bit or 64-bit float
//    - the exponent fits within the range of exactly representable powers of 10
//    - there is no truncation (any extra digits are all trailing zeros)
    62  //
    63  // To figure out what the values are for maxMantDigits, use this idea below:
    64  //
// 2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of float32)
// 2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
// 2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of float64)
// 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
    69  //
    70  // Note: we only allow for up to what can comfortably fit into the significand
    71  // ignoring the exponent, and we only try to parse iff significand fits.
    72  
const (
	// fMaxMultiplierForExactPow10_64 and fMaxMultiplierForExactPow10_32 are the
	// bounds that parseFloat64_reader and parseFloat32_reader compare the partially
	// scaled value against when the exponent exceeds the exact power-of-ten table;
	// a value above the bound makes the reader report failure.
	fMaxMultiplierForExactPow10_64 = 1e15
	fMaxMultiplierForExactPow10_32 = 1e7

	// fUint64Cutoff is the smallest value n for which n*fBase no longer fits in a uint64.
	fUint64Cutoff = (1<<64-1)/10 + 1
	// fUint32Cutoff = (1<<32-1)/10 + 1

	// fBase is the numeric base of the text being parsed (decimal).
	fBase = 10
)
    82  
// Named powers of one thousand (10^3 through 10^18),
// used to spell out the entries of uint64pow10 readably.
const (
	thousand    = 1000
	million     = thousand * thousand
	billion     = thousand * million
	trillion    = thousand * billion
	quadrillion = thousand * trillion
	quintillion = thousand * quadrillion
)
    91  
// Exact powers of 10.
//
// uint64pow10[k] is 10^k for k in 0..19 (20 entries).
var uint64pow10 = [...]uint64{
	1, 10, 100,
	1 * thousand, 10 * thousand, 100 * thousand,
	1 * million, 10 * million, 100 * million,
	1 * billion, 10 * billion, 100 * billion,
	1 * trillion, 10 * trillion, 100 * trillion,
	1 * quadrillion, 10 * quadrillion, 100 * quadrillion,
	1 * quintillion, 10 * quintillion,
}

// float64pow10[k] is 10^k as a float64, for k in 0..22 (23 entries).
var float64pow10 = [...]float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}

// float32pow10[k] is 10^k as a float32, for k in 0..10 (11 entries).
var float32pow10 = [...]float32{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10,
}
   110  
// floatinfo holds the limits that readFloat applies when deciding whether a
// decimal string can be handled on the exact fast path for a target type.
type floatinfo struct {
	// mantbits, when non-zero, is the number of low bits the parsed mantissa
	// may occupy; readFloat flags a mantissa with any higher bit set as hardexp.
	// A value of zero disables that check.
	mantbits uint8

	// expbits uint8 // (unused)
	// bias    int16 // (unused)
	// is32bit bool // (unused)

	// readFloat rejects a decimal exponent below -exactPow10.
	exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)

	// readFloat rejects a decimal exponent above exactInts+exactPow10.
	exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0)

	// maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32

	// mantCutoffIsUint64Cutoff selects the overflow-checked digit accumulation
	// in readFloat, bounded by fUint64Cutoff.
	mantCutoffIsUint64Cutoff bool

	// mantCutoff: readFloat stops folding digits into the mantissa once the
	// mantissa has reached this value.
	mantCutoff uint64
}
   128  
// fi32 and fi64 are the fast-path limits passed to readFloat when parsing
// float32 and float64 values respectively.
var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1}
var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1}

// fi64u is the set of limits parseInteger_bytes passes to readFloat when reading
// a uint64: no mantissa-bit check, and digit accumulation bounded by fUint64Cutoff.
var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff}
   133  
   134  func noFrac64(fbits uint64) bool {
   135  	exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias
   136  	// clear top 12+e bits, the integer part; if the rest is 0, then no fraction.
   137  	return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part
   138  }
   139  
   140  func noFrac32(fbits uint32) bool {
   141  	exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias
   142  	// clear top 9+e bits, the integer part; if the rest is 0, then no fraction.
   143  	return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part
   144  }
   145  
   146  func strconvParseErr(b []byte, fn string) error {
   147  	return &strconv.NumError{
   148  		Func: fn,
   149  		Err:  strconv.ErrSyntax,
   150  		Num:  string(b),
   151  	}
   152  }
   153  
   154  func parseFloat32_reader(r readFloatResult) (f float32, fail bool) {
   155  	f = float32(r.mantissa)
   156  	if r.exp == 0 {
   157  	} else if r.exp < 0 { // int / 10^k
   158  		f /= float32pow10[uint8(-r.exp)]
   159  	} else { // exp > 0
   160  		if r.exp > fi32.exactPow10 {
   161  			f *= float32pow10[r.exp-fi32.exactPow10]
   162  			if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range
   163  				fail = true
   164  				return // ok = false
   165  			}
   166  			f *= float32pow10[fi32.exactPow10]
   167  		} else {
   168  			f *= float32pow10[uint8(r.exp)]
   169  		}
   170  	}
   171  	if r.neg {
   172  		f = -f
   173  	}
   174  	return
   175  }
   176  
   177  func parseFloat32_custom(b []byte) (f float32, err error) {
   178  	r := readFloat(b, fi32)
   179  	if r.bad {
   180  		return 0, strconvParseErr(b, "ParseFloat")
   181  	}
   182  	if r.ok {
   183  		f, r.bad = parseFloat32_reader(r)
   184  		if !r.bad {
   185  			return
   186  		}
   187  	}
   188  	return parseFloat32_strconv(b)
   189  }
   190  
   191  func parseFloat64_reader(r readFloatResult) (f float64, fail bool) {
   192  	f = float64(r.mantissa)
   193  	if r.exp == 0 {
   194  	} else if r.exp < 0 { // int / 10^k
   195  		f /= float64pow10[-uint8(r.exp)]
   196  	} else { // exp > 0
   197  		if r.exp > fi64.exactPow10 {
   198  			f *= float64pow10[r.exp-fi64.exactPow10]
   199  			if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range
   200  				fail = true
   201  				return
   202  			}
   203  			f *= float64pow10[fi64.exactPow10]
   204  		} else {
   205  			f *= float64pow10[uint8(r.exp)]
   206  		}
   207  	}
   208  	if r.neg {
   209  		f = -f
   210  	}
   211  	return
   212  }
   213  
   214  func parseFloat64_custom(b []byte) (f float64, err error) {
   215  	r := readFloat(b, fi64)
   216  	if r.bad {
   217  		return 0, strconvParseErr(b, "ParseFloat")
   218  	}
   219  	if r.ok {
   220  		f, r.bad = parseFloat64_reader(r)
   221  		if !r.bad {
   222  			return
   223  		}
   224  	}
   225  	return parseFloat64_strconv(b)
   226  }
   227  
   228  func parseUint64_simple(b []byte) (n uint64, ok bool) {
   229  	var i int
   230  	var n1 uint64
   231  	var c uint8
   232  LOOP:
   233  	if i < len(b) {
   234  		c = b[i]
   235  		// unsigned integers don't overflow well on multiplication, so check cutoff here
   236  		// e.g. (maxUint64-5)*10 doesn't overflow well ...
   237  		// if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' {
   238  		if n >= fUint64Cutoff || c < '0' || c > '9' {
   239  			return
   240  		} else if c == '0' {
   241  			n *= fBase
   242  		} else {
   243  			n1 = n
   244  			n = n*fBase + uint64(c-'0')
   245  			if n < n1 {
   246  				return
   247  			}
   248  		}
   249  		i++
   250  		goto LOOP
   251  	}
   252  	ok = true
   253  	return
   254  }
   255  
   256  func parseUint64_reader(r readFloatResult) (f uint64, fail bool) {
   257  	f = r.mantissa
   258  	if r.exp == 0 {
   259  	} else if r.exp < 0 { // int / 10^k
   260  		if f%uint64pow10[uint8(-r.exp)] != 0 {
   261  			fail = true
   262  		} else {
   263  			f /= uint64pow10[uint8(-r.exp)]
   264  		}
   265  	} else { // exp > 0
   266  		f *= uint64pow10[uint8(r.exp)]
   267  	}
   268  	return
   269  }
   270  
   271  func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) {
   272  	if len(b) == 0 {
   273  		ok = true
   274  		return
   275  	}
   276  	if b[0] == '-' {
   277  		if len(b) == 1 {
   278  			return
   279  		}
   280  		neg = true
   281  		b = b[1:]
   282  	}
   283  
   284  	u, ok = parseUint64_simple(b)
   285  	if ok {
   286  		return
   287  	}
   288  
   289  	r := readFloat(b, fi64u)
   290  	if r.ok {
   291  		var fail bool
   292  		u, fail = parseUint64_reader(r)
   293  		if fail {
   294  			f, err := parseFloat64(b)
   295  			if err != nil {
   296  				return
   297  			}
   298  			if !noFrac64(math.Float64bits(f)) {
   299  				return
   300  			}
   301  			u = uint64(f)
   302  		}
   303  		ok = true
   304  		return
   305  	}
   306  	return
   307  }
   308  
   309  // parseNumber will return an integer if only composed of [-]?[0-9]+
   310  // Else it will return a float.
   311  func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) {
   312  	var ok, neg bool
   313  	var f uint64
   314  
   315  	if len(b) == 0 {
   316  		return
   317  	}
   318  
   319  	if b[0] == '-' {
   320  		neg = true
   321  		f, ok = parseUint64_simple(b[1:])
   322  	} else {
   323  		f, ok = parseUint64_simple(b)
   324  	}
   325  
   326  	if ok {
   327  		if neg {
   328  			z.v = valueTypeInt
   329  			if chkOvf.Uint2Int(f, neg) {
   330  				return strconvParseErr(b, "ParseInt")
   331  			}
   332  			z.i = -int64(f)
   333  		} else if preferSignedInt {
   334  			z.v = valueTypeInt
   335  			if chkOvf.Uint2Int(f, neg) {
   336  				return strconvParseErr(b, "ParseInt")
   337  			}
   338  			z.i = int64(f)
   339  		} else {
   340  			z.v = valueTypeUint
   341  			z.u = f
   342  		}
   343  		return
   344  	}
   345  
   346  	z.v = valueTypeFloat
   347  	z.f, err = parseFloat64_custom(b)
   348  	return
   349  }
   350  
// readFloatResult is what readFloat reports about a decimal string.
//
// When ok is true the parsed value is mantissa * 10^exp, negated if neg is set.
// When ok is false, one of trunc, bad or hardexp says why the fast path was
// not taken.
type readFloatResult struct {
	mantissa uint64 // accumulated decimal digits
	exp      int8   // power of ten to scale mantissa by
	neg      bool   // input started with '-'
	trunc    bool   // a non-zero digit could not be folded into mantissa
	bad      bool   // bad decimal string
	hardexp  bool   // exponent is hard to handle (> 2 digits, etc)
	ok       bool   // mantissa/exp describe the input exactly
	// sawdot   bool
	// sawexp   bool
	//_ [2]bool // padding
}
   363  
   364  func readFloat(s []byte, y floatinfo) (r readFloatResult) {
   365  	var i uint // uint, so that we eliminate bounds checking
   366  	var slen = uint(len(s))
   367  	if slen == 0 {
   368  		// read an empty string as the zero value
   369  		// r.bad = true
   370  		r.ok = true
   371  		return
   372  	}
   373  
   374  	if s[0] == '-' {
   375  		r.neg = true
   376  		i++
   377  	}
   378  
   379  	// we considered punting early if string has length > maxMantDigits, but this doesn't account
   380  	// for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20
   381  
   382  	var nd, ndMant, dp int8
   383  	var sawdot, sawexp bool
   384  	var xu uint64
   385  
   386  LOOP:
   387  	for ; i < slen; i++ {
   388  		switch s[i] {
   389  		case '.':
   390  			if sawdot {
   391  				r.bad = true
   392  				return
   393  			}
   394  			sawdot = true
   395  			dp = nd
   396  		case 'e', 'E':
   397  			sawexp = true
   398  			break LOOP
   399  		case '0':
   400  			if nd == 0 {
   401  				dp--
   402  				continue LOOP
   403  			}
   404  			nd++
   405  			if r.mantissa < y.mantCutoff {
   406  				r.mantissa *= fBase
   407  				ndMant++
   408  			}
   409  		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
   410  			nd++
   411  			if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff {
   412  				r.mantissa *= fBase
   413  				xu = r.mantissa + uint64(s[i]-'0')
   414  				if xu < r.mantissa {
   415  					r.trunc = true
   416  					return
   417  				}
   418  				r.mantissa = xu
   419  			} else if r.mantissa < y.mantCutoff {
   420  				// mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0')
   421  				r.mantissa = r.mantissa*fBase + uint64(s[i]-'0')
   422  			} else {
   423  				r.trunc = true
   424  				return
   425  			}
   426  			ndMant++
   427  		default:
   428  			r.bad = true
   429  			return
   430  		}
   431  	}
   432  
   433  	if !sawdot {
   434  		dp = nd
   435  	}
   436  
   437  	if sawexp {
   438  		i++
   439  		if i < slen {
   440  			var eneg bool
   441  			if s[i] == '+' {
   442  				i++
   443  			} else if s[i] == '-' {
   444  				i++
   445  				eneg = true
   446  			}
   447  			if i < slen {
   448  				// for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
   449  				// exit quick if exponent is more than 2 digits.
   450  				if i+2 < slen {
   451  					r.hardexp = true
   452  					return
   453  				}
   454  				var e int8
   455  				if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
   456  					r.bad = true
   457  					return
   458  				}
   459  				e = int8(s[i] - '0')
   460  				i++
   461  				if i < slen {
   462  					if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { //
   463  						r.bad = true
   464  						return
   465  					}
   466  					e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
   467  					i++
   468  				}
   469  				if eneg {
   470  					dp -= e
   471  				} else {
   472  					dp += e
   473  				}
   474  			}
   475  		}
   476  	}
   477  
   478  	if r.mantissa != 0 {
   479  		r.exp = dp - ndMant
   480  		// do not set ok=true for cases we cannot handle
   481  		if r.exp < -y.exactPow10 ||
   482  			r.exp > y.exactInts+y.exactPow10 ||
   483  			(y.mantbits != 0 && r.mantissa>>y.mantbits != 0) {
   484  			r.hardexp = true
   485  			return
   486  		}
   487  	}
   488  
   489  	r.ok = true
   490  	return
   491  }