github.com/patricebensoussan/go/codec@v1.2.99/decimal.go (about) 1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. 2 // Use of this source code is governed by a MIT license found in the LICENSE file. 3 4 package codec 5 6 import ( 7 "math" 8 "strconv" 9 ) 10 11 // Per go spec, floats are represented in memory as 12 // IEEE single or double precision floating point values. 13 // 14 // We also looked at the source for stdlib math/modf.go, 15 // reviewed https://github.com/chewxy/math32 16 // and read wikipedia documents describing the formats. 17 // 18 // It became clear that we could easily look at the bits to determine 19 // whether any fraction exists. 20 21 func parseFloat32(b []byte) (f float32, err error) { 22 return parseFloat32_custom(b) 23 } 24 25 func parseFloat64(b []byte) (f float64, err error) { 26 return parseFloat64_custom(b) 27 } 28 29 func parseFloat32_strconv(b []byte) (f float32, err error) { 30 f64, err := strconv.ParseFloat(stringView(b), 32) 31 f = float32(f64) 32 return 33 } 34 35 func parseFloat64_strconv(b []byte) (f float64, err error) { 36 return strconv.ParseFloat(stringView(b), 64) 37 } 38 39 // ------ parseFloat custom below -------- 40 41 // JSON really supports decimal numbers in base 10 notation, with exponent support. 42 // 43 // We assume the following: 44 // - a lot of floating point numbers in json files will have defined precision 45 // (in terms of number of digits after decimal point), etc. 46 // - these (referenced above) can be written in exact format. 47 // 48 // strconv.ParseFloat has some unnecessary overhead which we can do without 49 // for the common case: 50 // 51 // - expensive char-by-char check to see if underscores are in right place 52 // - testing for and skipping underscores 53 // - check if the string matches ignorecase +/- inf, +/- infinity, nan 54 // - support for base 16 (0xFFFF...) 
//
// The functions below try a fast path for floats which can be decoded
// without any loss of precision, meaning that:
//
//   - the significand fits within the significand bits of the 32-bit or 64-bit float
//   - the exponent fits within the exponent value
//   - there is no truncation (any extra digits are all trailing zeros)
//
// To figure out what the values are for maxMantDigits, use this idea below:
//
//	2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32)
//	2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
//	2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64)
//	2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
//
// Note: we only allow for up to what can comfortably fit into the significand
// ignoring the exponent, and we only try to parse iff the significand fits.

const (
	// fBase is the radix of the decimal text being parsed.
	fBase = 10

	// fUint64Cutoff is the smallest value n for which n*fBase no longer
	// fits in a uint64 (max uint64 / 10, plus 1).
	fUint64Cutoff = 0xFFFFFFFFFFFFFFFF/10 + 1
	// fUint32Cutoff = (1<<32-1)/10 + 1

	// Largest intermediate products accepted by the float32 / float64 readers
	// when an exponent has to be applied in two multiplication steps.
	fMaxMultiplierForExactPow10_32 = 1e7
	fMaxMultiplierForExactPow10_64 = 1e15
)

// Named powers of one thousand, used to spell out the integer pow10 table.
const (
	thousand    = 1000
	million     = 1000 * thousand
	billion     = 1000 * million
	trillion    = 1000 * billion
	quadrillion = 1000 * trillion
	quintillion = 1000 * quadrillion
)

// Exact powers of 10.
93 var uint64pow10 = [...]uint64{ 94 1, 10, 100, 95 1 * thousand, 10 * thousand, 100 * thousand, 96 1 * million, 10 * million, 100 * million, 97 1 * billion, 10 * billion, 100 * billion, 98 1 * trillion, 10 * trillion, 100 * trillion, 99 1 * quadrillion, 10 * quadrillion, 100 * quadrillion, 100 1 * quintillion, 10 * quintillion, 101 } 102 var float64pow10 = [...]float64{ 103 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 104 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 105 1e20, 1e21, 1e22, 106 } 107 var float32pow10 = [...]float32{ 108 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 109 } 110 111 type floatinfo struct { 112 mantbits uint8 113 114 // expbits uint8 // (unused) 115 // bias int16 // (unused) 116 // is32bit bool // (unused) 117 118 exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22) 119 120 exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0) 121 122 // maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32 123 124 mantCutoffIsUint64Cutoff bool 125 126 mantCutoff uint64 127 } 128 129 var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1} 130 var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1} 131 132 var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff} 133 134 func noFrac64(fbits uint64) bool { 135 exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias 136 // clear top 12+e bits, the integer part; if the rest is 0, then no fraction. 137 return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part 138 } 139 140 func noFrac32(fbits uint32) bool { 141 exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias 142 // clear top 9+e bits, the integer part; if the rest is 0, then no fraction. 
143 return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part 144 } 145 146 func strconvParseErr(b []byte, fn string) error { 147 return &strconv.NumError{ 148 Func: fn, 149 Err: strconv.ErrSyntax, 150 Num: string(b), 151 } 152 } 153 154 func parseFloat32_reader(r readFloatResult) (f float32, fail bool) { 155 f = float32(r.mantissa) 156 if r.exp == 0 { 157 } else if r.exp < 0 { // int / 10^k 158 f /= float32pow10[uint8(-r.exp)] 159 } else { // exp > 0 160 if r.exp > fi32.exactPow10 { 161 f *= float32pow10[r.exp-fi32.exactPow10] 162 if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range 163 fail = true 164 return // ok = false 165 } 166 f *= float32pow10[fi32.exactPow10] 167 } else { 168 f *= float32pow10[uint8(r.exp)] 169 } 170 } 171 if r.neg { 172 f = -f 173 } 174 return 175 } 176 177 func parseFloat32_custom(b []byte) (f float32, err error) { 178 r := readFloat(b, fi32) 179 if r.bad { 180 return 0, strconvParseErr(b, "ParseFloat") 181 } 182 if r.ok { 183 f, r.bad = parseFloat32_reader(r) 184 if !r.bad { 185 return 186 } 187 } 188 return parseFloat32_strconv(b) 189 } 190 191 func parseFloat64_reader(r readFloatResult) (f float64, fail bool) { 192 f = float64(r.mantissa) 193 if r.exp == 0 { 194 } else if r.exp < 0 { // int / 10^k 195 f /= float64pow10[-uint8(r.exp)] 196 } else { // exp > 0 197 if r.exp > fi64.exactPow10 { 198 f *= float64pow10[r.exp-fi64.exactPow10] 199 if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range 200 fail = true 201 return 202 } 203 f *= float64pow10[fi64.exactPow10] 204 } else { 205 f *= float64pow10[uint8(r.exp)] 206 } 207 } 208 if r.neg { 209 f = -f 210 } 211 return 212 } 213 214 func parseFloat64_custom(b []byte) (f float64, err error) { 215 r := readFloat(b, fi64) 216 if r.bad { 217 return 0, strconvParseErr(b, "ParseFloat") 218 } 219 if r.ok { 220 f, r.bad = parseFloat64_reader(r) 221 if !r.bad { 222 return 223 } 224 } 225 return parseFloat64_strconv(b) 226 } 227 228 
func parseUint64_simple(b []byte) (n uint64, ok bool) { 229 var i int 230 var n1 uint64 231 var c uint8 232 LOOP: 233 if i < len(b) { 234 c = b[i] 235 // unsigned integers don't overflow well on multiplication, so check cutoff here 236 // e.g. (maxUint64-5)*10 doesn't overflow well ... 237 // if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' { 238 if n >= fUint64Cutoff || c < '0' || c > '9' { 239 return 240 } else if c == '0' { 241 n *= fBase 242 } else { 243 n1 = n 244 n = n*fBase + uint64(c-'0') 245 if n < n1 { 246 return 247 } 248 } 249 i++ 250 goto LOOP 251 } 252 ok = true 253 return 254 } 255 256 func parseUint64_reader(r readFloatResult) (f uint64, fail bool) { 257 f = r.mantissa 258 if r.exp == 0 { 259 } else if r.exp < 0 { // int / 10^k 260 if f%uint64pow10[uint8(-r.exp)] != 0 { 261 fail = true 262 } else { 263 f /= uint64pow10[uint8(-r.exp)] 264 } 265 } else { // exp > 0 266 f *= uint64pow10[uint8(r.exp)] 267 } 268 return 269 } 270 271 func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) { 272 if len(b) == 0 { 273 ok = true 274 return 275 } 276 if b[0] == '-' { 277 if len(b) == 1 { 278 return 279 } 280 neg = true 281 b = b[1:] 282 } 283 284 u, ok = parseUint64_simple(b) 285 if ok { 286 return 287 } 288 289 r := readFloat(b, fi64u) 290 if r.ok { 291 var fail bool 292 u, fail = parseUint64_reader(r) 293 if fail { 294 f, err := parseFloat64(b) 295 if err != nil { 296 return 297 } 298 if !noFrac64(math.Float64bits(f)) { 299 return 300 } 301 u = uint64(f) 302 } 303 ok = true 304 return 305 } 306 return 307 } 308 309 // parseNumber will return an integer if only composed of [-]?[0-9]+ 310 // Else it will return a float. 
311 func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) { 312 var ok, neg bool 313 var f uint64 314 315 if len(b) == 0 { 316 return 317 } 318 319 if b[0] == '-' { 320 neg = true 321 f, ok = parseUint64_simple(b[1:]) 322 } else { 323 f, ok = parseUint64_simple(b) 324 } 325 326 if ok { 327 if neg { 328 z.v = valueTypeInt 329 if chkOvf.Uint2Int(f, neg) { 330 return strconvParseErr(b, "ParseInt") 331 } 332 z.i = -int64(f) 333 } else if preferSignedInt { 334 z.v = valueTypeInt 335 if chkOvf.Uint2Int(f, neg) { 336 return strconvParseErr(b, "ParseInt") 337 } 338 z.i = int64(f) 339 } else { 340 z.v = valueTypeUint 341 z.u = f 342 } 343 return 344 } 345 346 z.v = valueTypeFloat 347 z.f, err = parseFloat64_custom(b) 348 return 349 } 350 351 type readFloatResult struct { 352 mantissa uint64 353 exp int8 354 neg bool 355 trunc bool 356 bad bool // bad decimal string 357 hardexp bool // exponent is hard to handle (> 2 digits, etc) 358 ok bool 359 // sawdot bool 360 // sawexp bool 361 //_ [2]bool // padding 362 } 363 364 func readFloat(s []byte, y floatinfo) (r readFloatResult) { 365 var i uint // uint, so that we eliminate bounds checking 366 var slen = uint(len(s)) 367 if slen == 0 { 368 // read an empty string as the zero value 369 // r.bad = true 370 r.ok = true 371 return 372 } 373 374 if s[0] == '-' { 375 r.neg = true 376 i++ 377 } 378 379 // we considered punting early if string has length > maxMantDigits, but this doesn't account 380 // for trailing 0's e.g. 
700000000000000000000 can be encoded exactly as it is 7e20 381 382 var nd, ndMant, dp int8 383 var sawdot, sawexp bool 384 var xu uint64 385 386 LOOP: 387 for ; i < slen; i++ { 388 switch s[i] { 389 case '.': 390 if sawdot { 391 r.bad = true 392 return 393 } 394 sawdot = true 395 dp = nd 396 case 'e', 'E': 397 sawexp = true 398 break LOOP 399 case '0': 400 if nd == 0 { 401 dp-- 402 continue LOOP 403 } 404 nd++ 405 if r.mantissa < y.mantCutoff { 406 r.mantissa *= fBase 407 ndMant++ 408 } 409 case '1', '2', '3', '4', '5', '6', '7', '8', '9': 410 nd++ 411 if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff { 412 r.mantissa *= fBase 413 xu = r.mantissa + uint64(s[i]-'0') 414 if xu < r.mantissa { 415 r.trunc = true 416 return 417 } 418 r.mantissa = xu 419 } else if r.mantissa < y.mantCutoff { 420 // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0') 421 r.mantissa = r.mantissa*fBase + uint64(s[i]-'0') 422 } else { 423 r.trunc = true 424 return 425 } 426 ndMant++ 427 default: 428 r.bad = true 429 return 430 } 431 } 432 433 if !sawdot { 434 dp = nd 435 } 436 437 if sawexp { 438 i++ 439 if i < slen { 440 var eneg bool 441 if s[i] == '+' { 442 i++ 443 } else if s[i] == '-' { 444 i++ 445 eneg = true 446 } 447 if i < slen { 448 // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17). 449 // exit quick if exponent is more than 2 digits. 
450 if i+2 < slen { 451 r.hardexp = true 452 return 453 } 454 var e int8 455 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // 456 r.bad = true 457 return 458 } 459 e = int8(s[i] - '0') 460 i++ 461 if i < slen { 462 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // 463 r.bad = true 464 return 465 } 466 e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0') 467 i++ 468 } 469 if eneg { 470 dp -= e 471 } else { 472 dp += e 473 } 474 } 475 } 476 } 477 478 if r.mantissa != 0 { 479 r.exp = dp - ndMant 480 // do not set ok=true for cases we cannot handle 481 if r.exp < -y.exactPow10 || 482 r.exp > y.exactInts+y.exactPow10 || 483 (y.mantbits != 0 && r.mantissa>>y.mantbits != 0) { 484 r.hardexp = true 485 return 486 } 487 } 488 489 r.ok = true 490 return 491 }