github.com/ugorji/go/codec@v1.2.13-0.20240307214044-07c54c229a5a/decimal.go (about) 1 // Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. 2 // Use of this source code is governed by a MIT license found in the LICENSE file. 3 4 package codec 5 6 import ( 7 "math" 8 "strconv" 9 ) 10 11 // Per go spec, floats are represented in memory as 12 // IEEE single or double precision floating point values. 13 // 14 // We also looked at the source for stdlib math/modf.go, 15 // reviewed https://github.com/chewxy/math32 16 // and read wikipedia documents describing the formats. 17 // 18 // It became clear that we could easily look at the bits to determine 19 // whether any fraction exists. 20 21 func parseFloat32(b []byte) (f float32, err error) { 22 return parseFloat32_custom(b) 23 } 24 25 func parseFloat64(b []byte) (f float64, err error) { 26 return parseFloat64_custom(b) 27 } 28 29 func parseFloat32_strconv(b []byte) (f float32, err error) { 30 f64, err := strconv.ParseFloat(stringView(b), 32) 31 f = float32(f64) 32 return 33 } 34 35 func parseFloat64_strconv(b []byte) (f float64, err error) { 36 return strconv.ParseFloat(stringView(b), 64) 37 } 38 39 // ------ parseFloat custom below -------- 40 41 // JSON really supports decimal numbers in base 10 notation, with exponent support. 42 // 43 // We assume the following: 44 // - a lot of floating point numbers in json files will have defined precision 45 // (in terms of number of digits after decimal point), etc. 46 // - these (referenced above) can be written in exact format. 47 // 48 // strconv.ParseFloat has some unnecessary overhead which we can do without 49 // for the common case: 50 // 51 // - expensive char-by-char check to see if underscores are in right place 52 // - testing for and skipping underscores 53 // - check if the string matches ignorecase +/- inf, +/- infinity, nan 54 // - support for base 16 (0xFFFF...) 
//
// The functions below will try a fast-path for floats which can be decoded
// without any loss of precision, meaning:
//
//   - the significand fits within the significand bits of the 32-bit or 64-bit float
//   - the exponent fits within the exponent value
//   - there is no truncation (any extra digits are all trailing zeros)
//
// To figure out what the values are for maxMantDigits, use this idea below:
//
//	2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of float32)
//	2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
//	2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of float64)
//	2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
//
// Note: we only allow for up to what can comfortably fit into the significand
// ignoring the exponent, and we only try to parse iff significand fits.

const (
	// Upper bounds on the intermediate product when a positive exponent is
	// applied in two multiplication steps; a larger product abandons the fast path.
	fMaxMultiplierForExactPow10_64 = 1e15
	fMaxMultiplierForExactPow10_32 = 1e7

	// fUint64Cutoff is the smallest value v for which v*10 overflows a uint64.
	fUint64Cutoff = math.MaxUint64/10 + 1
	// fUint32Cutoff = (1<<32-1)/10 + 1

	// fBase is the numeric base of the text being parsed.
	fBase = 10
)

// Named powers of one thousand (untyped integer constants).
const (
	thousand    = 1000
	million     = 1000 * thousand
	billion     = 1000 * million
	trillion    = 1000 * billion
	quadrillion = 1000 * trillion
	quintillion = 1000 * quadrillion
)

// Exact powers of 10.
93 var uint64pow10 = [...]uint64{ 94 1, 10, 100, 95 1 * thousand, 10 * thousand, 100 * thousand, 96 1 * million, 10 * million, 100 * million, 97 1 * billion, 10 * billion, 100 * billion, 98 1 * trillion, 10 * trillion, 100 * trillion, 99 1 * quadrillion, 10 * quadrillion, 100 * quadrillion, 100 1 * quintillion, 10 * quintillion, 101 } 102 var float64pow10 = [...]float64{ 103 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 104 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 105 1e20, 1e21, 1e22, 106 } 107 var float32pow10 = [...]float32{ 108 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 109 } 110 111 type floatinfo struct { 112 mantbits uint8 113 114 // expbits uint8 // (unused) 115 // bias int16 // (unused) 116 // is32bit bool // (unused) 117 118 exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22) 119 120 exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0) 121 122 // maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32 123 124 mantCutoffIsUint64Cutoff bool 125 126 mantCutoff uint64 127 } 128 129 var fi32 = floatinfo{23, 10, 7, false, 1<<23 - 1} 130 var fi64 = floatinfo{52, 22, 15, false, 1<<52 - 1} 131 132 var fi64u = floatinfo{0, 19, 0, true, fUint64Cutoff} 133 134 func noFrac64(fbits uint64) bool { 135 if fbits == 0 { 136 return true 137 } 138 139 exp := uint64(fbits>>52)&0x7FF - 1023 // uint(x>>shift)&mask - bias 140 // clear top 12+e bits, the integer part; if the rest is 0, then no fraction. 141 return exp < 52 && fbits<<(12+exp) == 0 // means there's no fractional part 142 } 143 144 func noFrac32(fbits uint32) bool { 145 if fbits == 0 { 146 return true 147 } 148 149 exp := uint32(fbits>>23)&0xFF - 127 // uint(x>>shift)&mask - bias 150 // clear top 9+e bits, the integer part; if the rest is 0, then no fraction. 
151 return exp < 23 && fbits<<(9+exp) == 0 // means there's no fractional part 152 } 153 154 func strconvParseErr(b []byte, fn string) error { 155 return &strconv.NumError{ 156 Func: fn, 157 Err: strconv.ErrSyntax, 158 Num: string(b), 159 } 160 } 161 162 func parseFloat32_reader(r readFloatResult) (f float32, fail bool) { 163 f = float32(r.mantissa) 164 if r.exp == 0 { 165 } else if r.exp < 0 { // int / 10^k 166 f /= float32pow10[uint8(-r.exp)] 167 } else { // exp > 0 168 if r.exp > fi32.exactPow10 { 169 f *= float32pow10[r.exp-fi32.exactPow10] 170 if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range 171 fail = true 172 return // ok = false 173 } 174 f *= float32pow10[fi32.exactPow10] 175 } else { 176 f *= float32pow10[uint8(r.exp)] 177 } 178 } 179 if r.neg { 180 f = -f 181 } 182 return 183 } 184 185 func parseFloat32_custom(b []byte) (f float32, err error) { 186 r := readFloat(b, fi32) 187 if r.bad { 188 return 0, strconvParseErr(b, "ParseFloat") 189 } 190 if r.ok { 191 f, r.bad = parseFloat32_reader(r) 192 if !r.bad { 193 return 194 } 195 } 196 return parseFloat32_strconv(b) 197 } 198 199 func parseFloat64_reader(r readFloatResult) (f float64, fail bool) { 200 f = float64(r.mantissa) 201 if r.exp == 0 { 202 } else if r.exp < 0 { // int / 10^k 203 f /= float64pow10[-uint8(r.exp)] 204 } else { // exp > 0 205 if r.exp > fi64.exactPow10 { 206 f *= float64pow10[r.exp-fi64.exactPow10] 207 if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range 208 fail = true 209 return 210 } 211 f *= float64pow10[fi64.exactPow10] 212 } else { 213 f *= float64pow10[uint8(r.exp)] 214 } 215 } 216 if r.neg { 217 f = -f 218 } 219 return 220 } 221 222 func parseFloat64_custom(b []byte) (f float64, err error) { 223 r := readFloat(b, fi64) 224 if r.bad { 225 return 0, strconvParseErr(b, "ParseFloat") 226 } 227 if r.ok { 228 f, r.bad = parseFloat64_reader(r) 229 if !r.bad { 230 return 231 } 232 } 233 return parseFloat64_strconv(b) 234 } 235 236 
func parseUint64_simple(b []byte) (n uint64, ok bool) { 237 if len(b) > 1 && b[0] == '0' { // punt on numbers with leading zeros 238 return 239 } 240 241 var i int 242 var n1 uint64 243 var c uint8 244 LOOP: 245 if i < len(b) { 246 c = b[i] 247 // unsigned integers don't overflow well on multiplication, so check cutoff here 248 // e.g. (maxUint64-5)*10 doesn't overflow well ... 249 // if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' { 250 if n >= fUint64Cutoff || c < '0' || c > '9' { 251 return 252 } else if c == '0' { 253 n *= fBase 254 } else { 255 n1 = n 256 n = n*fBase + uint64(c-'0') 257 if n < n1 { 258 return 259 } 260 } 261 i++ 262 goto LOOP 263 } 264 ok = true 265 return 266 } 267 268 func parseUint64_reader(r readFloatResult) (f uint64, fail bool) { 269 f = r.mantissa 270 if r.exp == 0 { 271 } else if r.exp < 0 { // int / 10^k 272 if f%uint64pow10[uint8(-r.exp)] != 0 { 273 fail = true 274 } else { 275 f /= uint64pow10[uint8(-r.exp)] 276 } 277 } else { // exp > 0 278 f *= uint64pow10[uint8(r.exp)] 279 } 280 return 281 } 282 283 func parseInteger_bytes(b []byte) (u uint64, neg, ok bool) { 284 if len(b) == 0 { 285 ok = true 286 return 287 } 288 if b[0] == '-' { 289 if len(b) == 1 { 290 return 291 } 292 neg = true 293 b = b[1:] 294 } 295 296 u, ok = parseUint64_simple(b) 297 if ok { 298 return 299 } 300 301 r := readFloat(b, fi64u) 302 if r.ok { 303 var fail bool 304 u, fail = parseUint64_reader(r) 305 if fail { 306 f, err := parseFloat64(b) 307 if err != nil { 308 return 309 } 310 if !noFrac64(math.Float64bits(f)) { 311 return 312 } 313 u = uint64(f) 314 } 315 ok = true 316 return 317 } 318 return 319 } 320 321 // parseNumber will return an integer if only composed of [-]?[0-9]+ 322 // Else it will return a float. 
323 func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) { 324 var ok, neg bool 325 var f uint64 326 327 if len(b) == 0 { 328 return 329 } 330 331 if b[0] == '-' { 332 neg = true 333 f, ok = parseUint64_simple(b[1:]) 334 } else { 335 f, ok = parseUint64_simple(b) 336 } 337 338 if ok { 339 if neg { 340 z.v = valueTypeInt 341 if chkOvf.Uint2Int(f, neg) { 342 return strconvParseErr(b, "ParseInt") 343 } 344 z.i = -int64(f) 345 } else if preferSignedInt { 346 z.v = valueTypeInt 347 if chkOvf.Uint2Int(f, neg) { 348 return strconvParseErr(b, "ParseInt") 349 } 350 z.i = int64(f) 351 } else { 352 z.v = valueTypeUint 353 z.u = f 354 } 355 return 356 } 357 358 z.v = valueTypeFloat 359 z.f, err = parseFloat64_custom(b) 360 return 361 } 362 363 type readFloatResult struct { 364 mantissa uint64 365 exp int8 366 neg bool 367 trunc bool 368 bad bool // bad decimal string 369 hardexp bool // exponent is hard to handle (> 2 digits, etc) 370 ok bool 371 // sawdot bool 372 // sawexp bool 373 //_ [2]bool // padding 374 } 375 376 func readFloat(s []byte, y floatinfo) (r readFloatResult) { 377 var i uint // uint, so that we eliminate bounds checking 378 var slen = uint(len(s)) 379 if slen == 0 { 380 // read an empty string as the zero value 381 // r.bad = true 382 r.ok = true 383 return 384 } 385 386 if s[0] == '-' { 387 r.neg = true 388 i++ 389 } 390 391 // considered punting early if string has length > maxMantDigits, but doesn't account 392 // for trailing 0's e.g. 
700000000000000000000 can be encoded exactly as it is 7e20 393 394 var nd, ndMant, dp int8 395 var sawdot, sawexp bool 396 var xu uint64 397 398 if i+1 < slen && s[i] == '0' { 399 switch s[i+1] { 400 case '.', 'e', 'E': 401 // ok 402 default: 403 r.bad = true 404 return 405 } 406 } 407 408 LOOP: 409 for ; i < slen; i++ { 410 switch s[i] { 411 case '.': 412 if sawdot { 413 r.bad = true 414 return 415 } 416 sawdot = true 417 dp = nd 418 case 'e', 'E': 419 sawexp = true 420 break LOOP 421 case '0': 422 if nd == 0 { 423 dp-- 424 continue LOOP 425 } 426 nd++ 427 if r.mantissa < y.mantCutoff { 428 r.mantissa *= fBase 429 ndMant++ 430 } 431 case '1', '2', '3', '4', '5', '6', '7', '8', '9': 432 nd++ 433 if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff { 434 r.mantissa *= fBase 435 xu = r.mantissa + uint64(s[i]-'0') 436 if xu < r.mantissa { 437 r.trunc = true 438 return 439 } 440 r.mantissa = xu 441 } else if r.mantissa < y.mantCutoff { 442 // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0') 443 r.mantissa = r.mantissa*fBase + uint64(s[i]-'0') 444 } else { 445 r.trunc = true 446 return 447 } 448 ndMant++ 449 default: 450 r.bad = true 451 return 452 } 453 } 454 455 if !sawdot { 456 dp = nd 457 } 458 459 if sawexp { 460 i++ 461 if i < slen { 462 var eneg bool 463 if s[i] == '+' { 464 i++ 465 } else if s[i] == '-' { 466 i++ 467 eneg = true 468 } 469 if i < slen { 470 // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17). 471 // exit quick if exponent is more than 2 digits. 
472 if i+2 < slen { 473 r.hardexp = true 474 return 475 } 476 var e int8 477 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // 478 r.bad = true 479 return 480 } 481 e = int8(s[i] - '0') 482 i++ 483 if i < slen { 484 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // 485 r.bad = true 486 return 487 } 488 e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0') 489 i++ 490 } 491 if eneg { 492 dp -= e 493 } else { 494 dp += e 495 } 496 } 497 } 498 } 499 500 if r.mantissa != 0 { 501 r.exp = dp - ndMant 502 // do not set ok=true for cases we cannot handle 503 if r.exp < -y.exactPow10 || 504 r.exp > y.exactInts+y.exactPow10 || 505 (y.mantbits != 0 && r.mantissa>>y.mantbits != 0) { 506 r.hardexp = true 507 return 508 } 509 } 510 511 r.ok = true 512 return 513 }