github.com/neilotoole/jsoncolor@v0.6.0/parse.go (about) 1 package jsoncolor 2 3 import ( 4 "bytes" 5 "math" 6 "reflect" 7 "unicode" 8 "unicode/utf16" 9 "unicode/utf8" 10 11 "github.com/segmentio/encoding/ascii" 12 ) 13 14 // All spaces characters defined in the json specification. 15 const ( 16 sp = ' ' 17 ht = '\t' 18 nl = '\n' 19 cr = '\r' 20 ) 21 22 const ( 23 escape = '\\' 24 quote = '"' //nolint:varcheck // from original code 25 ) 26 27 func skipSpaces(b []byte) []byte { 28 b, _ = skipSpacesN(b) 29 return b 30 } 31 32 func skipSpacesN(b []byte) ([]byte, int) { 33 for i := range b { 34 switch b[i] { 35 case sp, ht, nl, cr: 36 default: 37 return b[i:], i 38 } 39 } 40 return nil, 0 41 } 42 43 // parseInt parses a decimanl representation of an int64 from b. 44 // 45 // The function is equivalent to calling strconv.ParseInt(string(b), 10, 64) but 46 // it prevents Go from making a memory allocation for converting a byte slice to 47 // a string (escape analysis fails due to the error returned by strconv.ParseInt). 48 // 49 // Because it only works with base 10 the function is also significantly faster 50 // than strconv.ParseInt. 51 func parseInt(b []byte, t reflect.Type) (int64, []byte, error) { 52 var value int64 53 var count int 54 55 if len(b) == 0 { 56 return 0, b, syntaxError(b, "cannot decode integer from an empty input") 57 } 58 59 if b[0] == '-' { 60 const max = math.MinInt64 61 const lim = max / 10 62 63 if len(b) == 1 { 64 return 0, b, syntaxError(b, "cannot decode integer from '-'") 65 } 66 67 if len(b) > 2 && b[1] == '0' && '0' <= b[2] && b[2] <= '9' { 68 return 0, b, syntaxError(b, "invalid leading character '0' in integer") 69 } 70 71 for _, d := range b[1:] { 72 if !(d >= '0' && d <= '9') { 73 if count == 0 { 74 b, err := inputError(b, t) 75 return 0, b, err 76 } 77 break 78 } 79 80 if value < lim { 81 return 0, b, unmarshalOverflow(b, t) 82 } 83 84 value *= 10 85 x := int64(d - '0') 86 87 if value < (max + x) { 88 return 0, b, unmarshalOverflow(b, t) 89 } 90 91 value -= x 92 count++ 93 } 94 95 count++ 96 } else { 97 const max = math.MaxInt64 98 const lim = max / 10 99 100 if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' { 101 return 0, b, syntaxError(b, "invalid leading character '0' in integer") 102 } 103 104 for _, d := range b { 105 if !(d >= '0' && d <= '9') { 106 if count == 0 { 107 b, err := inputError(b, t) 108 return 0, b, err 109 } 110 break 111 } 112 x := int64(d - '0') 113 114 if value > lim { 115 return 0, b, unmarshalOverflow(b, t) 116 } 117 118 if value *= 10; value > (max - x) { 119 return 0, b, unmarshalOverflow(b, t) 120 } 121 122 value += x 123 count++ 124 } 125 } 126 127 if count < len(b) { 128 switch b[count] { 129 case '.', 'e', 'E': // was this actually a float? 130 v, r, err := parseNumber(b) 131 if err != nil { 132 v, r = b[:count+1], b[count+1:] 133 } 134 return 0, r, unmarshalTypeError(v, t) 135 } 136 } 137 138 return value, b[count:], nil 139 } 140 141 // parseUint is like parseInt but for unsigned integers. 142 func parseUint(b []byte, t reflect.Type) (uint64, []byte, error) { 143 const max = math.MaxUint64 144 const lim = max / 10 145 146 var value uint64 147 var count int 148 149 if len(b) == 0 { 150 return 0, b, syntaxError(b, "cannot decode integer value from an empty input") 151 } 152 153 if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' { 154 return 0, b, syntaxError(b, "invalid leading character '0' in integer") 155 } 156 157 for _, d := range b { 158 if !(d >= '0' && d <= '9') { 159 if count == 0 { 160 b, err := inputError(b, t) 161 return 0, b, err 162 } 163 break 164 } 165 x := uint64(d - '0') 166 167 if value > lim { 168 return 0, b, unmarshalOverflow(b, t) 169 } 170 171 if value *= 10; value > (max - x) { 172 return 0, b, unmarshalOverflow(b, t) 173 } 174 175 value += x 176 count++ 177 } 178 179 if count < len(b) { 180 switch b[count] { 181 case '.', 'e', 'E': // was this actually a float? 182 v, r, err := parseNumber(b) 183 if err != nil { 184 v, r = b[:count+1], b[count+1:] 185 } 186 return 0, r, unmarshalTypeError(v, t) 187 } 188 } 189 190 return value, b[count:], nil 191 } 192 193 // parseUintHex parses a hexadecimanl representation of a uint64 from b. 194 // 195 // The function is equivalent to calling strconv.ParseUint(string(b), 16, 64) but 196 // it prevents Go from making a memory allocation for converting a byte slice to 197 // a string (escape analysis fails due to the error returned by strconv.ParseUint). 198 // 199 // Because it only works with base 16 the function is also significantly faster 200 // than strconv.ParseUint. 201 func parseUintHex(b []byte) (uint64, []byte, error) { 202 const max = math.MaxUint64 203 const lim = max / 0x10 204 205 var value uint64 206 var count int 207 208 if len(b) == 0 { 209 return 0, b, syntaxError(b, "cannot decode hexadecimal value from an empty input") 210 } 211 212 parseLoop: 213 for i, d := range b { 214 var x uint64 215 216 switch { 217 case d >= '0' && d <= '9': 218 x = uint64(d - '0') 219 220 case d >= 'A' && d <= 'F': 221 x = uint64(d-'A') + 0xA 222 223 case d >= 'a' && d <= 'f': 224 x = uint64(d-'a') + 0xA 225 226 default: 227 if i == 0 { 228 return 0, b, syntaxError(b, "expected hexadecimal digit but found '%c'", d) 229 } 230 break parseLoop 231 } 232 233 if value > lim { 234 return 0, b, syntaxError(b, "hexadecimal value out of range") 235 } 236 237 if value *= 0x10; value > (max - x) { 238 return 0, b, syntaxError(b, "hexadecimal value out of range") 239 } 240 241 value += x 242 count++ 243 } 244 245 return value, b[count:], nil 246 } 247 248 func parseNull(b []byte) ([]byte, []byte, error) { 249 if hasNullPrefix(b) { 250 return b[:4], b[4:], nil 251 } 252 if len(b) < 4 { 253 return nil, b[len(b):], unexpectedEOF(b) 254 } 255 return nil, b, syntaxError(b, "expected 'null' but found invalid token") 256 } 257 258 func parseTrue(b []byte) ([]byte, []byte, error) { 259 if hasTruePrefix(b) { 260 return b[:4], b[4:], nil 261 } 262 if len(b) < 4 { 263 return nil, b[len(b):], unexpectedEOF(b) 264 } 265 return nil, b, syntaxError(b, "expected 'true' but found invalid token") 266 } 267 268 func parseFalse(b []byte) ([]byte, []byte, error) { 269 if hasFalsePrefix(b) { 270 return b[:5], b[5:], nil 271 } 272 if len(b) < 5 { 273 return nil, b[len(b):], unexpectedEOF(b) 274 } 275 return nil, b, syntaxError(b, "expected 'false' but found invalid token") 276 } 277 278 func parseNumber(b []byte) (v, r []byte, err error) { 279 if len(b) == 0 { 280 r, err = b, unexpectedEOF(b) 281 return 282 } 283 284 i := 0 285 // sign 286 if b[i] == '-' { 287 i++ 288 } 289 290 if i == len(b) { 291 r, err = b[i:], syntaxError(b, "missing number value after sign") 292 return 293 } 294 295 if b[i] < '0' || b[i] > '9' { 296 r, err = b[i:], syntaxError(b, "expected digit but got '%c'", b[i]) 297 return 298 } 299 300 // integer part 301 if b[i] == '0' { 302 i++ 303 if i == len(b) || (b[i] != '.' && b[i] != 'e' && b[i] != 'E') { 304 v, r = b[:i], b[i:] 305 return 306 } 307 if '0' <= b[i] && b[i] <= '9' { 308 r, err = b[i:], syntaxError(b, "cannot decode number with leading '0' character") 309 return 310 } 311 } 312 313 for i < len(b) && '0' <= b[i] && b[i] <= '9' { 314 i++ 315 } 316 317 // decimal part 318 if i < len(b) && b[i] == '.' { 319 i++ 320 decimalStart := i 321 322 for i < len(b) { 323 if c := b[i]; !('0' <= c && c <= '9') { 324 if i == decimalStart { 325 r, err = b[i:], syntaxError(b, "expected digit but found '%c'", c) 326 return 327 } 328 break 329 } 330 i++ 331 } 332 333 if i == decimalStart { 334 r, err = b[i:], syntaxError(b, "expected decimal part after '.'") 335 return 336 } 337 } 338 339 // exponent part 340 if i < len(b) && (b[i] == 'e' || b[i] == 'E') { 341 i++ 342 343 if i < len(b) { 344 if c := b[i]; c == '+' || c == '-' { 345 i++ 346 } 347 } 348 349 if i == len(b) { 350 r, err = b[i:], syntaxError(b, "missing exponent in number") 351 return 352 } 353 354 exponentStart := i 355 356 for i < len(b) { 357 if c := b[i]; !('0' <= c && c <= '9') { 358 if i == exponentStart { 359 err = syntaxError(b, "expected digit but found '%c'", c) 360 return 361 } 362 break 363 } 364 i++ 365 } 366 } 367 368 v, r = b[:i], b[i:] 369 return 370 } 371 372 func parseUnicode(b []byte) (rune, int, error) { 373 if len(b) < 4 { 374 return 0, 0, syntaxError(b, "unicode code point must have at least 4 characters") 375 } 376 377 u, r, err := parseUintHex(b[:4]) 378 if err != nil { 379 return 0, 0, syntaxError(b, "parsing unicode code point: %s", err) 380 } 381 382 if len(r) != 0 { 383 return 0, 0, syntaxError(b, "invalid unicode code point") 384 } 385 386 return rune(u), 4, nil 387 } 388 389 func parseStringFast(b []byte) ([]byte, []byte, bool, error) { 390 if len(b) < 2 { 391 return nil, b[len(b):], false, unexpectedEOF(b) 392 } 393 if b[0] != '"' { 394 return nil, b, false, syntaxError(b, "expected '\"' at the beginning of a string value") 395 } 396 397 n := bytes.IndexByte(b[1:], '"') + 2 398 if n <= 1 { 399 return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value") 400 } 401 if bytes.IndexByte(b[1:n], '\\') < 0 && ascii.ValidPrint(b[1:n]) { 402 return b[:n], b[n:], false, nil 403 } 404 405 for i := 1; i < len(b); i++ { 406 switch b[i] { 407 case '\\': 408 if i++; i < len(b) { 409 switch b[i] { 410 case '"', '\\', '/', 'n', 'r', 't', 'f', 'b': 411 case 'u': 412 _, n, err := parseUnicode(b[i+1:]) 413 if err != nil { 414 return nil, b, false, err 415 } 416 i += n 417 default: 418 return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i]) 419 } 420 } 421 422 case '"': 423 return b[:i+1], b[i+1:], true, nil 424 425 default: 426 if b[i] < 0x20 { 427 return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i]) 428 } 429 } 430 } 431 432 return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value") 433 } 434 435 func parseString(b []byte) ([]byte, []byte, error) { 436 s, b, _, err := parseStringFast(b) 437 return s, b, err 438 } 439 440 func parseStringUnquote(b []byte, r []byte) ([]byte, []byte, bool, error) { 441 s, b, escaped, err := parseStringFast(b) 442 if err != nil { 443 return s, b, false, err 444 } 445 446 s = s[1 : len(s)-1] // trim the quotes 447 448 if !escaped { 449 return s, b, false, nil 450 } 451 452 if r == nil { 453 r = make([]byte, 0, len(s)) 454 } 455 456 for len(s) != 0 { 457 i := bytes.IndexByte(s, '\\') 458 459 if i < 0 { 460 r = appendCoerceInvalidUTF8(r, s) 461 break 462 } 463 464 r = appendCoerceInvalidUTF8(r, s[:i]) 465 s = s[i+1:] 466 467 c := s[0] 468 switch c { 469 case '"', '\\', '/': 470 // simple escaped character 471 case 'n': 472 c = '\n' 473 474 case 'r': 475 c = '\r' 476 477 case 't': 478 c = '\t' 479 480 case 'b': 481 c = '\b' 482 483 case 'f': 484 c = '\f' 485 486 case 'u': 487 s = s[1:] 488 489 r1, n1, err := parseUnicode(s) 490 if err != nil { 491 return r, b, true, err 492 } 493 s = s[n1:] 494 495 if utf16.IsSurrogate(r1) { 496 if !hasPrefix(s, `\u`) { 497 r1 = unicode.ReplacementChar 498 } else { 499 r2, n2, err := parseUnicode(s[2:]) 500 if err != nil { 501 return r, b, true, err 502 } 503 if r1 = utf16.DecodeRune(r1, r2); r1 != unicode.ReplacementChar { 504 s = s[2+n2:] 505 } 506 } 507 } 508 509 r = appendRune(r, r1) 510 continue 511 512 default: // not sure what this escape sequence is 513 return r, b, false, syntaxError(s, "invalid character '%c' in string escape code", c) 514 } 515 516 r = append(r, c) 517 s = s[1:] 518 } 519 520 return r, b, true, nil 521 } 522 523 func appendRune(b []byte, r rune) []byte { 524 n := len(b) 525 b = append(b, 0, 0, 0, 0) 526 return b[:n+utf8.EncodeRune(b[n:], r)] 527 } 528 529 func appendCoerceInvalidUTF8(b []byte, s []byte) []byte { 530 c := [4]byte{} 531 532 for _, r := range string(s) { 533 b = append(b, c[:utf8.EncodeRune(c[:], r)]...) 534 } 535 536 return b 537 } 538 539 func parseObject(b []byte) ([]byte, []byte, error) { 540 if len(b) < 2 { 541 return nil, b[len(b):], unexpectedEOF(b) 542 } 543 544 if b[0] != '{' { 545 return nil, b, syntaxError(b, "expected '{' at the beginning of an object value") 546 } 547 548 var err error 549 var a = b 550 var n = len(b) 551 var i = 0 552 553 b = b[1:] 554 for { 555 b = skipSpaces(b) 556 557 if len(b) == 0 { 558 return nil, b, syntaxError(b, "cannot decode object from empty input") 559 } 560 561 if b[0] == '}' { 562 j := (n - len(b)) + 1 563 return a[:j], a[j:], nil 564 } 565 566 if i != 0 { 567 if len(b) == 0 { 568 return nil, b, syntaxError(b, "unexpected EOF after object field value") 569 } 570 if b[0] != ',' { 571 return nil, b, syntaxError(b, "expected ',' after object field value but found '%c'", b[0]) 572 } 573 b = skipSpaces(b[1:]) 574 if len(b) == 0 { 575 return nil, b, unexpectedEOF(b) 576 } 577 if b[0] == '}' { 578 return nil, b, syntaxError(b, "unexpected trailing comma after object field") 579 } 580 } 581 582 _, b, err = parseString(b) 583 if err != nil { 584 return nil, b, err 585 } 586 b = skipSpaces(b) 587 588 if len(b) == 0 { 589 return nil, b, syntaxError(b, "unexpected EOF after object field key") 590 } 591 if b[0] != ':' { 592 return nil, b, syntaxError(b, "expected ':' after object field key but found '%c'", b[0]) 593 } 594 b = skipSpaces(b[1:]) 595 596 _, b, err = parseValue(b) 597 if err != nil { 598 return nil, b, err 599 } 600 601 i++ 602 } 603 } 604 605 func parseArray(b []byte) ([]byte, []byte, error) { 606 if len(b) < 2 { 607 return nil, b[len(b):], unexpectedEOF(b) 608 } 609 610 if b[0] != '[' { 611 return nil, b, syntaxError(b, "expected '[' at the beginning of array value") 612 } 613 614 var err error 615 var a = b 616 var n = len(b) 617 var i = 0 618 619 b = b[1:] 620 for { 621 b = skipSpaces(b) 622 623 if len(b) == 0 { 624 return nil, b, syntaxError(b, "missing closing ']' after array value") 625 } 626 627 if b[0] == ']' { 628 j := (n - len(b)) + 1 629 return a[:j], a[j:], nil 630 } 631 632 if i != 0 { 633 if len(b) == 0 { 634 return nil, b, syntaxError(b, "unexpected EOF after array element") 635 } 636 if b[0] != ',' { 637 return nil, b, syntaxError(b, "expected ',' after array element but found '%c'", b[0]) 638 } 639 b = skipSpaces(b[1:]) 640 if len(b) == 0 { 641 return nil, b, unexpectedEOF(b) 642 } 643 if b[0] == ']' { 644 return nil, b, syntaxError(b, "unexpected trailing comma after object field") 645 } 646 } 647 648 _, b, err = parseValue(b) 649 if err != nil { 650 return nil, b, err 651 } 652 653 i++ 654 } 655 } 656 657 func parseValue(b []byte) ([]byte, []byte, error) { 658 if len(b) != 0 { 659 switch b[0] { 660 case '{': 661 return parseObject(b) 662 case '[': 663 return parseArray(b) 664 case '"': 665 return parseString(b) 666 case 'n': 667 return parseNull(b) 668 case 't': 669 return parseTrue(b) 670 case 'f': 671 return parseFalse(b) 672 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 673 return parseNumber(b) 674 default: 675 return nil, b, syntaxError(b, "invalid character '%c' looking for beginning of value", b[0]) 676 } 677 } 678 return nil, b, syntaxError(b, "unexpected end of JSON input") 679 } 680 681 func hasNullPrefix(b []byte) bool { 682 return len(b) >= 4 && string(b[:4]) == "null" 683 } 684 685 func hasTruePrefix(b []byte) bool { 686 return len(b) >= 4 && string(b[:4]) == "true" 687 } 688 689 func hasFalsePrefix(b []byte) bool { 690 return len(b) >= 5 && string(b[:5]) == "false" 691 } 692 693 func hasPrefix(b []byte, s string) bool { 694 return len(b) >= len(s) && s == string(b[:len(s)]) 695 } 696 697 func hasLeadingSign(b []byte) bool { 698 return len(b) > 0 && (b[0] == '+' || b[0] == '-') 699 } 700 701 func hasLeadingZeroes(b []byte) bool { 702 if hasLeadingSign(b) { 703 b = b[1:] 704 } 705 return len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' 706 } 707 708 func appendToLower(b, s []byte) []byte { 709 if ascii.Valid(s) { // fast path for ascii strings 710 i := 0 711 712 for j := range s { 713 c := s[j] 714 715 if 'A' <= c && c <= 'Z' { 716 b = append(b, s[i:j]...) 717 b = append(b, c+('a'-'A')) 718 i = j + 1 719 } 720 } 721 722 return append(b, s[i:]...) 723 } 724 725 for _, r := range string(s) { 726 b = appendRune(b, foldRune(r)) 727 } 728 729 return b 730 } 731 732 func foldRune(r rune) rune { 733 if r = unicode.SimpleFold(r); 'A' <= r && r <= 'Z' { 734 r = r + ('a' - 'A') 735 } 736 return r 737 }