// Recovered from a scraped listing of
// github.com/neilotoole/jsoncolor@v0.7.2-0.20231115150201-1637fae69be1/parse.go
//
// The listing opens with the clause "package jsoncolor" and imports the
// standard packages bytes, math, reflect, unicode, unicode/utf16 and
// unicode/utf8.

// All space characters defined in the JSON specification.
const (
	sp = ' '
	ht = '\t'
	nl = '\n'
	cr = '\r'
)

// skipSpaces returns b without its leading JSON whitespace (space, tab, line
// feed, carriage return). The result is nil when b is empty or contains only
// whitespace.
func skipSpaces(b []byte) []byte {
	b, _ = skipSpacesN(b)
	return b
}

// skipSpacesN is like skipSpaces but also reports how many leading bytes of b
// were skipped.
//
// The returned slice aliases b and starts at the first non-whitespace byte.
// When every byte of b is whitespace (or b is empty) the returned slice is nil
// and the count is len(b), so callers that track an input offset account for
// trailing whitespace as well.
func skipSpacesN(b []byte) ([]byte, int) {
	for i := range b {
		switch b[i] {
		case sp, ht, nl, cr:
			// still inside the whitespace run
		default:
			return b[i:], i
		}
	}
	// The whole input was consumed: nothing remains, len(b) bytes skipped.
	return nil, len(b)
}

// parseInt parses a decimal representation of an int64 from b.
//
// The function is equivalent to calling strconv.ParseInt(string(b), 10, 64) but
// it prevents Go from making a memory allocation for converting a byte slice to
// a string (escape analysis fails due to the error returned by strconv.ParseInt).
//
// Because it only works with base 10 the function is also significantly faster
// than strconv.ParseInt.
44 func parseInt(b []byte, t reflect.Type) (int64, []byte, error) { 45 var value int64 46 var count int 47 48 if len(b) == 0 { 49 return 0, b, syntaxError(b, "cannot decode integer from an empty input") 50 } 51 52 if b[0] == '-' { 53 const max = math.MinInt64 54 const lim = max / 10 55 56 if len(b) == 1 { 57 return 0, b, syntaxError(b, "cannot decode integer from '-'") 58 } 59 60 if len(b) > 2 && b[1] == '0' && '0' <= b[2] && b[2] <= '9' { 61 return 0, b, syntaxError(b, "invalid leading character '0' in integer") 62 } 63 64 for _, d := range b[1:] { 65 if !(d >= '0' && d <= '9') { 66 if count == 0 { 67 bs, err := inputError(b, t) 68 return 0, bs, err 69 } 70 break 71 } 72 73 if value < lim { 74 return 0, b, unmarshalOverflow(b, t) 75 } 76 77 value *= 10 78 x := int64(d - '0') 79 80 if value < (max + x) { 81 return 0, b, unmarshalOverflow(b, t) 82 } 83 84 value -= x 85 count++ 86 } 87 88 count++ 89 } else { 90 const max = math.MaxInt64 91 const lim = max / 10 92 93 if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' { 94 return 0, b, syntaxError(b, "invalid leading character '0' in integer") 95 } 96 97 for _, d := range b { 98 if !(d >= '0' && d <= '9') { 99 if count == 0 { 100 bs, err := inputError(b, t) 101 return 0, bs, err 102 } 103 break 104 } 105 x := int64(d - '0') 106 107 if value > lim { 108 return 0, b, unmarshalOverflow(b, t) 109 } 110 111 if value *= 10; value > (max - x) { 112 return 0, b, unmarshalOverflow(b, t) 113 } 114 115 value += x 116 count++ 117 } 118 } 119 120 if count < len(b) { 121 switch b[count] { 122 case '.', 'e', 'E': // was this actually a float? 123 v, r, err := parseNumber(b) 124 if err != nil { 125 v, r = b[:count+1], b[count+1:] 126 } 127 return 0, r, unmarshalTypeError(v, t) 128 } 129 } 130 131 return value, b[count:], nil 132 } 133 134 // parseUint is like parseInt but for unsigned integers. 
135 func parseUint(b []byte, t reflect.Type) (uint64, []byte, error) { 136 const max = math.MaxUint64 137 const lim = max / 10 138 139 var value uint64 140 var count int 141 142 if len(b) == 0 { 143 return 0, b, syntaxError(b, "cannot decode integer value from an empty input") 144 } 145 146 if len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' { 147 return 0, b, syntaxError(b, "invalid leading character '0' in integer") 148 } 149 150 for _, d := range b { 151 if !(d >= '0' && d <= '9') { 152 if count == 0 { 153 bs, err := inputError(b, t) 154 return 0, bs, err 155 } 156 break 157 } 158 x := uint64(d - '0') 159 160 if value > lim { 161 return 0, b, unmarshalOverflow(b, t) 162 } 163 164 if value *= 10; value > (max - x) { 165 return 0, b, unmarshalOverflow(b, t) 166 } 167 168 value += x 169 count++ 170 } 171 172 if count < len(b) { 173 switch b[count] { 174 case '.', 'e', 'E': // was this actually a float? 175 v, r, err := parseNumber(b) 176 if err != nil { 177 v, r = b[:count+1], b[count+1:] 178 } 179 return 0, r, unmarshalTypeError(v, t) 180 } 181 } 182 183 return value, b[count:], nil 184 } 185 186 // parseUintHex parses a hexadecimanl representation of a uint64 from b. 187 // 188 // The function is equivalent to calling strconv.ParseUint(string(b), 16, 64) but 189 // it prevents Go from making a memory allocation for converting a byte slice to 190 // a string (escape analysis fails due to the error returned by strconv.ParseUint). 191 // 192 // Because it only works with base 16 the function is also significantly faster 193 // than strconv.ParseUint. 
194 func parseUintHex(b []byte) (uint64, []byte, error) { 195 const max = math.MaxUint64 196 const lim = max / 0x10 197 198 var value uint64 199 var count int 200 201 if len(b) == 0 { 202 return 0, b, syntaxError(b, "cannot decode hexadecimal value from an empty input") 203 } 204 205 parseLoop: 206 for i, d := range b { 207 var x uint64 208 209 switch { 210 case d >= '0' && d <= '9': 211 x = uint64(d - '0') 212 213 case d >= 'A' && d <= 'F': 214 x = uint64(d-'A') + 0xA 215 216 case d >= 'a' && d <= 'f': 217 x = uint64(d-'a') + 0xA 218 219 default: 220 if i == 0 { 221 return 0, b, syntaxError(b, "expected hexadecimal digit but found '%c'", d) 222 } 223 break parseLoop 224 } 225 226 if value > lim { 227 return 0, b, syntaxError(b, "hexadecimal value out of range") 228 } 229 230 if value *= 0x10; value > (max - x) { 231 return 0, b, syntaxError(b, "hexadecimal value out of range") 232 } 233 234 value += x 235 count++ 236 } 237 238 return value, b[count:], nil 239 } 240 241 func parseNull(b []byte) ([]byte, []byte, error) { 242 if hasNullPrefix(b) { 243 return b[:4], b[4:], nil 244 } 245 if len(b) < 4 { 246 return nil, b[len(b):], unexpectedEOF(b) 247 } 248 return nil, b, syntaxError(b, "expected 'null' but found invalid token") 249 } 250 251 func parseTrue(b []byte) ([]byte, []byte, error) { 252 if hasTruePrefix(b) { 253 return b[:4], b[4:], nil 254 } 255 if len(b) < 4 { 256 return nil, b[len(b):], unexpectedEOF(b) 257 } 258 return nil, b, syntaxError(b, "expected 'true' but found invalid token") 259 } 260 261 func parseFalse(b []byte) ([]byte, []byte, error) { 262 if hasFalsePrefix(b) { 263 return b[:5], b[5:], nil 264 } 265 if len(b) < 5 { 266 return nil, b[len(b):], unexpectedEOF(b) 267 } 268 return nil, b, syntaxError(b, "expected 'false' but found invalid token") 269 } 270 271 func parseNumber(b []byte) (v, r []byte, err error) { 272 if len(b) == 0 { 273 r, err = b, unexpectedEOF(b) 274 return v, r, err 275 } 276 277 i := 0 278 // sign 279 if b[i] == '-' { 280 
i++ 281 } 282 283 if i == len(b) { 284 r, err = b[i:], syntaxError(b, "missing number value after sign") 285 return v, r, err 286 } 287 288 if b[i] < '0' || b[i] > '9' { 289 r, err = b[i:], syntaxError(b, "expected digit but got '%c'", b[i]) 290 return v, r, err 291 } 292 293 // integer part 294 if b[i] == '0' { 295 i++ 296 if i == len(b) || (b[i] != '.' && b[i] != 'e' && b[i] != 'E') { 297 v, r = b[:i], b[i:] 298 return v, r, err 299 } 300 if '0' <= b[i] && b[i] <= '9' { 301 r, err = b[i:], syntaxError(b, "cannot decode number with leading '0' character") 302 return v, r, err 303 } 304 } 305 306 for i < len(b) && '0' <= b[i] && b[i] <= '9' { 307 i++ 308 } 309 310 // decimal part 311 if i < len(b) && b[i] == '.' { 312 i++ 313 decimalStart := i 314 315 for i < len(b) { 316 if c := b[i]; !('0' <= c && c <= '9') { 317 if i == decimalStart { 318 r, err = b[i:], syntaxError(b, "expected digit but found '%c'", c) 319 return v, r, err 320 } 321 break 322 } 323 i++ 324 } 325 326 if i == decimalStart { 327 r, err = b[i:], syntaxError(b, "expected decimal part after '.'") 328 return v, r, err 329 } 330 } 331 332 // exponent part 333 if i < len(b) && (b[i] == 'e' || b[i] == 'E') { 334 i++ 335 336 if i < len(b) { 337 if c := b[i]; c == '+' || c == '-' { 338 i++ 339 } 340 } 341 342 if i == len(b) { 343 r, err = b[i:], syntaxError(b, "missing exponent in number") 344 return v, r, err 345 } 346 347 exponentStart := i 348 349 for i < len(b) { 350 if c := b[i]; !('0' <= c && c <= '9') { 351 if i == exponentStart { 352 err = syntaxError(b, "expected digit but found '%c'", c) 353 return v, r, err 354 } 355 break 356 } 357 i++ 358 } 359 } 360 361 v, r = b[:i], b[i:] 362 return v, r, err 363 } 364 365 func parseUnicode(b []byte) (rune, int, error) { 366 if len(b) < 4 { 367 return 0, 0, syntaxError(b, "unicode code point must have at least 4 characters") 368 } 369 370 u, r, err := parseUintHex(b[:4]) 371 if err != nil { 372 return 0, 0, syntaxError(b, "parsing unicode code point: %s", 
err) 373 } 374 375 if len(r) != 0 { 376 return 0, 0, syntaxError(b, "invalid unicode code point") 377 } 378 379 return rune(u), 4, nil 380 } 381 382 func parseStringFast(b []byte) ([]byte, []byte, bool, error) { 383 if len(b) < 2 { 384 return nil, b[len(b):], false, unexpectedEOF(b) 385 } 386 if b[0] != '"' { 387 return nil, b, false, syntaxError(b, "expected '\"' at the beginning of a string value") 388 } 389 390 n := bytes.IndexByte(b[1:], '"') + 2 391 if n <= 1 { 392 return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value") 393 } 394 if bytes.IndexByte(b[1:n], '\\') < 0 && asciiValidPrint(b[1:n]) { 395 return b[:n], b[n:], false, nil 396 } 397 398 for i := 1; i < len(b); i++ { 399 switch b[i] { 400 case '\\': 401 if i++; i < len(b) { 402 switch b[i] { 403 case '"', '\\', '/', 'n', 'r', 't', 'f', 'b': 404 case 'u': 405 _, n, err := parseUnicode(b[i+1:]) 406 if err != nil { 407 return nil, b, false, err 408 } 409 i += n 410 default: 411 return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i]) 412 } 413 } 414 415 case '"': 416 return b[:i+1], b[i+1:], true, nil 417 418 default: 419 if b[i] < 0x20 { 420 return nil, b, false, syntaxError(b, "invalid character '%c' in string escape code", b[i]) 421 } 422 } 423 } 424 425 return nil, b[len(b):], false, syntaxError(b, "missing '\"' at the end of a string value") 426 } 427 428 func parseString(b []byte) ([]byte, []byte, error) { 429 s, b, _, err := parseStringFast(b) 430 return s, b, err 431 } 432 433 func parseStringUnquote(b, r []byte) ([]byte, []byte, bool, error) { 434 s, b, escaped, err := parseStringFast(b) 435 if err != nil { 436 return s, b, false, err 437 } 438 439 s = s[1 : len(s)-1] // trim the quotes 440 441 if !escaped { 442 return s, b, false, nil 443 } 444 445 if r == nil { 446 r = make([]byte, 0, len(s)) 447 } 448 449 for len(s) != 0 { 450 i := bytes.IndexByte(s, '\\') 451 452 if i < 0 { 453 r = appendCoerceInvalidUTF8(r, s) 454 break 455 } 
456 457 r = appendCoerceInvalidUTF8(r, s[:i]) 458 s = s[i+1:] 459 460 c := s[0] 461 switch c { 462 case '"', '\\', '/': 463 // simple escaped character 464 case 'n': 465 c = '\n' 466 467 case 'r': 468 c = '\r' 469 470 case 't': 471 c = '\t' 472 473 case 'b': 474 c = '\b' 475 476 case 'f': 477 c = '\f' 478 479 case 'u': 480 s = s[1:] 481 482 r1, n1, err := parseUnicode(s) 483 if err != nil { 484 return r, b, true, err 485 } 486 s = s[n1:] 487 488 if utf16.IsSurrogate(r1) { 489 if !hasPrefix(s, `\u`) { 490 r1 = unicode.ReplacementChar 491 } else { 492 r2, n2, err := parseUnicode(s[2:]) 493 if err != nil { 494 return r, b, true, err 495 } 496 if r1 = utf16.DecodeRune(r1, r2); r1 != unicode.ReplacementChar { 497 s = s[2+n2:] 498 } 499 } 500 } 501 502 r = appendRune(r, r1) 503 continue 504 505 default: // not sure what this escape sequence is 506 return r, b, false, syntaxError(s, "invalid character '%c' in string escape code", c) 507 } 508 509 r = append(r, c) 510 s = s[1:] 511 } 512 513 return r, b, true, nil 514 } 515 516 func appendRune(b []byte, r rune) []byte { 517 n := len(b) 518 b = append(b, 0, 0, 0, 0) 519 return b[:n+utf8.EncodeRune(b[n:], r)] 520 } 521 522 func appendCoerceInvalidUTF8(b, s []byte) []byte { 523 c := [4]byte{} 524 525 for _, r := range string(s) { 526 b = append(b, c[:utf8.EncodeRune(c[:], r)]...) 
527 } 528 529 return b 530 } 531 532 func parseObject(b []byte) ([]byte, []byte, error) { 533 if len(b) < 2 { 534 return nil, b[len(b):], unexpectedEOF(b) 535 } 536 537 if b[0] != '{' { 538 return nil, b, syntaxError(b, "expected '{' at the beginning of an object value") 539 } 540 541 var err error 542 a := b 543 n := len(b) 544 i := 0 545 546 b = b[1:] 547 for { 548 b = skipSpaces(b) 549 550 if len(b) == 0 { 551 return nil, b, syntaxError(b, "cannot decode object from empty input") 552 } 553 554 if b[0] == '}' { 555 j := (n - len(b)) + 1 556 return a[:j], a[j:], nil 557 } 558 559 if i != 0 { 560 if len(b) == 0 { 561 return nil, b, syntaxError(b, "unexpected EOF after object field value") 562 } 563 if b[0] != ',' { 564 return nil, b, syntaxError(b, "expected ',' after object field value but found '%c'", b[0]) 565 } 566 b = skipSpaces(b[1:]) 567 if len(b) == 0 { 568 return nil, b, unexpectedEOF(b) 569 } 570 if b[0] == '}' { 571 return nil, b, syntaxError(b, "unexpected trailing comma after object field") 572 } 573 } 574 575 _, b, err = parseString(b) 576 if err != nil { 577 return nil, b, err 578 } 579 b = skipSpaces(b) 580 581 if len(b) == 0 { 582 return nil, b, syntaxError(b, "unexpected EOF after object field key") 583 } 584 if b[0] != ':' { 585 return nil, b, syntaxError(b, "expected ':' after object field key but found '%c'", b[0]) 586 } 587 b = skipSpaces(b[1:]) 588 589 _, b, err = parseValue(b) 590 if err != nil { 591 return nil, b, err 592 } 593 594 i++ 595 } 596 } 597 598 func parseArray(b []byte) ([]byte, []byte, error) { 599 if len(b) < 2 { 600 return nil, b[len(b):], unexpectedEOF(b) 601 } 602 603 if b[0] != '[' { 604 return nil, b, syntaxError(b, "expected '[' at the beginning of array value") 605 } 606 607 var err error 608 a := b 609 n := len(b) 610 i := 0 611 612 b = b[1:] 613 for { 614 b = skipSpaces(b) 615 616 if len(b) == 0 { 617 return nil, b, syntaxError(b, "missing closing ']' after array value") 618 } 619 620 if b[0] == ']' { 621 j := (n - 
len(b)) + 1 622 return a[:j], a[j:], nil 623 } 624 625 if i != 0 { 626 if len(b) == 0 { 627 return nil, b, syntaxError(b, "unexpected EOF after array element") 628 } 629 if b[0] != ',' { 630 return nil, b, syntaxError(b, "expected ',' after array element but found '%c'", b[0]) 631 } 632 b = skipSpaces(b[1:]) 633 if len(b) == 0 { 634 return nil, b, unexpectedEOF(b) 635 } 636 if b[0] == ']' { 637 return nil, b, syntaxError(b, "unexpected trailing comma after object field") 638 } 639 } 640 641 _, b, err = parseValue(b) 642 if err != nil { 643 return nil, b, err 644 } 645 646 i++ 647 } 648 } 649 650 func parseValue(b []byte) ([]byte, []byte, error) { 651 if len(b) != 0 { 652 switch b[0] { 653 case '{': 654 return parseObject(b) 655 case '[': 656 return parseArray(b) 657 case '"': 658 return parseString(b) 659 case 'n': 660 return parseNull(b) 661 case 't': 662 return parseTrue(b) 663 case 'f': 664 return parseFalse(b) 665 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 666 return parseNumber(b) 667 default: 668 return nil, b, syntaxError(b, "invalid character '%c' looking for beginning of value", b[0]) 669 } 670 } 671 return nil, b, syntaxError(b, "unexpected end of JSON input") 672 } 673 674 func hasNullPrefix(b []byte) bool { 675 return len(b) >= 4 && string(b[:4]) == "null" 676 } 677 678 func hasTruePrefix(b []byte) bool { 679 return len(b) >= 4 && string(b[:4]) == "true" 680 } 681 682 func hasFalsePrefix(b []byte) bool { 683 return len(b) >= 5 && string(b[:5]) == "false" 684 } 685 686 func hasPrefix(b []byte, s string) bool { 687 return len(b) >= len(s) && s == string(b[:len(s)]) 688 } 689 690 func hasLeadingSign(b []byte) bool { 691 return len(b) > 0 && (b[0] == '+' || b[0] == '-') 692 } 693 694 func hasLeadingZeroes(b []byte) bool { 695 if hasLeadingSign(b) { 696 b = b[1:] 697 } 698 return len(b) > 1 && b[0] == '0' && '0' <= b[1] && b[1] <= '9' 699 } 700 701 func appendToLower(b, s []byte) []byte { 702 if asciiValid(s) { // fast path for ascii strings 
703 i := 0 704 705 for j := range s { 706 c := s[j] 707 708 if 'A' <= c && c <= 'Z' { 709 b = append(b, s[i:j]...) 710 b = append(b, c+('a'-'A')) 711 i = j + 1 712 } 713 } 714 715 return append(b, s[i:]...) 716 } 717 718 for _, r := range string(s) { 719 b = appendRune(b, foldRune(r)) 720 } 721 722 return b 723 } 724 725 func foldRune(r rune) rune { 726 if r = unicode.SimpleFold(r); 'A' <= r && r <= 'Z' { 727 r += 'a' - 'A' 728 } 729 return r 730 }