git.lukeshu.com/go/lowmemjson@v0.3.9-0.20230723050957-72f6d13f6fb2/internal/jsonparse/parse.go (about) 1 // Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> 2 // 3 // SPDX-License-Identifier: GPL-2.0-or-later 4 5 package jsonparse 6 7 import ( 8 "errors" 9 "fmt" 10 "io" 11 iofs "io/fs" 12 "strings" 13 ) 14 15 var ErrParserExceededMaxDepth = errors.New("exceeded max depth") 16 17 type InvalidCharacterError struct { 18 Char rune 19 IsRune bool 20 Where string 21 } 22 23 func (e *InvalidCharacterError) Error() string { 24 if e.IsRune { 25 return fmt.Sprintf("invalid character %q %s", e.Char, e.Where) 26 } else { 27 return fmt.Sprintf("invalid character '\\x%02x' %s", e.Char, e.Where) 28 } 29 } 30 31 func isHex(c rune) bool { 32 return ('0' <= c && c <= '9') || 33 ('a' <= c && c <= 'f') || 34 ('A' <= c && c <= 'F') 35 } 36 37 // RuneType is the classification of a rune when parsing JSON input. 38 // A Parser, rather than grouping runes into tokens and classifying 39 // tokens, classifies runes directly. 40 type RuneType uint8 41 42 const ( 43 RuneTypeError RuneType = iota 44 45 RuneTypeSpace // whitespace 46 47 RuneTypeObjectBeg // '{' 48 RuneTypeObjectColon // ':' 49 RuneTypeObjectComma // ',' 50 RuneTypeObjectEnd // '}' 51 52 RuneTypeArrayBeg // '[' 53 RuneTypeArrayComma // ',' 54 RuneTypeArrayEnd // ']' 55 56 RuneTypeStringBeg // opening '"' 57 RuneTypeStringChar // normal character 58 RuneTypeStringEsc // backslash 59 RuneTypeStringEsc1 // single-char after a backslash 60 RuneTypeStringEscU // \uABCD : u 61 RuneTypeStringEscUA // \uABCD : A 62 RuneTypeStringEscUB // \uABCD : B 63 RuneTypeStringEscUC // \uABCD : C 64 RuneTypeStringEscUD // \uABCD : D 65 RuneTypeStringEnd // closing '"' 66 67 RuneTypeNumberIntNeg 68 RuneTypeNumberIntZero // leading zero only; non-leading zeros are IntDig, not IntZero 69 RuneTypeNumberIntDig 70 RuneTypeNumberFracDot 71 RuneTypeNumberFracDig 72 RuneTypeNumberExpE 73 RuneTypeNumberExpSign 74 RuneTypeNumberExpDig 75 76 RuneTypeTrueT 77 RuneTypeTrueR 78 RuneTypeTrueU 79 RuneTypeTrueE 80 81 RuneTypeFalseF 82 RuneTypeFalseA 83 RuneTypeFalseL 84 RuneTypeFalseS 85 RuneTypeFalseE 86 87 RuneTypeNullN 88 RuneTypeNullU 89 RuneTypeNullL1 90 RuneTypeNullL2 91 92 RuneTypeEOF 93 94 // Not a real rune type, but used as a stack state. 95 runeTypeAny 96 ) 97 98 // GoString implements fmt.GoStringer. 99 // 100 //nolint:dupl // False positive due to similarly shaped AST. 101 func (t RuneType) GoString() string { 102 str, ok := map[RuneType]string{ 103 RuneTypeError: "RuneTypeError", 104 105 RuneTypeSpace: "RuneTypeSpace", 106 107 RuneTypeObjectBeg: "RuneTypeObjectBeg", 108 RuneTypeObjectColon: "RuneTypeObjectColon", 109 RuneTypeObjectComma: "RuneTypeObjectComma", 110 RuneTypeObjectEnd: "RuneTypeObjectEnd", 111 112 RuneTypeArrayBeg: "RuneTypeArrayBeg", 113 RuneTypeArrayComma: "RuneTypeArrayComma", 114 RuneTypeArrayEnd: "RuneTypeArrayEnd", 115 116 RuneTypeStringBeg: "RuneTypeStringBeg", 117 RuneTypeStringChar: "RuneTypeStringChar", 118 RuneTypeStringEsc: "RuneTypeStringEsc", 119 RuneTypeStringEsc1: "RuneTypeStringEsc1", 120 RuneTypeStringEscU: "RuneTypeStringEscU", 121 RuneTypeStringEscUA: "RuneTypeStringEscUA", 122 RuneTypeStringEscUB: "RuneTypeStringEscUB", 123 RuneTypeStringEscUC: "RuneTypeStringEscUC", 124 RuneTypeStringEscUD: "RuneTypeStringEscUD", 125 RuneTypeStringEnd: "RuneTypeStringEnd", 126 127 RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", 128 RuneTypeNumberIntZero: "RuneTypeNumberIntZero", 129 RuneTypeNumberIntDig: "RuneTypeNumberIntDig", 130 RuneTypeNumberFracDot: "RuneTypeNumberFracDot", 131 RuneTypeNumberFracDig: "RuneTypeNumberFracDig", 132 RuneTypeNumberExpE: "RuneTypeNumberExpE", 133 RuneTypeNumberExpSign: "RuneTypeNumberExpSign", 134 RuneTypeNumberExpDig: "RuneTypeNumberExpDig", 135 136 RuneTypeTrueT: "RuneTypeTrueT", 137 RuneTypeTrueR: "RuneTypeTrueR", 138 RuneTypeTrueU: "RuneTypeTrueU", 139 RuneTypeTrueE: "RuneTypeTrueE", 140 141 RuneTypeFalseF: "RuneTypeFalseF", 142 RuneTypeFalseA: "RuneTypeFalseA", 143 RuneTypeFalseL: "RuneTypeFalseL", 144 RuneTypeFalseS: "RuneTypeFalseS", 145 RuneTypeFalseE: "RuneTypeFalseE", 146 147 RuneTypeNullN: "RuneTypeNullN", 148 RuneTypeNullU: "RuneTypeNullU", 149 RuneTypeNullL1: "RuneTypeNullL1", 150 RuneTypeNullL2: "RuneTypeNullL2", 151 152 RuneTypeEOF: "RuneTypeEOF", 153 154 runeTypeAny: "runeTypeAny", 155 }[t] 156 if ok { 157 return str 158 } 159 return fmt.Sprintf("RuneType(%d)", t) 160 } 161 162 // String implements fmt.Stringer. 163 // 164 //nolint:dupl // False positive due to similarly shaped AST. 165 func (t RuneType) String() string { 166 str, ok := map[RuneType]string{ 167 RuneTypeError: "x", 168 169 RuneTypeSpace: " ", 170 171 RuneTypeObjectBeg: "{", 172 RuneTypeObjectColon: ":", 173 RuneTypeObjectComma: "o", 174 RuneTypeObjectEnd: "}", 175 176 RuneTypeArrayBeg: "[", 177 RuneTypeArrayComma: "a", 178 RuneTypeArrayEnd: "]", 179 180 RuneTypeStringBeg: "\"", 181 RuneTypeStringChar: "c", 182 RuneTypeStringEsc: "\\", 183 RuneTypeStringEsc1: "b", 184 RuneTypeStringEscU: "u", 185 RuneTypeStringEscUA: "A", 186 RuneTypeStringEscUB: "B", 187 RuneTypeStringEscUC: "C", 188 RuneTypeStringEscUD: "D", 189 RuneTypeStringEnd: "ยป", 190 191 RuneTypeNumberIntNeg: "-", 192 RuneTypeNumberIntZero: "0", 193 RuneTypeNumberIntDig: "1", 194 RuneTypeNumberFracDot: ".", 195 RuneTypeNumberFracDig: "2", 196 RuneTypeNumberExpE: "e", 197 RuneTypeNumberExpSign: "+", 198 RuneTypeNumberExpDig: "3", 199 200 RuneTypeTrueT: "๐ฅ", // double-struck 201 RuneTypeTrueR: "๐ฃ", 202 RuneTypeTrueU: "๐ฆ", 203 RuneTypeTrueE: "๐", 204 205 RuneTypeFalseF: "๐ฃ", // fraktur 206 RuneTypeFalseA: "๐", 207 RuneTypeFalseL: "๐ฉ", 208 RuneTypeFalseS: "๐ฐ", 209 RuneTypeFalseE: "๐ข", 210 211 RuneTypeNullN: "โ", // circled 212 RuneTypeNullU: "โค", 213 RuneTypeNullL1: "โ", 214 RuneTypeNullL2: "โ", // +uppercase 215 216 RuneTypeEOF: "$", 217 218 runeTypeAny: "?", 219 }[t] 220 if ok { 221 return str 222 } 223 return fmt.Sprintf("<%d>", t) 224 } 225 226 func (t RuneType) JSONType() string { 227 return map[RuneType]string{ 228 RuneTypeObjectBeg: "object", 229 RuneTypeArrayBeg: "array", 230 RuneTypeStringBeg: "string", 231 RuneTypeNumberIntNeg: "number", 232 RuneTypeNumberIntZero: "number", 233 RuneTypeNumberIntDig: "number", 234 RuneTypeTrueT: "true", 235 RuneTypeFalseF: "false", 236 RuneTypeNullN: "null", 237 RuneTypeEOF: "eof", 238 }[t] 239 } 240 241 // IsNumber returns whether the RuneType is one of the 242 // RuneTypeNumberXXX values. 243 func (t RuneType) IsNumber() bool { 244 return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig 245 } 246 247 // Parser is the low-level JSON parser that powers both *Decoder and 248 // *ReEncoder. 249 type Parser struct { 250 // Setting MaxError to a value greater than 0 causes 251 // HandleRune to return ErrParserExceededMaxDepth if 252 // objects/arrays become nested more deeply than this. 253 MaxDepth int 254 255 initialized bool 256 257 err error 258 closed bool 259 260 // We reuse RuneTypes to store the stack. The base idea is: 261 // stack items are "the most recently read stack-relevant 262 // RuneType". 263 // 264 // The stack starts out with the special pseudo-RuneType 265 // `runeTypeAny` that means we're willing to accept any 266 // element type; an empty stack means that we have reached the 267 // end of the top-level element and should accept no more 268 // input except for whitespace. 269 // 270 // The "normal" stack-relevant RuneTypes are: 271 // 272 // "\uABC for strings 273 // -01.2e+3 for numbers 274 // ๐ฅ๐ฃ๐ฆ for "true" 275 // ๐ฃ๐๐ฉ๐ฐ for "false" 276 // โโคโ for "null" 277 // 278 // Objects and arrays break the "most recently read RuneType" 279 // rule; they need some special assignments: 280 // 281 // { object: waiting for key to start or '}' 282 // } object: waiting for key to start 283 // : object: reading key / waiting for colon 284 // o object: reading value / waiting for ',' or '}' 285 // 286 // [ array: waiting for item to start or ']' 287 // a array: reading item / waiting for ',' or ']' 288 // 289 // Within each element type, the stack item is replaced, not pushed. 290 // 291 // (Keep each of these examples in-sync with parse_test.go.) 292 // 293 // For example, given the input string 294 // 295 // {"x":"y","a":"b"} 296 // 297 // The stack would be 298 // 299 // stack processed 300 // ? 301 // { { 302 // :" {" 303 // :" {"x 304 // : {"x" 305 // o? {"x": 306 // o" {"x":" 307 // o" {"x":"y 308 // o {"x":"y" 309 // } {"x":"y", 310 // :" {"x":"y"," 311 // :" {"x":"y","a 312 // : {"x":"y","a" 313 // o? {"x":"y","a": 314 // o" {"x":"y","a":" 315 // o" {"x":"y","a":"b 316 // o {"x":"y","a":"b" 317 // {"x":"y","a":"b"} 318 // 319 // Or, given the input string 320 // 321 // ["x","y"] 322 // 323 // The stack would be 324 // 325 // stack processed 326 // ? 327 // [ [ 328 // a" [" 329 // a" ["x 330 // a ["x" 331 // a? ["x", 332 // a" ["x"," 333 // a" ["x","y 334 // a ["x","y" 335 // ["x","y"] 336 stack []RuneType 337 338 barriers []barrier 339 } 340 341 type barrier struct { 342 allowWS bool 343 stack []RuneType 344 } 345 346 func (par *Parser) init() { 347 if !par.initialized { 348 par.initialized = true 349 par.pushState(runeTypeAny) 350 } 351 } 352 353 func (par *Parser) pushState(state RuneType) RuneType { 354 par.stack = append(par.stack, state) 355 return state 356 } 357 358 func (par *Parser) replaceState(state RuneType) RuneType { 359 par.stack[len(par.stack)-1] = state 360 return state 361 } 362 363 func (par *Parser) popState() { 364 par.stack = par.stack[:len(par.stack)-1] 365 } 366 367 func (par *Parser) stackString() string { 368 par.init() 369 var buf strings.Builder 370 for _, s := range par.stack { 371 buf.WriteString(s.String()) 372 } 373 return buf.String() 374 } 375 376 func (par *Parser) depth() int { 377 n := len(par.stack) 378 for _, barrier := range par.barriers { 379 n += len(barrier.stack) 380 } 381 return n 382 } 383 384 func (par *Parser) StackIsEmpty() bool { 385 if len(par.barriers) > 0 { 386 return false 387 } 388 if len(par.stack) == 0 { 389 return true 390 } 391 return len(par.stack) == 1 && par.stack[0] == runeTypeAny 392 } 393 394 func (par *Parser) StackSize() int { 395 return len(par.stack) 396 } 397 398 // Reset all Parser state. 399 func (par *Parser) Reset() { 400 *par = Parser{ 401 MaxDepth: par.MaxDepth, 402 } 403 } 404 405 // PushReadBarrier causes the parser to emit EOF once the end of the 406 // element that is started by the current top-of-stack is reached 407 // (which means that it will reject whitespace between the end of the 408 // element and EOF), until this is un-done with PopBarrier. It 409 // essentially turns the parser in to a sub-parser. 410 // 411 // PushReadBarrier may only be called at the beginning of an element, 412 // whether that be 413 // 414 // - runeTypeAny 415 // - RuneTypeObjectBeg 416 // - RuneTypeArrayBeg 417 // - RuneTypeStringBeg 418 // - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig 419 // - RuneTypeTrueT 420 // - RuneTypeFalseF 421 // - RuneTypeNullN 422 func (par *Parser) PushReadBarrier() { 423 // Sanity checking. 424 par.init() 425 if len(par.stack) == 0 { 426 panic(errors.New("should not happen: illegal PushReadBarrier call: empty stack")) 427 } 428 curState := par.stack[len(par.stack)-1] 429 switch curState { 430 case runeTypeAny, 431 RuneTypeObjectBeg, 432 RuneTypeArrayBeg, 433 RuneTypeStringBeg, 434 RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, 435 RuneTypeTrueT, 436 RuneTypeFalseF, 437 RuneTypeNullN: 438 // OK 439 default: 440 panic(fmt.Errorf("should not happen: illegal PushReadBarrier call: %q", curState)) 441 } 442 // Actually push. 443 par.barriers = append(par.barriers, barrier{ 444 allowWS: false, 445 stack: par.stack[:len(par.stack)-1], 446 }) 447 par.stack = []RuneType{curState} 448 } 449 450 // PushWriteBarrier causes the parser to emit EOF once the end of the 451 // about-to-start element is reached and any trailing whitespace has 452 // been exhausted, until this is un-done with PopBarrier. It 453 // essentially turns the parser in to a sub-parser. 454 // 455 // PushWriteBarrier may only be called at the places where an element 456 // of any type may start: 457 // 458 // - runeTypeAny for top-level and object-value elements 459 // - RuneTypeArrayBeg for array-item elements 460 // 461 // PushWriteBarrier signals intent to write an element; if it is 462 // called in a place where an element is optional (at the beginning of 463 // an array), it becomes a syntax error to not write the element. 464 func (par *Parser) PushWriteBarrier() { 465 par.init() 466 if len(par.stack) == 0 { 467 panic(errors.New("should not happen: illegal PushWriteBarrier call: empty stack")) 468 } 469 switch par.stack[len(par.stack)-1] { 470 case runeTypeAny: 471 par.popState() 472 par.barriers = append(par.barriers, barrier{ 473 allowWS: true, 474 stack: par.stack, 475 }) 476 par.stack = []RuneType{runeTypeAny} 477 case RuneTypeArrayBeg: 478 par.replaceState(RuneTypeArrayComma) 479 par.barriers = append(par.barriers, barrier{ 480 allowWS: true, 481 stack: par.stack, 482 }) 483 par.stack = []RuneType{runeTypeAny} 484 default: 485 panic(fmt.Errorf("should not happen: illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) 486 } 487 } 488 489 // PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. 490 func (par *Parser) PopBarrier() { 491 if len(par.barriers) == 0 { 492 panic(errors.New("should not happen: illegal PopBarrier call: empty barrier stack")) 493 } 494 barrier := par.barriers[len(par.barriers)-1] 495 par.barriers = par.barriers[:len(par.barriers)-1] 496 par.closed = false 497 par.stack = append(barrier.stack, par.stack...) 498 } 499 500 // HandleEOF feeds EOF to the Parser. The returned RuneType is either 501 // RuneTypeEOF or RuneTypeError. 502 // 503 // An error is returned if and only if the RuneType is RuneTypeError. 504 // Returns io/fs.ErrClosed if .HandleEOF() has previously been called 505 // (and .Reset() has not been called since). 506 // 507 // Once RuneTypeError or RuneTypeEOF has been returned, it will keep 508 // being returned from both .HandleRune(c) and .HandleEOF() until 509 // .Reset() is called. 510 // 511 // RuneTypeEOF indicates that a complete JSON document has been read. 512 func (par *Parser) HandleEOF() (RuneType, error) { 513 if par.closed { 514 return RuneTypeError, iofs.ErrClosed 515 } 516 defer func() { 517 par.closed = true 518 }() 519 if par.err != nil { 520 return RuneTypeError, par.err 521 } 522 par.init() 523 switch len(par.stack) { 524 case 0: 525 return RuneTypeEOF, nil 526 case 1: 527 switch { 528 case par.stack[0].IsNumber(): 529 if _, err := par.HandleRune('\n', true); err == nil { 530 return RuneTypeEOF, nil 531 } 532 case par.stack[0] == runeTypeAny: 533 par.err = io.EOF 534 return RuneTypeError, par.err 535 } 536 fallthrough 537 default: 538 par.err = io.ErrUnexpectedEOF 539 return RuneTypeError, par.err 540 } 541 } 542 543 // IsAtBarrier returns whether a read-barrier has been reached and the 544 // next HandleRune call would definitely return RuneTypeEOF. 545 func (par *Parser) IsAtBarrier() bool { 546 return par.initialized && 547 // HandleRune wouldn't return early with an error. 548 !par.closed && 549 par.err == nil && 550 // The current (sub-)parser has reached its end, and 551 len(par.stack) == 0 && 552 // there is a barrier, and 553 len(par.barriers) > 0 && 554 // that barrier would definitely return RuneTypeEOF. 555 !par.barriers[len(par.barriers)-1].allowWS 556 } 557 558 // HandleRune feeds a Unicode rune to the Parser. 559 // 560 // An error is returned if and only if the RuneType is RuneTypeError. 561 // Returns io/fs.ErrClosed if .HandleEOF() has previously been called 562 // (and .Reset() has not been called since). 563 // 564 // Once RuneTypeError or RuneTypeEOF has been returned, it will keep 565 // being returned from both .HandleRune(c) and .HandleEOF() until 566 // .Reset() is called. 567 // 568 // RuneTypeEOF indicates that the rune cannot be appended to the JSON 569 // document; a new JSON document must be started in order to process 570 // that rune. 571 func (par *Parser) HandleRune(c rune, isRune bool) (RuneType, error) { 572 if par.closed { 573 return RuneTypeError, iofs.ErrClosed 574 } 575 if par.err != nil { 576 return RuneTypeError, par.err 577 } 578 par.init() 579 if len(par.stack) == 0 { 580 if len(par.barriers) == 0 || par.barriers[len(par.barriers)-1].allowWS { 581 switch c { 582 case 0x0020, 0x000A, 0x000D, 0x0009: 583 return RuneTypeSpace, nil 584 } 585 } 586 if len(par.barriers) > 0 { 587 return RuneTypeEOF, nil 588 } else { 589 return RuneTypeError, &InvalidCharacterError{c, isRune, "after top-level value"} 590 } 591 } 592 switch par.stack[len(par.stack)-1] { 593 // any ///////////////////////////////////////////////////////////////////////////////////// 594 case runeTypeAny: 595 switch c { 596 case 0x0020, 0x000A, 0x000D, 0x0009: 597 return RuneTypeSpace, nil 598 case '{': 599 if par.MaxDepth > 0 && par.depth() > par.MaxDepth { 600 return RuneTypeError, ErrParserExceededMaxDepth 601 } 602 return par.replaceState(RuneTypeObjectBeg), nil 603 case '[': 604 if par.MaxDepth > 0 && par.depth() > par.MaxDepth { 605 return RuneTypeError, ErrParserExceededMaxDepth 606 } 607 return par.replaceState(RuneTypeArrayBeg), nil 608 case '"': 609 return par.replaceState(RuneTypeStringBeg), nil 610 case '-': 611 return par.replaceState(RuneTypeNumberIntNeg), nil 612 case '0': 613 return par.replaceState(RuneTypeNumberIntZero), nil 614 case '1', '2', '3', '4', '5', '6', '7', '8', '9': 615 return par.replaceState(RuneTypeNumberIntDig), nil 616 case 't': 617 return par.replaceState(RuneTypeTrueT), nil 618 case 'f': 619 return par.replaceState(RuneTypeFalseF), nil 620 case 'n': 621 return par.replaceState(RuneTypeNullN), nil 622 default: 623 return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of value"} 624 } 625 // object ////////////////////////////////////////////////////////////////////////////////// 626 case RuneTypeObjectBeg: // waiting for key to start or '}' 627 switch c { 628 case 0x0020, 0x000A, 0x000D, 0x0009: 629 return RuneTypeSpace, nil 630 case '"': 631 par.replaceState(RuneTypeObjectColon) 632 return par.pushState(RuneTypeStringBeg), nil 633 case '}': 634 par.popState() 635 return RuneTypeObjectEnd, nil 636 default: 637 return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"} 638 } 639 case RuneTypeObjectEnd: // waiting for key to start 640 switch c { 641 case 0x0020, 0x000A, 0x000D, 0x0009: 642 return RuneTypeSpace, nil 643 case '"': 644 par.replaceState(RuneTypeObjectColon) 645 return par.pushState(RuneTypeStringBeg), nil 646 default: 647 return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"} 648 } 649 case RuneTypeObjectColon: // waiting for ':' 650 switch c { 651 case 0x0020, 0x000A, 0x000D, 0x0009: 652 return RuneTypeSpace, nil 653 case ':': 654 par.replaceState(RuneTypeObjectComma) 655 par.pushState(runeTypeAny) 656 return RuneTypeObjectColon, nil 657 default: 658 return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key"} 659 } 660 case RuneTypeObjectComma: // waiting for ',' or '}' 661 switch c { 662 case 0x0020, 0x000A, 0x000D, 0x0009: 663 return RuneTypeSpace, nil 664 case ',': 665 par.replaceState(RuneTypeObjectEnd) 666 return RuneTypeObjectComma, nil 667 case '}': 668 par.popState() 669 return RuneTypeObjectEnd, nil 670 default: 671 return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key:value pair"} 672 } 673 // array /////////////////////////////////////////////////////////////////////////////////// 674 case RuneTypeArrayBeg: // waiting for item to start or ']' 675 switch c { 676 case 0x0020, 0x000A, 0x000D, 0x0009: 677 return RuneTypeSpace, nil 678 case ']': 679 par.popState() 680 return RuneTypeArrayEnd, nil 681 default: 682 par.replaceState(RuneTypeArrayComma) 683 par.pushState(runeTypeAny) 684 return par.HandleRune(c, isRune) 685 } 686 case RuneTypeArrayComma: // waiting for ',' or ']' 687 switch c { 688 case 0x0020, 0x000A, 0x000D, 0x0009: 689 return RuneTypeSpace, nil 690 case ',': 691 par.pushState(runeTypeAny) 692 return RuneTypeArrayComma, nil 693 case ']': 694 par.popState() 695 return RuneTypeArrayEnd, nil 696 default: 697 return RuneTypeError, &InvalidCharacterError{c, isRune, "after array element"} 698 } 699 // string ////////////////////////////////////////////////////////////////////////////////// 700 case RuneTypeStringBeg: // waiting for char or '"' 701 switch { 702 case c == '\\': 703 return par.replaceState(RuneTypeStringEsc), nil 704 case c == '"': 705 par.popState() 706 return RuneTypeStringEnd, nil 707 case 0x0020 <= c && c <= 0x10FFFF: 708 return RuneTypeStringChar, nil 709 default: 710 return RuneTypeError, &InvalidCharacterError{c, isRune, "in string literal"} 711 } 712 case RuneTypeStringEsc: // waiting for escape char 713 switch c { 714 case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': 715 par.replaceState(RuneTypeStringBeg) 716 return RuneTypeStringEsc1, nil 717 case 'u': 718 return par.replaceState(RuneTypeStringEscU), nil 719 default: 720 return RuneTypeError, &InvalidCharacterError{c, isRune, "in string escape code"} 721 } 722 case RuneTypeStringEscU: 723 if !isHex(c) { 724 return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) 725 } 726 return par.replaceState(RuneTypeStringEscUA), nil 727 case RuneTypeStringEscUA: 728 if !isHex(c) { 729 return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) 730 } 731 return par.replaceState(RuneTypeStringEscUB), nil 732 case RuneTypeStringEscUB: 733 if !isHex(c) { 734 return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) 735 } 736 return par.replaceState(RuneTypeStringEscUC), nil 737 case RuneTypeStringEscUC: 738 if !isHex(c) { 739 return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) 740 } 741 par.replaceState(RuneTypeStringBeg) 742 return RuneTypeStringEscUD, nil 743 // number ////////////////////////////////////////////////////////////////////////////////// 744 // 745 // Here's a flattened drawing of the syntax diagram from www.json.org : 746 // 747 // [------------ integer ----------][-- fraction ---][-------- exponent -------] 748 // >โโฎโโโโโโญโโฎโ"0"โโโโโโโโญโโโโโโโโโโญโโโฎโโโโโโโโโโโโโโญโโโฎโโโโโโโโโโโโโโโโโโโโโโโโญโ> 749 // โ โ โ โ โ โ โ โ โ 750 // โฐโ"-"โโฏ โฐโdigit 1-9โโฏโโญdigitโฎโโฏ โฐโ"."โโญdigitโฎโโฏ โฐโ"e"โโญโโฎโโโโโโญโโญdigitโฎโโฏ 751 // โฐโโ<โโโฏ โฐโโ<โโโฏ โ โ โ โ โฐโโ<โโโฏ 752 // โฐโ"E"โโฏ โฐโ"-"โโฏ 753 // โ โ 754 // โฐโ"+"โโฏ 755 // 756 // Now here it is slightly redrawn, and with each distinct state our 757 // parser can be in marked with a single-capital-letter: 758 // 759 // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] 760 // >โAโโฎโโโโโโโโญโโโฎโ"0"โโโโโโโโโCโโญโโโโโโโโโโฎโโโโโโโโโโโโโโโโโโโญโโโโโโโโโโฎโโโโโโโโโโโโโโโโโโโโโโโโโโโญโ> 761 // โ โ โ โ โ โ โ โ 762 // โฐโ"-"โBโโฏ โฐโdigit 1-9โโญโDโโฏโdigitโฎ โฐโ"."โEโdigitโโโญโFโโฏโdigitโฎ โฐโ"e"โโญโGโโฎโโโโโโญโโญdigitโIโโฏ 763 // โฐโโโโ<โโโโโโฏ โฐโโโโ<โโโโโโฏ โ โ โ H โฐโโโโ<โโโโฏ 764 // โฐโ"E"โโฏ โฐโ"-"โโฏ 765 // โ โ 766 // โฐโ"+"โโฏ 767 // 768 // You may notice that each of these states may be uniquely identified 769 // by the last-read RuneType: 770 // 771 // A = (nothing yet) 772 // B = IntNeg 773 // C = IntZero 774 // D = IntDig 775 // E = FracDot 776 // F = FracDig 777 // G = ExpE 778 // H = ExpSign 779 // I = ExpDig 780 // 781 // The 'A' state is part of the runeTypeAny case above, and 782 // the remainder follow: 783 case RuneTypeNumberIntNeg: // B 784 switch c { 785 case '0': 786 return par.replaceState(RuneTypeNumberIntZero), nil 787 case '1', '2', '3', '4', '5', '6', '7', '8', '9': 788 return par.replaceState(RuneTypeNumberIntDig), nil 789 default: 790 return RuneTypeError, &InvalidCharacterError{c, isRune, "in numeric literal"} 791 } 792 case RuneTypeNumberIntZero: // C 793 switch c { 794 case '.': 795 return par.replaceState(RuneTypeNumberFracDot), nil 796 case 'e', 'E': 797 return par.replaceState(RuneTypeNumberExpE), nil 798 default: 799 par.popState() 800 return par.HandleRune(c, isRune) 801 } 802 case RuneTypeNumberIntDig: // D 803 switch c { 804 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 805 return par.replaceState(RuneTypeNumberIntDig), nil 806 case '.': 807 return par.replaceState(RuneTypeNumberFracDot), nil 808 case 'e', 'E': 809 return par.replaceState(RuneTypeNumberExpE), nil 810 default: 811 par.popState() 812 return par.HandleRune(c, isRune) 813 } 814 case RuneTypeNumberFracDot: // E 815 switch c { 816 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 817 return par.replaceState(RuneTypeNumberFracDig), nil 818 default: 819 return RuneTypeError, &InvalidCharacterError{c, isRune, "after decimal point in numeric literal"} 820 } 821 case RuneTypeNumberFracDig: // F 822 switch c { 823 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 824 return par.replaceState(RuneTypeNumberFracDig), nil 825 case 'e', 'E': 826 return par.replaceState(RuneTypeNumberExpE), nil 827 default: 828 par.popState() 829 return par.HandleRune(c, isRune) 830 } 831 case RuneTypeNumberExpE: // G 832 switch c { 833 case '-', '+': 834 return par.replaceState(RuneTypeNumberExpSign), nil 835 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 836 return par.replaceState(RuneTypeNumberExpDig), nil 837 default: 838 return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"} 839 } 840 case RuneTypeNumberExpSign: // H 841 switch c { 842 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 843 return par.replaceState(RuneTypeNumberExpDig), nil 844 default: 845 return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"} 846 } 847 case RuneTypeNumberExpDig: // I 848 switch c { 849 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 850 return par.replaceState(RuneTypeNumberExpDig), nil 851 default: 852 par.popState() 853 return par.HandleRune(c, isRune) 854 } 855 // literals //////////////////////////////////////////////////////////////////////////////// 856 // true 857 case RuneTypeTrueT: 858 return par.expectRune(c, isRune, 'r', RuneTypeTrueR, "true", false) 859 case RuneTypeTrueR: 860 return par.expectRune(c, isRune, 'u', RuneTypeTrueU, "true", false) 861 case RuneTypeTrueU: 862 return par.expectRune(c, isRune, 'e', RuneTypeTrueE, "true", true) 863 // false 864 case RuneTypeFalseF: 865 return par.expectRune(c, isRune, 'a', RuneTypeFalseA, "false", false) 866 case RuneTypeFalseA: 867 return par.expectRune(c, isRune, 'l', RuneTypeFalseL, "false", false) 868 case RuneTypeFalseL: 869 return par.expectRune(c, isRune, 's', RuneTypeFalseS, "false", false) 870 case RuneTypeFalseS: 871 return par.expectRune(c, isRune, 'e', RuneTypeFalseE, "false", true) 872 // null 873 case RuneTypeNullN: 874 return par.expectRune(c, isRune, 'u', RuneTypeNullU, "null", false) 875 case RuneTypeNullU: 876 return par.expectRune(c, isRune, 'l', RuneTypeNullL1, "null", false) 877 case RuneTypeNullL1: 878 return par.expectRune(c, isRune, 'l', RuneTypeNullL2, "null", true) 879 default: 880 panic(fmt.Errorf(`should not happen: invalid stack: "%s"`, par.stackString())) 881 } 882 } 883 884 func (par *Parser) expectRune(c rune, isRune bool, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { 885 if c != exp { 886 return RuneTypeError, &InvalidCharacterError{c, isRune, fmt.Sprintf("in literal %s (expecting %q)", context, exp)} 887 } 888 if pop { 889 par.popState() 890 return typ, nil 891 } else { 892 return par.replaceState(typ), nil 893 } 894 }