git.lukeshu.com/go/lowmemjson@v0.3.9-0.20230723050957-72f6d13f6fb2/internal/jsonparse/parse.go (about)

     1  // Copyright (C) 2022-2023  Luke Shumaker <lukeshu@lukeshu.com>
     2  //
     3  // SPDX-License-Identifier: GPL-2.0-or-later
     4  
     5  package jsonparse
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	iofs "io/fs"
    12  	"strings"
    13  )
    14  
// ErrParserExceededMaxDepth is the error that Parser.HandleRune
// returns when Parser.MaxDepth is set to a positive value and an
// object or array would be nested more deeply than that limit.
var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
    16  
    17  type InvalidCharacterError struct {
    18  	Char   rune
    19  	IsRune bool
    20  	Where  string
    21  }
    22  
    23  func (e *InvalidCharacterError) Error() string {
    24  	if e.IsRune {
    25  		return fmt.Sprintf("invalid character %q %s", e.Char, e.Where)
    26  	} else {
    27  		return fmt.Sprintf("invalid character '\\x%02x' %s", e.Char, e.Where)
    28  	}
    29  }
    30  
    31  func isHex(c rune) bool {
    32  	return ('0' <= c && c <= '9') ||
    33  		('a' <= c && c <= 'f') ||
    34  		('A' <= c && c <= 'F')
    35  }
    36  
// RuneType is the classification of a rune when parsing JSON input.
// A Parser, rather than grouping runes into tokens and classifying
// tokens, classifies runes directly.
type RuneType uint8

// The possible RuneType values.  The order matters: IsNumber relies
// on the RuneTypeNumberXXX values being contiguous.
const (
	RuneTypeError RuneType = iota // returned together with a non-nil error

	RuneTypeSpace // whitespace

	RuneTypeObjectBeg   // '{'
	RuneTypeObjectColon // ':'
	RuneTypeObjectComma // ','
	RuneTypeObjectEnd   // '}'

	RuneTypeArrayBeg   // '['
	RuneTypeArrayComma // ','
	RuneTypeArrayEnd   // ']'

	RuneTypeStringBeg   // opening '"'
	RuneTypeStringChar  // normal character
	RuneTypeStringEsc   // backslash
	RuneTypeStringEsc1  // single-char after a backslash
	RuneTypeStringEscU  // \uABCD : u
	RuneTypeStringEscUA // \uABCD : A
	RuneTypeStringEscUB // \uABCD : B
	RuneTypeStringEscUC // \uABCD : C
	RuneTypeStringEscUD // \uABCD : D
	RuneTypeStringEnd   // closing '"'

	RuneTypeNumberIntNeg  // '-' sign before the integer part
	RuneTypeNumberIntZero // leading zero only; non-leading zeros are IntDig, not IntZero
	RuneTypeNumberIntDig  // digit of the integer part, other than a leading zero
	RuneTypeNumberFracDot // '.'
	RuneTypeNumberFracDig // digit after the '.'
	RuneTypeNumberExpE    // 'e' or 'E'
	RuneTypeNumberExpSign // '-' or '+' after the 'e' or 'E'
	RuneTypeNumberExpDig  // digit of the exponent

	RuneTypeTrueT // 't' of "true"
	RuneTypeTrueR // 'r' of "true"
	RuneTypeTrueU // 'u' of "true"
	RuneTypeTrueE // 'e' of "true"

	RuneTypeFalseF // 'f' of "false"
	RuneTypeFalseA // 'a' of "false"
	RuneTypeFalseL // 'l' of "false"
	RuneTypeFalseS // 's' of "false"
	RuneTypeFalseE // 'e' of "false"

	RuneTypeNullN  // 'n' of "null"
	RuneTypeNullU  // 'u' of "null"
	RuneTypeNullL1 // first 'l' of "null"
	RuneTypeNullL2 // second 'l' of "null"

	RuneTypeEOF // end of the JSON document (or of the element delimited by a barrier)

	// Not a real rune type, but used as a stack state.
	runeTypeAny
)
    97  
    98  // GoString implements fmt.GoStringer.
    99  //
   100  //nolint:dupl // False positive due to similarly shaped AST.
   101  func (t RuneType) GoString() string {
   102  	str, ok := map[RuneType]string{
   103  		RuneTypeError: "RuneTypeError",
   104  
   105  		RuneTypeSpace: "RuneTypeSpace",
   106  
   107  		RuneTypeObjectBeg:   "RuneTypeObjectBeg",
   108  		RuneTypeObjectColon: "RuneTypeObjectColon",
   109  		RuneTypeObjectComma: "RuneTypeObjectComma",
   110  		RuneTypeObjectEnd:   "RuneTypeObjectEnd",
   111  
   112  		RuneTypeArrayBeg:   "RuneTypeArrayBeg",
   113  		RuneTypeArrayComma: "RuneTypeArrayComma",
   114  		RuneTypeArrayEnd:   "RuneTypeArrayEnd",
   115  
   116  		RuneTypeStringBeg:   "RuneTypeStringBeg",
   117  		RuneTypeStringChar:  "RuneTypeStringChar",
   118  		RuneTypeStringEsc:   "RuneTypeStringEsc",
   119  		RuneTypeStringEsc1:  "RuneTypeStringEsc1",
   120  		RuneTypeStringEscU:  "RuneTypeStringEscU",
   121  		RuneTypeStringEscUA: "RuneTypeStringEscUA",
   122  		RuneTypeStringEscUB: "RuneTypeStringEscUB",
   123  		RuneTypeStringEscUC: "RuneTypeStringEscUC",
   124  		RuneTypeStringEscUD: "RuneTypeStringEscUD",
   125  		RuneTypeStringEnd:   "RuneTypeStringEnd",
   126  
   127  		RuneTypeNumberIntNeg:  "RuneTypeNumberIntNeg",
   128  		RuneTypeNumberIntZero: "RuneTypeNumberIntZero",
   129  		RuneTypeNumberIntDig:  "RuneTypeNumberIntDig",
   130  		RuneTypeNumberFracDot: "RuneTypeNumberFracDot",
   131  		RuneTypeNumberFracDig: "RuneTypeNumberFracDig",
   132  		RuneTypeNumberExpE:    "RuneTypeNumberExpE",
   133  		RuneTypeNumberExpSign: "RuneTypeNumberExpSign",
   134  		RuneTypeNumberExpDig:  "RuneTypeNumberExpDig",
   135  
   136  		RuneTypeTrueT: "RuneTypeTrueT",
   137  		RuneTypeTrueR: "RuneTypeTrueR",
   138  		RuneTypeTrueU: "RuneTypeTrueU",
   139  		RuneTypeTrueE: "RuneTypeTrueE",
   140  
   141  		RuneTypeFalseF: "RuneTypeFalseF",
   142  		RuneTypeFalseA: "RuneTypeFalseA",
   143  		RuneTypeFalseL: "RuneTypeFalseL",
   144  		RuneTypeFalseS: "RuneTypeFalseS",
   145  		RuneTypeFalseE: "RuneTypeFalseE",
   146  
   147  		RuneTypeNullN:  "RuneTypeNullN",
   148  		RuneTypeNullU:  "RuneTypeNullU",
   149  		RuneTypeNullL1: "RuneTypeNullL1",
   150  		RuneTypeNullL2: "RuneTypeNullL2",
   151  
   152  		RuneTypeEOF: "RuneTypeEOF",
   153  
   154  		runeTypeAny: "runeTypeAny",
   155  	}[t]
   156  	if ok {
   157  		return str
   158  	}
   159  	return fmt.Sprintf("RuneType(%d)", t)
   160  }
   161  
   162  // String implements fmt.Stringer.
   163  //
   164  //nolint:dupl // False positive due to similarly shaped AST.
   165  func (t RuneType) String() string {
   166  	str, ok := map[RuneType]string{
   167  		RuneTypeError: "x",
   168  
   169  		RuneTypeSpace: " ",
   170  
   171  		RuneTypeObjectBeg:   "{",
   172  		RuneTypeObjectColon: ":",
   173  		RuneTypeObjectComma: "o",
   174  		RuneTypeObjectEnd:   "}",
   175  
   176  		RuneTypeArrayBeg:   "[",
   177  		RuneTypeArrayComma: "a",
   178  		RuneTypeArrayEnd:   "]",
   179  
   180  		RuneTypeStringBeg:   "\"",
   181  		RuneTypeStringChar:  "c",
   182  		RuneTypeStringEsc:   "\\",
   183  		RuneTypeStringEsc1:  "b",
   184  		RuneTypeStringEscU:  "u",
   185  		RuneTypeStringEscUA: "A",
   186  		RuneTypeStringEscUB: "B",
   187  		RuneTypeStringEscUC: "C",
   188  		RuneTypeStringEscUD: "D",
   189  		RuneTypeStringEnd:   "ยป",
   190  
   191  		RuneTypeNumberIntNeg:  "-",
   192  		RuneTypeNumberIntZero: "0",
   193  		RuneTypeNumberIntDig:  "1",
   194  		RuneTypeNumberFracDot: ".",
   195  		RuneTypeNumberFracDig: "2",
   196  		RuneTypeNumberExpE:    "e",
   197  		RuneTypeNumberExpSign: "+",
   198  		RuneTypeNumberExpDig:  "3",
   199  
   200  		RuneTypeTrueT: "๐•ฅ", // double-struck
   201  		RuneTypeTrueR: "๐•ฃ",
   202  		RuneTypeTrueU: "๐•ฆ",
   203  		RuneTypeTrueE: "๐•–",
   204  
   205  		RuneTypeFalseF: "๐”ฃ", // fraktur
   206  		RuneTypeFalseA: "๐”ž",
   207  		RuneTypeFalseL: "๐”ฉ",
   208  		RuneTypeFalseS: "๐”ฐ",
   209  		RuneTypeFalseE: "๐”ข",
   210  
   211  		RuneTypeNullN:  "โ“", // circled
   212  		RuneTypeNullU:  "โ“ค",
   213  		RuneTypeNullL1: "โ“›",
   214  		RuneTypeNullL2: "โ“", // +uppercase
   215  
   216  		RuneTypeEOF: "$",
   217  
   218  		runeTypeAny: "?",
   219  	}[t]
   220  	if ok {
   221  		return str
   222  	}
   223  	return fmt.Sprintf("<%d>", t)
   224  }
   225  
   226  func (t RuneType) JSONType() string {
   227  	return map[RuneType]string{
   228  		RuneTypeObjectBeg:     "object",
   229  		RuneTypeArrayBeg:      "array",
   230  		RuneTypeStringBeg:     "string",
   231  		RuneTypeNumberIntNeg:  "number",
   232  		RuneTypeNumberIntZero: "number",
   233  		RuneTypeNumberIntDig:  "number",
   234  		RuneTypeTrueT:         "true",
   235  		RuneTypeFalseF:        "false",
   236  		RuneTypeNullN:         "null",
   237  		RuneTypeEOF:           "eof",
   238  	}[t]
   239  }
   240  
   241  // IsNumber returns whether the RuneType is one of the
   242  // RuneTypeNumberXXX values.
   243  func (t RuneType) IsNumber() bool {
   244  	return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig
   245  }
   246  
// Parser is the low-level JSON parser that powers both *Decoder and
// *ReEncoder.
type Parser struct {
	// Setting MaxDepth to a value greater than 0 causes
	// HandleRune to return ErrParserExceededMaxDepth if
	// objects/arrays become nested more deeply than this.
	MaxDepth int

	// initialized records whether init has already pushed the
	// initial runeTypeAny on to the stack.
	initialized bool

	// err, once set by HandleEOF, is returned by every later
	// HandleRune and HandleEOF call.  closed is set when HandleEOF
	// returns and cleared again by PopBarrier; while it is set,
	// HandleRune and HandleEOF return io/fs.ErrClosed.
	err    error
	closed bool

	// We reuse RuneTypes to store the stack.  The base idea is:
	// stack items are "the most recently read stack-relevant
	// RuneType".
	//
	// The stack starts out with the special pseudo-RuneType
	// `runeTypeAny` that means we're willing to accept any
	// element type; an empty stack means that we have reached the
	// end of the top-level element and should accept no more
	// input except for whitespace.
	//
	// The "normal" stack-relevant RuneTypes are:
	//
	//   "\uABC    for strings
	//   -01.2e+3  for numbers
	//   𝕥𝕣𝕦       for "true"
	//   𝔣𝔞𝔩𝔰      for "false"
	//   ⓝⓤⓛ       for "null"
	//
	// Objects and arrays break the "most recently read RuneType"
	// rule; they need some special assignments:
	//
	//   {   object: waiting for key to start or '}'
	//   }   object: waiting for key to start
	//   :   object: reading key / waiting for colon
	//   o   object: reading value / waiting for ',' or '}'
	//
	//   [   array: waiting for item to start or ']'
	//   a   array: reading item / waiting for ',' or ']'
	//
	// Within each element type, the stack item is replaced, not pushed.
	//
	// (Keep each of these examples in-sync with parse_test.go.)
	//
	// For example, given the input string
	//
	//   {"x":"y","a":"b"}
	//
	// The stack would be
	//
	//   stack   processed
	//   ?
	//   {       {
	//   :"      {"
	//   :"      {"x
	//   :       {"x"
	//   o?      {"x":
	//   o"      {"x":"
	//   o"      {"x":"y
	//   o       {"x":"y"
	//   }       {"x":"y",
	//   :"      {"x":"y","
	//   :"      {"x":"y","a
	//   :       {"x":"y","a"
	//   o?      {"x":"y","a":
	//   o"      {"x":"y","a":"
	//   o"      {"x":"y","a":"b
	//   o       {"x":"y","a":"b"
	//           {"x":"y","a":"b"}
	//
	// Or, given the input string
	//
	//   ["x","y"]
	//
	// The stack would be
	//
	//   stack   processed
	//   ?
	//   [       [
	//   a"      ["
	//   a"      ["x
	//   a       ["x"
	//   a?      ["x",
	//   a"      ["x","
	//   a"      ["x","y
	//   a       ["x","y"
	//           ["x","y"]
	stack []RuneType

	// barriers holds the outer stacks that were set aside by
	// PushReadBarrier/PushWriteBarrier, innermost last; PopBarrier
	// splices the most recent one back underneath stack.
	barriers []barrier
}
   340  
// barrier is the state set aside by PushReadBarrier or
// PushWriteBarrier so that PopBarrier can restore it.
//
// allowWS says whether HandleRune still accepts whitespace once the
// element inside the barrier has ended (PushWriteBarrier sets it,
// PushReadBarrier does not).  stack is the part of the Parser's stack
// that was hidden from the sub-parser when the barrier was pushed.
type barrier struct {
	allowWS bool
	stack   []RuneType
}
   345  
   346  func (par *Parser) init() {
   347  	if !par.initialized {
   348  		par.initialized = true
   349  		par.pushState(runeTypeAny)
   350  	}
   351  }
   352  
// pushState pushes state on to the top of the stack, and returns
// state so that callers can write `return par.pushState(x), nil`.
func (par *Parser) pushState(state RuneType) RuneType {
	par.stack = append(par.stack, state)
	return state
}
   357  
   358  func (par *Parser) replaceState(state RuneType) RuneType {
   359  	par.stack[len(par.stack)-1] = state
   360  	return state
   361  }
   362  
   363  func (par *Parser) popState() {
   364  	par.stack = par.stack[:len(par.stack)-1]
   365  }
   366  
   367  func (par *Parser) stackString() string {
   368  	par.init()
   369  	var buf strings.Builder
   370  	for _, s := range par.stack {
   371  		buf.WriteString(s.String())
   372  	}
   373  	return buf.String()
   374  }
   375  
   376  func (par *Parser) depth() int {
   377  	n := len(par.stack)
   378  	for _, barrier := range par.barriers {
   379  		n += len(barrier.stack)
   380  	}
   381  	return n
   382  }
   383  
   384  func (par *Parser) StackIsEmpty() bool {
   385  	if len(par.barriers) > 0 {
   386  		return false
   387  	}
   388  	if len(par.stack) == 0 {
   389  		return true
   390  	}
   391  	return len(par.stack) == 1 && par.stack[0] == runeTypeAny
   392  }
   393  
// StackSize returns the number of items on the current stack.  Stacks
// that have been set aside by a barrier are not counted.
func (par *Parser) StackSize() int {
	return len(par.stack)
}
   397  
   398  // Reset all Parser state.
   399  func (par *Parser) Reset() {
   400  	*par = Parser{
   401  		MaxDepth: par.MaxDepth,
   402  	}
   403  }
   404  
   405  // PushReadBarrier causes the parser to emit EOF once the end of the
   406  // element that is started by the current top-of-stack is reached
   407  // (which means that it will reject whitespace between the end of the
   408  // element and EOF), until this is un-done with PopBarrier.  It
   409  // essentially turns the parser in to a sub-parser.
   410  //
   411  // PushReadBarrier may only be called at the beginning of an element,
   412  // whether that be
   413  //
   414  //   - runeTypeAny
   415  //   - RuneTypeObjectBeg
   416  //   - RuneTypeArrayBeg
   417  //   - RuneTypeStringBeg
   418  //   - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig
   419  //   - RuneTypeTrueT
   420  //   - RuneTypeFalseF
   421  //   - RuneTypeNullN
   422  func (par *Parser) PushReadBarrier() {
   423  	// Sanity checking.
   424  	par.init()
   425  	if len(par.stack) == 0 {
   426  		panic(errors.New("should not happen: illegal PushReadBarrier call: empty stack"))
   427  	}
   428  	curState := par.stack[len(par.stack)-1]
   429  	switch curState {
   430  	case runeTypeAny,
   431  		RuneTypeObjectBeg,
   432  		RuneTypeArrayBeg,
   433  		RuneTypeStringBeg,
   434  		RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig,
   435  		RuneTypeTrueT,
   436  		RuneTypeFalseF,
   437  		RuneTypeNullN:
   438  		// OK
   439  	default:
   440  		panic(fmt.Errorf("should not happen: illegal PushReadBarrier call: %q", curState))
   441  	}
   442  	// Actually push.
   443  	par.barriers = append(par.barriers, barrier{
   444  		allowWS: false,
   445  		stack:   par.stack[:len(par.stack)-1],
   446  	})
   447  	par.stack = []RuneType{curState}
   448  }
   449  
   450  // PushWriteBarrier causes the parser to emit EOF once the end of the
   451  // about-to-start element is reached and any trailing whitespace has
   452  // been exhausted, until this is un-done with PopBarrier.  It
   453  // essentially turns the parser in to a sub-parser.
   454  //
   455  // PushWriteBarrier may only be called at the places where an element
   456  // of any type may start:
   457  //
   458  //   - runeTypeAny for top-level and object-value elements
   459  //   - RuneTypeArrayBeg for array-item elements
   460  //
   461  // PushWriteBarrier signals intent to write an element; if it is
   462  // called in a place where an element is optional (at the beginning of
   463  // an array), it becomes a syntax error to not write the element.
   464  func (par *Parser) PushWriteBarrier() {
   465  	par.init()
   466  	if len(par.stack) == 0 {
   467  		panic(errors.New("should not happen: illegal PushWriteBarrier call: empty stack"))
   468  	}
   469  	switch par.stack[len(par.stack)-1] {
   470  	case runeTypeAny:
   471  		par.popState()
   472  		par.barriers = append(par.barriers, barrier{
   473  			allowWS: true,
   474  			stack:   par.stack,
   475  		})
   476  		par.stack = []RuneType{runeTypeAny}
   477  	case RuneTypeArrayBeg:
   478  		par.replaceState(RuneTypeArrayComma)
   479  		par.barriers = append(par.barriers, barrier{
   480  			allowWS: true,
   481  			stack:   par.stack,
   482  		})
   483  		par.stack = []RuneType{runeTypeAny}
   484  	default:
   485  		panic(fmt.Errorf("should not happen: illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1]))
   486  	}
   487  }
   488  
   489  // PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier.
   490  func (par *Parser) PopBarrier() {
   491  	if len(par.barriers) == 0 {
   492  		panic(errors.New("should not happen: illegal PopBarrier call: empty barrier stack"))
   493  	}
   494  	barrier := par.barriers[len(par.barriers)-1]
   495  	par.barriers = par.barriers[:len(par.barriers)-1]
   496  	par.closed = false
   497  	par.stack = append(barrier.stack, par.stack...)
   498  }
   499  
   500  // HandleEOF feeds EOF to the Parser.  The returned RuneType is either
   501  // RuneTypeEOF or RuneTypeError.
   502  //
   503  // An error is returned if and only if the RuneType is RuneTypeError.
   504  // Returns io/fs.ErrClosed if .HandleEOF() has previously been called
   505  // (and .Reset() has not been called since).
   506  //
   507  // Once RuneTypeError or RuneTypeEOF has been returned, it will keep
   508  // being returned from both .HandleRune(c) and .HandleEOF() until
   509  // .Reset() is called.
   510  //
   511  // RuneTypeEOF indicates that a complete JSON document has been read.
   512  func (par *Parser) HandleEOF() (RuneType, error) {
   513  	if par.closed {
   514  		return RuneTypeError, iofs.ErrClosed
   515  	}
   516  	defer func() {
   517  		par.closed = true
   518  	}()
   519  	if par.err != nil {
   520  		return RuneTypeError, par.err
   521  	}
   522  	par.init()
   523  	switch len(par.stack) {
   524  	case 0:
   525  		return RuneTypeEOF, nil
   526  	case 1:
   527  		switch {
   528  		case par.stack[0].IsNumber():
   529  			if _, err := par.HandleRune('\n', true); err == nil {
   530  				return RuneTypeEOF, nil
   531  			}
   532  		case par.stack[0] == runeTypeAny:
   533  			par.err = io.EOF
   534  			return RuneTypeError, par.err
   535  		}
   536  		fallthrough
   537  	default:
   538  		par.err = io.ErrUnexpectedEOF
   539  		return RuneTypeError, par.err
   540  	}
   541  }
   542  
   543  // IsAtBarrier returns whether a read-barrier has been reached and the
   544  // next HandleRune call would definitely return RuneTypeEOF.
   545  func (par *Parser) IsAtBarrier() bool {
   546  	return par.initialized &&
   547  		// HandleRune wouldn't return early with an error.
   548  		!par.closed &&
   549  		par.err == nil &&
   550  		// The current (sub-)parser has reached its end, and
   551  		len(par.stack) == 0 &&
   552  		// there is a barrier, and
   553  		len(par.barriers) > 0 &&
   554  		// that barrier would definitely return RuneTypeEOF.
   555  		!par.barriers[len(par.barriers)-1].allowWS
   556  }
   557  
// HandleRune feeds a Unicode rune to the Parser.
//
// c is the rune to classify.  isRune is recorded in any
// InvalidCharacterError that is returned, where it selects whether
// the character is reported as a quoted rune or as a '\xNN' byte.
//
// An error is returned if and only if the RuneType is RuneTypeError.
// Returns io/fs.ErrClosed if .HandleEOF() has previously been called
// (and .Reset() has not been called since).
//
// Once RuneTypeError or RuneTypeEOF has been returned, it will keep
// being returned from both .HandleRune(c) and .HandleEOF() until
// .Reset() is called.
//
// RuneTypeEOF indicates that the rune cannot be appended to the JSON
// document; a new JSON document must be started in order to process
// that rune.
//
// NOTE(review): nothing in this function assigns par.err (only
// HandleEOF does), so an error returned from here does not appear to
// be latched the way the paragraph above describes -- confirm whether
// callers are expected to stop feeding runes after the first error.
func (par *Parser) HandleRune(c rune, isRune bool) (RuneType, error) {
	if par.closed {
		return RuneTypeError, iofs.ErrClosed
	}
	if par.err != nil {
		return RuneTypeError, par.err
	}
	par.init()
	if len(par.stack) == 0 {
		// The current (sub-)document is complete.  Whitespace may
		// still follow it, unless the innermost barrier forbids that.
		if len(par.barriers) == 0 || par.barriers[len(par.barriers)-1].allowWS {
			switch c {
			case 0x0020, 0x000A, 0x000D, 0x0009:
				return RuneTypeSpace, nil
			}
		}
		// Anything else ends a sub-parser, or is an error at the
		// top level.
		if len(par.barriers) > 0 {
			return RuneTypeEOF, nil
		} else {
			return RuneTypeError, &InvalidCharacterError{c, isRune, "after top-level value"}
		}
	}
	switch par.stack[len(par.stack)-1] {
	// any /////////////////////////////////////////////////////////////////////////////////////
	case runeTypeAny:
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case '{':
			// depth() includes stacks set aside by barriers.
			if par.MaxDepth > 0 && par.depth() > par.MaxDepth {
				return RuneTypeError, ErrParserExceededMaxDepth
			}
			return par.replaceState(RuneTypeObjectBeg), nil
		case '[':
			if par.MaxDepth > 0 && par.depth() > par.MaxDepth {
				return RuneTypeError, ErrParserExceededMaxDepth
			}
			return par.replaceState(RuneTypeArrayBeg), nil
		case '"':
			return par.replaceState(RuneTypeStringBeg), nil
		case '-':
			return par.replaceState(RuneTypeNumberIntNeg), nil
		case '0':
			return par.replaceState(RuneTypeNumberIntZero), nil
		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberIntDig), nil
		case 't':
			return par.replaceState(RuneTypeTrueT), nil
		case 'f':
			return par.replaceState(RuneTypeFalseF), nil
		case 'n':
			return par.replaceState(RuneTypeNullN), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of value"}
		}
	// object //////////////////////////////////////////////////////////////////////////////////
	case RuneTypeObjectBeg: // waiting for key to start or '}'
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case '"':
			// The object state waits for the colon while the key
			// string is parsed on top of it.
			par.replaceState(RuneTypeObjectColon)
			return par.pushState(RuneTypeStringBeg), nil
		case '}':
			par.popState()
			return RuneTypeObjectEnd, nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"}
		}
	case RuneTypeObjectEnd: // waiting for key to start
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case '"':
			par.replaceState(RuneTypeObjectColon)
			return par.pushState(RuneTypeStringBeg), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"}
		}
	case RuneTypeObjectColon: // waiting for ':'
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case ':':
			// The object state waits for ',' or '}' while the
			// value is parsed on top of it.
			par.replaceState(RuneTypeObjectComma)
			par.pushState(runeTypeAny)
			return RuneTypeObjectColon, nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key"}
		}
	case RuneTypeObjectComma: // waiting for ',' or '}'
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case ',':
			par.replaceState(RuneTypeObjectEnd)
			return RuneTypeObjectComma, nil
		case '}':
			par.popState()
			return RuneTypeObjectEnd, nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key:value pair"}
		}
	// array ///////////////////////////////////////////////////////////////////////////////////
	case RuneTypeArrayBeg: // waiting for item to start or ']'
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case ']':
			par.popState()
			return RuneTypeArrayEnd, nil
		default:
			// Anything else must be the start of the first item:
			// set up the item state, then re-dispatch c against it.
			par.replaceState(RuneTypeArrayComma)
			par.pushState(runeTypeAny)
			return par.HandleRune(c, isRune)
		}
	case RuneTypeArrayComma: // waiting for ',' or ']'
		switch c {
		case 0x0020, 0x000A, 0x000D, 0x0009:
			return RuneTypeSpace, nil
		case ',':
			par.pushState(runeTypeAny)
			return RuneTypeArrayComma, nil
		case ']':
			par.popState()
			return RuneTypeArrayEnd, nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "after array element"}
		}
	// string //////////////////////////////////////////////////////////////////////////////////
	case RuneTypeStringBeg: // waiting for char or '"'
		switch {
		case c == '\\':
			return par.replaceState(RuneTypeStringEsc), nil
		case c == '"':
			par.popState()
			return RuneTypeStringEnd, nil
		case 0x0020 <= c && c <= 0x10FFFF:
			// The state stays StringBeg: still inside the string.
			return RuneTypeStringChar, nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "in string literal"}
		}
	case RuneTypeStringEsc: // waiting for escape char
		switch c {
		case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
			par.replaceState(RuneTypeStringBeg)
			return RuneTypeStringEsc1, nil
		case 'u':
			return par.replaceState(RuneTypeStringEscU), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "in string escape code"}
		}
	// The four hexadecimal digits of a \uABCD escape; after the
	// fourth, the state goes back to StringBeg.
	case RuneTypeStringEscU:
		if !isHex(c) {
			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
		}
		return par.replaceState(RuneTypeStringEscUA), nil
	case RuneTypeStringEscUA:
		if !isHex(c) {
			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
		}
		return par.replaceState(RuneTypeStringEscUB), nil
	case RuneTypeStringEscUB:
		if !isHex(c) {
			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
		}
		return par.replaceState(RuneTypeStringEscUC), nil
	case RuneTypeStringEscUC:
		if !isHex(c) {
			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
		}
		par.replaceState(RuneTypeStringBeg)
		return RuneTypeStringEscUD, nil
	// number //////////////////////////////////////////////////////////////////////////////////
	//
	// Here's a flattened drawing of the syntax diagram from www.json.org :
	//
	//      [------------ integer ----------][-- fraction ---][-------- exponent -------]
	//     >─╮─────╭─╮─"0"───────╭─────────╭──╮─────────────╭──╮───────────────────────╭─>
	//       │     │ │           │         │  │             │  │                       │
	//       ╰─"-"─╯ ╰─digit 1-9─╯─╭digit╮─╯  ╰─"."─╭digit╮─╯  ╰─"e"─╭─╮─────╭─╭digit╮─╯
	//                             ╰──<──╯          ╰──<──╯    │     │ │     │ ╰──<──╯
	//                                                         ╰─"E"─╯ ╰─"-"─╯
	//                                                                 │     │
	//                                                                 ╰─"+"─╯
	//
	// Now here it is slightly redrawn, and with each distinct state our
	// parser can be in marked with a single-capital-letter:
	//
	//        [-------------- integer ------------][--------- fraction --------][--------- exponent ---------]
	//     >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─>
	//         │       │  │               │         │                  │         │                          │
	//         ╰─"-"─B─╯  ╰─digit 1-9─╭─D─╯─digit╮  ╰─"."─E─digit──╭─F─╯─digit╮  ╰─"e"─╭─G─╮─────╭─╭digit─I─╯
	//                                ╰────<─────╯                 ╰────<─────╯  │     │   │     H ╰────<───╯
	//                                                                           ╰─"E"─╯   ╰─"-"─╯
	//                                                                                     │     │
	//                                                                                     ╰─"+"─╯
	//
	// You may notice that each of these states may be uniquely identified
	// by the last-read RuneType:
	//
	//     A = (nothing yet)
	//     B = IntNeg
	//     C = IntZero
	//     D = IntDig
	//     E = FracDot
	//     F = FracDig
	//     G = ExpE
	//     H = ExpSign
	//     I = ExpDig
	//
	// The 'A' state is part of the runeTypeAny case above, and
	// the remainder follow:
	//
	// In the states where the number may legally end (C, D, F, I),
	// any other rune pops the number and is re-dispatched against
	// whatever encloses it.
	case RuneTypeNumberIntNeg: // B
		switch c {
		case '0':
			return par.replaceState(RuneTypeNumberIntZero), nil
		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberIntDig), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "in numeric literal"}
		}
	case RuneTypeNumberIntZero: // C
		switch c {
		case '.':
			return par.replaceState(RuneTypeNumberFracDot), nil
		case 'e', 'E':
			return par.replaceState(RuneTypeNumberExpE), nil
		default:
			par.popState()
			return par.HandleRune(c, isRune)
		}
	case RuneTypeNumberIntDig: // D
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberIntDig), nil
		case '.':
			return par.replaceState(RuneTypeNumberFracDot), nil
		case 'e', 'E':
			return par.replaceState(RuneTypeNumberExpE), nil
		default:
			par.popState()
			return par.HandleRune(c, isRune)
		}
	case RuneTypeNumberFracDot: // E
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberFracDig), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "after decimal point in numeric literal"}
		}
	case RuneTypeNumberFracDig: // F
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberFracDig), nil
		case 'e', 'E':
			return par.replaceState(RuneTypeNumberExpE), nil
		default:
			par.popState()
			return par.HandleRune(c, isRune)
		}
	case RuneTypeNumberExpE: // G
		switch c {
		case '-', '+':
			return par.replaceState(RuneTypeNumberExpSign), nil
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberExpDig), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"}
		}
	case RuneTypeNumberExpSign: // H
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberExpDig), nil
		default:
			return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"}
		}
	case RuneTypeNumberExpDig: // I
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			return par.replaceState(RuneTypeNumberExpDig), nil
		default:
			par.popState()
			return par.HandleRune(c, isRune)
		}
	// literals ////////////////////////////////////////////////////////////////////////////////
	//
	// Each state expects exactly one rune; the final rune of a
	// literal pops the state instead of replacing it.
	// true
	case RuneTypeTrueT:
		return par.expectRune(c, isRune, 'r', RuneTypeTrueR, "true", false)
	case RuneTypeTrueR:
		return par.expectRune(c, isRune, 'u', RuneTypeTrueU, "true", false)
	case RuneTypeTrueU:
		return par.expectRune(c, isRune, 'e', RuneTypeTrueE, "true", true)
	// false
	case RuneTypeFalseF:
		return par.expectRune(c, isRune, 'a', RuneTypeFalseA, "false", false)
	case RuneTypeFalseA:
		return par.expectRune(c, isRune, 'l', RuneTypeFalseL, "false", false)
	case RuneTypeFalseL:
		return par.expectRune(c, isRune, 's', RuneTypeFalseS, "false", false)
	case RuneTypeFalseS:
		return par.expectRune(c, isRune, 'e', RuneTypeFalseE, "false", true)
	// null
	case RuneTypeNullN:
		return par.expectRune(c, isRune, 'u', RuneTypeNullU, "null", false)
	case RuneTypeNullU:
		return par.expectRune(c, isRune, 'l', RuneTypeNullL1, "null", false)
	case RuneTypeNullL1:
		return par.expectRune(c, isRune, 'l', RuneTypeNullL2, "null", true)
	default:
		panic(fmt.Errorf(`should not happen: invalid stack: "%s"`, par.stackString()))
	}
}
   883  
   884  func (par *Parser) expectRune(c rune, isRune bool, exp rune, typ RuneType, context string, pop bool) (RuneType, error) {
   885  	if c != exp {
   886  		return RuneTypeError, &InvalidCharacterError{c, isRune, fmt.Sprintf("in literal %s (expecting %q)", context, exp)}
   887  	}
   888  	if pop {
   889  		par.popState()
   890  		return typ, nil
   891  	} else {
   892  		return par.replaceState(typ), nil
   893  	}
   894  }