github.com/intel-go/fastjson@v0.0.0-20170329170629-f846ae58a1ab/scanner.go

github.com/intel-go/fastjson@v0.0.0-20170329170629-f846ae58a1ab/scanner.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package fastjson
     6  
     7  // JSON value parser state machine.
     8  // Just about at the limit of what is reasonable to write by hand.
     9  // Some parts are a bit tedious, but overall it nicely factors out the
    10  // otherwise common code from the multiple scanning functions
    11  // in this package (Compact, Indent, checkValid, nextValue, etc).
    12  //
    13  // This file starts with two simple examples using the scanner
    14  // before diving into the scanner itself.
    15  
    16  import (
    17  	"bytes"
    18  	"strconv"
    19  )
    20  
    21  // checkValid verifies that data is valid JSON-encoded data.
    22  // scan is passed in for use by checkValid to avoid an allocation.
    23  func checkValid(data []byte, scan *scanner) error {
    24  	scan.length_data = len(data)
    25  	scan.reset()
    26  	scan.endTop = true
    27  	stream := streamByte{data: &data, pos: 0}
    28  	op := scan.parseValue(&stream)
    29  
    30  	if op == scanError {
    31  		return scan.err
    32  	}
    33  
    34  	return nil
    35  }
    36  
    37  // A SyntaxError is a description of a JSON syntax error.
    38  type SyntaxError struct {
    39  	msg    string // description of error
    40  	Offset int64  // error occurred after reading Offset bytes
    41  }
    42  
    43  type Record struct {
    44  	state int
    45  	pos   int
    46  }
    47  
    48  func (e *SyntaxError) Error() string { return e.msg }
    49  
    50  // A scanner is a JSON scanning state machine.
    51  // Callers call scan.reset() and then pass bytes in one at a time
    52  // by calling scan.step(&scan, c) for each byte.
    53  // The return value, referred to as an opcode, tells the
    54  // caller about significant parsing events like beginning
    55  // and ending literals, objects, and arrays, so that the
    56  // caller can follow along if it wishes.
    57  // The return value scanEnd indicates that a single top-level
    58  // JSON value has been completed, *before* the byte that
    59  // just got passed in.  (The indication must be delayed in order
    60  // to recognize the end of numbers: is 123 a whole value or
    61  // the beginning of 12345e+6?).
    62  type scanner struct {
    63  	// The step is a func to be called to execute the next transition.
    64  	// Also tried using an integer constant and a single func
    65  	// with a switch, but using the func directly was 10% faster
    66  	// on a 64-bit Mac Mini, and it's nicer to read.
    67  	step func(*scanner, byte) int
    68  
    69  	// Reached end of top-level value.
    70  	endTop bool
    71  
    72  	// Stack of what we're in the middle of - array values, object keys, object values.
    73  	parseState []int
    74  
    75  	// Error that happened, if any.
    76  	err error
    77  
    78  	stateRecord          []Record//array of records of labels(position in array and state on this position)
    79  	cacheRecord	     Record
    80  	cached               bool
    81  	readPos              int  //position in array stateRecord during filling
    82  	length_data          int  //length of data to read, initialized in unmarshal. Helps to set correct capacity of stateRecord
    83  	inNumber             bool // flag of parsing figure
    84  	endLiteral           bool //flag of finishing literal
    85  
    86  	bytes int64 // total bytes consumed, updated by decoder.Decode
    87  }
    88  
    89  // These values are returned by the state transition functions
    90  // assigned to scanner.state and the method scanner.eof.
    91  // They give details about the current state of the scan that
    92  // callers might be interested to know about.
    93  // It is okay to ignore the return value of any particular
    94  // call to scanner.state: if one call returns scanError,
    95  // every subsequent call will return scanError too.
    96  const (
    97  	scanBeginLiteral = iota // end implied by next result != scanContinue
    98  	scanEndLiteral          // not returned by scanner, but clearer for state recording
    99  	scanBeginObject         // begin object
   100  	scanEndObject           // end object (implies scanObjectValue if possible)
   101  	scanBeginArray          // begin array
   102  	scanEndArray            // end array (implies scanArrayValue if possible)
   103  	scanObjectKey           // just finished object key (string)
   104  	scanObjectValue         // just finished non-last object value
   105  	scanContinue            // uninteresting byte
   106  	scanArrayValue          // just finished array value
   107  
   108  	scanSkipSpace // space byte; can skip; known to be last "continue" result
   109  
   110  	// Stop.
   111  	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
   112  	scanError // hit an error, scanner.err.
   113  )
   114  
   115  // These values are stored in the parseState stack.
   116  // They give the current state of a composite value
   117  // being scanned. If the parser is inside a nested value
   118  // the parseState describes the nested state, outermost at entry 0.
   119  const (
   120  	parseObjectKey   = iota // parsing object key (before colon)
   121  	parseObjectValue        // parsing object value (after colon)
   122  	parseArrayValue         // parsing array value
   123  )
   124  
   125  type streamByte struct {
   126  	data *[]byte
   127  	pos  int
   128  }
   129  
   130  func (s *streamByte) isEnd() bool {
   131  	return s.pos >= len(*s.data)
   132  }
   133  
   134  func (s *streamByte) Take() byte {
   135  	result := s.Peek()
   136  	s.pos++
   137  	return result
   138  }
   139  
   140  func (s *streamByte) Peek() byte {
   141  	if !s.isEnd() {
   142  		return (*s.data)[s.pos]
   143  	} else {
   144  		return 0//I have to leave this case, because I can call Peek, when the stream is over. I won't use value it returns, but it should be protected 
   145  	}
   146  }
   147  
   148  func (sb *streamByte) skipSpaces() {
   149  	for c := sb.Peek(); c <= ' ' && isSpace(c); {
   150  		sb.pos++
   151  		c = sb.Peek()
   152  	}
   153  }
   154  
   155  const AVERAGE_LENGTH = 10000
   156  
   157  // reset prepares the scanner for use.
   158  // It must be called before calling s.step.
   159  func (s *scanner) reset() {
   160  	s.step = stateBeginValue
   161  	s.parseState = s.parseState[0:0]
   162  	s.err = nil
   163  	if s.isRecordEmpty() {
   164  		if s.length_data >= AVERAGE_LENGTH {
   165  			s.stateRecord = make([]Record, 0, s.length_data/4) //capacity doesn't depends on the length whole value, but on the length of nested values. But predictively the large values have large nested values.
   166  		} else {
   167  			s.stateRecord = make([]Record, 0, s.length_data/2)
   168  		}
   169  	}
   170  	s.inNumber = false
   171  	s.endLiteral = false
   172  	s.cached = false
   173  	s.readPos = 0
   174  	s.endTop = false
   175  }
   176  
   177  // eof tells the scanner that the end of input has been reached.
   178  // It returns a scan status just as s.step does.
   179  func (s *scanner) eof() int {
   180  	if s.err != nil {
   181  		return scanError
   182  	}
   183  	if s.endTop {
   184  		return scanEnd
   185  	}
   186  	s.step(s, ' ')
   187  	if s.endTop {
   188  		return scanEnd
   189  	}
   190  	if s.err == nil {
   191  		s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
   192  	}
   193  	return scanError
   194  }
   195  
   196  // pushParseState pushes a new parse state p onto the parse stack.
   197  func (s *scanner) pushParseState(p int) {
   198  	s.parseState = append(s.parseState, p)
   199  }
   200  
   201  // popParseState pops a parse state (already obtained) off the stack
   202  // and updates s.step accordingly.
   203  func (s *scanner) popParseState() {
   204  	n := len(s.parseState) - 1
   205  	s.parseState = s.parseState[0:n]
   206  	if n == 0 {
   207  		s.step = stateEndTop
   208  		s.endTop = true
   209  	} else {
   210  		s.step = stateEndValue
   211  	}
   212  }
   213  
   214  //checks if array of records is empty
   215  func (s *scanner) isRecordEmpty() bool {
   216  	return len(s.stateRecord) == 0
   217  }
   218  
   219  //pushes Record into array
   220  func (s *scanner) pushRecord(state, pos int) {
   221  	s.stateRecord = append(s.stateRecord, Record{state:state, pos:pos}) //state are at even positions, pos are at odd positions in stateRecord array
   222  }
   223  
   224  //peeks current state for filling object. Doesn't change position. Returns state, pos
   225  func (s *scanner) peekPos() int {
   226  	if s.readPos >= len(s.stateRecord){
   227  		return  s.cacheRecord.pos// peek can be called when the array is over , only if unmarshal error occured, so return last read position 
   228  	}
   229  	if !s.cached {
   230  		s.cached = true
   231  		s.cacheRecord = s.stateRecord[s.readPos]
   232  	}
   233  	return s.cacheRecord.pos
   234  }
   235  
   236  func (s *scanner) peekState() int {
   237  	if s.readPos >= len(s.stateRecord) {
   238  	    return s.cacheRecord.state  // the same as Peek 
   239  	}
   240  	if !s.cached {
   241  		s.cached = true
   242  		s.cacheRecord = s.stateRecord[s.readPos]
   243  	}
   244  	return s.cacheRecord.state
   245  }
   246  
   247  //takes current state and increments reading position.
   248  func (s *scanner) takeState() int {
   249  	if s.cached {
   250  		s.cached = false
   251  	}else{
   252  	    s.peekState()
   253  	}
   254  	s.readPos += 1
   255  	return s.cacheRecord.state
   256  }
   257  
   258  func (s *scanner) takePos() int {
   259  	if s.cached {
   260  		s.cached = false
   261  	}else{
   262  	    s.peekState()
   263  	}
   264  	s.readPos += 1
   265  	return s.cacheRecord.pos
   266  }
   267  
   268  func (s *scanner) skipRecord() {
   269  	s.readPos += 1
   270  	s.cached = false
   271  }
   272  
   273  //checks if we need this state to be recorded
   274  func (s *scanner) isNeededState(state int) bool {
   275  	if s.endLiteral {
   276  		return true
   277  	}
   278  	if state > scanEndArray || state < scanBeginLiteral {
   279  		return false
   280  	}
   281  	return true
   282  }
   283  
   284  func (s *scanner) fillRecord(pos, state int) {
   285  
   286  	if s.isNeededState(state) {
   287  		if s.inNumber && s.endLiteral { // in case 2] , 2} or 2,
   288  			s.inNumber = false
   289  			s.endLiteral = false
   290  			s.pushRecord(scanEndLiteral, pos-1)
   291  			if s.isNeededState(state) { // in case 2] or 2}
   292  				s.pushRecord(state, pos)
   293  			}
   294  			return
   295  		}
   296  
   297  		if s.endLiteral {
   298  			s.endLiteral = false
   299  			state = scanEndLiteral
   300  		}
   301  		s.pushRecord(state, pos)
   302  	}
   303  
   304  }
   305  
   306  func isSpace(c byte) bool {
   307  	return c == ' ' || c == '\t' || c == '\r' || c == '\n'
   308  }
   309  
   310  // stateBeginValueOrEmpty is the state after reading `[`.
   311  func stateBeginValueOrEmpty(s *scanner, c byte) int {
   312  	if c <= ' ' && isSpace(c) {
   313  		return scanSkipSpace
   314  	}
   315  	if c == ']' {
   316  		return stateEndValue(s, c)
   317  	}
   318  	return stateBeginValue(s, c)
   319  }
   320  
   321  // stateBeginValue is the state at the beginning of the input.
   322  func stateBeginValue(s *scanner, c byte) int {
   323  	if c <= ' ' && isSpace(c) {
   324  		return scanSkipSpace
   325  	}
   326  	switch c {
   327  	case '{':
   328  		s.step = stateBeginStringOrEmpty
   329  		s.pushParseState(parseObjectKey)
   330  		return scanBeginObject
   331  	case '[':
   332  		s.step = stateBeginValueOrEmpty
   333  		s.pushParseState(parseArrayValue)
   334  		return scanBeginArray
   335  	case '"':
   336  		s.step = stateInString
   337  		return scanBeginLiteral
   338  	case '-':
   339  		s.step = stateNeg
   340  		s.inNumber = true
   341  		return scanBeginLiteral
   342  	case '0': // beginning of 0.123
   343  		s.step = state0
   344  		s.inNumber = true
   345  		return scanBeginLiteral
   346  	case 't': // beginning of true
   347  		s.step = stateT
   348  		return scanBeginLiteral
   349  	case 'f': // beginning of false
   350  		s.step = stateF
   351  		return scanBeginLiteral
   352  	case 'n': // beginning of null
   353  		s.step = stateN
   354  		return scanBeginLiteral
   355  	}
   356  	if '1' <= c && c <= '9' { // beginning of 1234.5
   357  		s.step = state1
   358  		s.inNumber = true
   359  		return scanBeginLiteral
   360  	}
   361  	return s.error(c, "looking for beginning of value")
   362  }
   363  
   364  // stateBeginStringOrEmpty is the state after reading `{`.
   365  func stateBeginStringOrEmpty(s *scanner, c byte) int {
   366  	if c <= ' ' && isSpace(c) {
   367  		return scanSkipSpace
   368  	}
   369  	if c == '}' {
   370  		n := len(s.parseState)
   371  		s.parseState[n-1] = parseObjectValue
   372  		return stateEndValue(s, c)
   373  	}
   374  	return stateBeginString(s, c)
   375  }
   376  
   377  // stateBeginString is the state after reading `{"key": value,`.
   378  func stateBeginString(s *scanner, c byte) int {
   379  	if c <= ' ' && isSpace(c) {
   380  		return scanSkipSpace
   381  	}
   382  	if c == '"' {
   383  		s.step = stateInString
   384  		return scanBeginLiteral
   385  	}
   386  	return s.error(c, "looking for beginning of object key string")
   387  }
   388  
   389  // stateEndValue is the state after completing a value,
   390  // such as after reading `{}` or `true` or `["x"`.
   391  func stateEndValue(s *scanner, c byte) int {
   392  	n := len(s.parseState)
   393  	if n == 0 {
   394  		// Completed top-level before the current byte.
   395  		s.step = stateEndTop
   396  		s.endTop = true
   397  		return stateEndTop(s, c)
   398  	}
   399  	if c <= ' ' && isSpace(c) {
   400  		s.step = stateEndValue
   401  		return scanSkipSpace
   402  	}
   403  	ps := s.parseState[n-1]
   404  	switch ps {
   405  	case parseObjectKey:
   406  		if c == ':' {
   407  			s.parseState[n-1] = parseObjectValue
   408  			s.step = stateBeginValue
   409  			return scanObjectKey
   410  		}
   411  		return s.error(c, "after object key")
   412  	case parseObjectValue:
   413  		if c == ',' {
   414  			s.parseState[n-1] = parseObjectKey
   415  			s.step = stateBeginString
   416  			return scanObjectValue
   417  		}
   418  		if c == '}' {
   419  			s.popParseState()
   420  			return scanEndObject
   421  		}
   422  		return s.error(c, "after object key:value pair")
   423  	case parseArrayValue:
   424  		if c == ',' {
   425  			s.step = stateBeginValue
   426  			return scanArrayValue
   427  		}
   428  		if c == ']' {
   429  			s.popParseState()
   430  			return scanEndArray
   431  		}
   432  		return s.error(c, "after array element")
   433  	}
   434  	return s.error(c, "")
   435  }
   436  
   437  // stateEndTop is the state after finishing the top-level value,
   438  // such as after reading `{}` or `[1,2,3]`.
   439  // Only space characters should be seen now.
   440  func stateEndTop(s *scanner, c byte) int {
   441  	if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
   442  		// Complain about non-space byte on next call.
   443  		s.error(c, "after top-level value")
   444  	}
   445  	return scanEnd
   446  }
   447  
   448  // stateInString is the state after reading `"`.
   449  func stateInString(s *scanner, c byte) int {
   450  	if c == '"' {
   451  		s.step = stateEndValue
   452  		s.endLiteral = true
   453  		return scanContinue
   454  	}
   455  	if c == '\\' {
   456  		s.step = stateInStringEsc
   457  		return scanContinue
   458  	}
   459  	if c < 0x20 {
   460  		return s.error(c, "in string literal")
   461  	}
   462  	return scanContinue
   463  }
   464  
   465  // stateInStringEsc is the state after reading `"\` during a quoted string.
   466  func stateInStringEsc(s *scanner, c byte) int {
   467  	switch c {
   468  	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
   469  		s.step = stateInString
   470  		return scanContinue
   471  	case 'u':
   472  		s.step = stateInStringEscU
   473  		return scanContinue
   474  	}
   475  	return s.error(c, "in string escape code")
   476  }
   477  
   478  // stateInStringEscU is the state after reading `"\u` during a quoted string.
   479  func stateInStringEscU(s *scanner, c byte) int {
   480  	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   481  		s.step = stateInStringEscU1
   482  		return scanContinue
   483  	}
   484  	// numbers
   485  	return s.error(c, "in \\u hexadecimal character escape")
   486  }
   487  
   488  // stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
   489  func stateInStringEscU1(s *scanner, c byte) int {
   490  	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   491  		s.step = stateInStringEscU12
   492  		return scanContinue
   493  	}
   494  	// numbers
   495  	return s.error(c, "in \\u hexadecimal character escape")
   496  }
   497  
   498  // stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
   499  func stateInStringEscU12(s *scanner, c byte) int {
   500  	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   501  		s.step = stateInStringEscU123
   502  		return scanContinue
   503  	}
   504  	// numbers
   505  	return s.error(c, "in \\u hexadecimal character escape")
   506  }
   507  
   508  // stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
   509  func stateInStringEscU123(s *scanner, c byte) int {
   510  	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   511  		s.step = stateInString
   512  		return scanContinue
   513  	}
   514  	// numbers
   515  	return s.error(c, "in \\u hexadecimal character escape")
   516  }
   517  
   518  // stateNeg is the state after reading `-` during a number.
   519  func stateNeg(s *scanner, c byte) int {
   520  	if c == '0' {
   521  		s.step = state0
   522  		return scanContinue
   523  	}
   524  	if '1' <= c && c <= '9' {
   525  		s.step = state1
   526  		return scanContinue
   527  	}
   528  	return s.error(c, "in numeric literal")
   529  }
   530  
   531  // state1 is the state after reading a non-zero integer during a number,
   532  // such as after reading `1` or `100` but not `0`.
   533  func state1(s *scanner, c byte) int {
   534  	if '0' <= c && c <= '9' {
   535  		s.step = state1
   536  		return scanContinue
   537  	}
   538  	return state0(s, c)
   539  }
   540  
   541  // state0 is the state after reading `0` during a number.
   542  func state0(s *scanner, c byte) int {
   543  	if c == '.' {
   544  		s.step = stateDot
   545  		return scanContinue
   546  	}
   547  	if c == 'e' || c == 'E' {
   548  		s.step = stateE
   549  		return scanContinue
   550  	}
   551  	s.endLiteral = true
   552  	return stateEndValue(s, c)
   553  }
   554  
   555  // stateDot is the state after reading the integer and decimal point in a number,
   556  // such as after reading `1.`.
   557  func stateDot(s *scanner, c byte) int {
   558  	if '0' <= c && c <= '9' {
   559  		s.step = stateDot0
   560  		return scanContinue
   561  	}
   562  	return s.error(c, "after decimal point in numeric literal")
   563  }
   564  
   565  // stateDot0 is the state after reading the integer, decimal point, and subsequent
   566  // digits of a number, such as after reading `3.14`.
   567  func stateDot0(s *scanner, c byte) int {
   568  	if '0' <= c && c <= '9' {
   569  		return scanContinue
   570  	}
   571  	if c == 'e' || c == 'E' {
   572  		s.step = stateE
   573  		return scanContinue
   574  	}
   575  	s.endLiteral = true
   576  	return stateEndValue(s, c)
   577  }
   578  
   579  // stateE is the state after reading the mantissa and e in a number,
   580  // such as after reading `314e` or `0.314e`.
   581  func stateE(s *scanner, c byte) int {
   582  	if c == '+' || c == '-' {
   583  		s.step = stateESign
   584  		return scanContinue
   585  	}
   586  	return stateESign(s, c)
   587  }
   588  
   589  // stateESign is the state after reading the mantissa, e, and sign in a number,
   590  // such as after reading `314e-` or `0.314e+`.
   591  func stateESign(s *scanner, c byte) int {
   592  	if '0' <= c && c <= '9' {
   593  		s.step = stateE0
   594  		return scanContinue
   595  	}
   596  	return s.error(c, "in exponent of numeric literal")
   597  }
   598  
   599  // stateE0 is the state after reading the mantissa, e, optional sign,
   600  // and at least one digit of the exponent in a number,
   601  // such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
   602  func stateE0(s *scanner, c byte) int {
   603  	if '0' <= c && c <= '9' {
   604  		return scanContinue
   605  	}
   606  	s.endLiteral = true
   607  	return stateEndValue(s, c)
   608  }
   609  
   610  // stateT is the state after reading `t`.
   611  func stateT(s *scanner, c byte) int {
   612  	if c == 'r' {
   613  		s.step = stateTr
   614  		return scanContinue
   615  	}
   616  	return s.error(c, "in literal true (expecting 'r')")
   617  }
   618  
   619  // stateTr is the state after reading `tr`.
   620  func stateTr(s *scanner, c byte) int {
   621  	if c == 'u' {
   622  		s.step = stateTru
   623  		return scanContinue
   624  	}
   625  	return s.error(c, "in literal true (expecting 'u')")
   626  }
   627  
   628  // stateTru is the state after reading `tru`.
   629  func stateTru(s *scanner, c byte) int {
   630  	if c == 'e' {
   631  		s.step = stateEndValue
   632  		s.endLiteral = true
   633  		return scanContinue
   634  	}
   635  	return s.error(c, "in literal true (expecting 'e')")
   636  }
   637  
   638  // stateF is the state after reading `f`.
   639  func stateF(s *scanner, c byte) int {
   640  	if c == 'a' {
   641  		s.step = stateFa
   642  		return scanContinue
   643  	}
   644  	return s.error(c, "in literal false (expecting 'a')")
   645  }
   646  
   647  // stateFa is the state after reading `fa`.
   648  func stateFa(s *scanner, c byte) int {
   649  	if c == 'l' {
   650  		s.step = stateFal
   651  		return scanContinue
   652  	}
   653  	return s.error(c, "in literal false (expecting 'l')")
   654  }
   655  
   656  // stateFal is the state after reading `fal`.
   657  func stateFal(s *scanner, c byte) int {
   658  	if c == 's' {
   659  		s.step = stateFals
   660  		return scanContinue
   661  	}
   662  	return s.error(c, "in literal false (expecting 's')")
   663  }
   664  
   665  // stateFals is the state after reading `fals`.
   666  func stateFals(s *scanner, c byte) int {
   667  	if c == 'e' {
   668  		s.step = stateEndValue
   669  		s.endLiteral = true
   670  		return scanContinue
   671  	}
   672  	return s.error(c, "in literal false (expecting 'e')")
   673  }
   674  
   675  // stateN is the state after reading `n`.
   676  func stateN(s *scanner, c byte) int {
   677  	if c == 'u' {
   678  		s.step = stateNu
   679  		return scanContinue
   680  	}
   681  	return s.error(c, "in literal null (expecting 'u')")
   682  }
   683  
   684  // stateNu is the state after reading `nu`.
   685  func stateNu(s *scanner, c byte) int {
   686  	if c == 'l' {
   687  		s.step = stateNul
   688  		return scanContinue
   689  	}
   690  	return s.error(c, "in literal null (expecting 'l')")
   691  }
   692  
   693  // stateNul is the state after reading `nul`.
   694  func stateNul(s *scanner, c byte) int {
   695  	if c == 'l' {
   696  		s.step = stateEndValue
   697  		s.endLiteral = true
   698  		return scanContinue
   699  	}
   700  	return s.error(c, "in literal null (expecting 'l')")
   701  }
   702  
   703  // stateError is the state after reaching a syntax error,
   704  // such as after reading `[1}` or `5.1.2`.
   705  func stateError(s *scanner, c byte) int {
   706  	return scanError
   707  }
   708  
   709  // error records an error and switches to the error state.
   710  func (s *scanner) error(c byte, context string) int {
   711  	s.step = stateError
   712  	s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
   713  	return scanError
   714  }
   715  
   716  // quoteChar formats c as a quoted character literal
   717  func quoteChar(c byte) string {
   718  	// special cases - different from quoted strings
   719  	if c == '\'' {
   720  		return `'\''`
   721  	}
   722  	if c == '"' {
   723  		return `'"'`
   724  	}
   725  
   726  	// use quoted string with different quotation marks
   727  	s := strconv.Quote(string(c))
   728  	return "'" + s[1:len(s)-1] + "'"
   729  }
   730  
   731  func (sb *streamByte) error(s *scanner, context string) int {
   732  	s.err = &SyntaxError{"invalid character " + quoteChar(sb.Peek()) + " " + context, int64(sb.pos + 1)}
   733  	return scanError
   734  }
   735  
   736  func (s *scanner) parseSimpleLiteral(sb *streamByte, length int) int {
   737  	if len(*sb.data) < sb.pos+length {
   738  		s.err = &SyntaxError{"unexpected end of JSON input", int64(len(*sb.data))}
   739  		return scanError
   740  	}
   741  	s.pushRecord(scanBeginLiteral, sb.pos)
   742  	sb.Take()
   743  	s.bytes = int64(sb.pos)
   744  	for i := 0; i < length-1; i++ {
   745  		s.bytes++
   746  		op := s.step(s, sb.Take())
   747  		if op == scanError {
   748  			return op
   749  		}
   750  	}
   751  	s.pushRecord(scanEndLiteral, sb.pos-1)
   752  	return scanContinue
   753  }
   754  
   755  func (s *scanner) parseValue(sb *streamByte) int {
   756  	sb.skipSpaces()
   757  	topValue := s.endTop
   758  	if len(*sb.data) <= sb.pos {
   759  		s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   760  		return scanError
   761  	}
   762  	cur := sb.Peek()
   763  	s.endTop = false
   764  	op := scanContinue
   765  	switch cur {
   766  	case '"':
   767  		op = s.parseString(sb)
   768  	case '{':
   769  		op = s.parseObject(sb)
   770  	case '[':
   771  		op = s.parseArray(sb)
   772  	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   773  		op = s.parseNumber(sb)
   774  	case 't':
   775  		s.step = stateT
   776  		op = s.parseSimpleLiteral(sb, 4)
   777  
   778  	case 'f':
   779  		s.step = stateF
   780  		op = s.parseSimpleLiteral(sb, 5)
   781  	case 'n':
   782  		s.step = stateN
   783  		op = s.parseSimpleLiteral(sb, 4)
   784  	default:
   785  		return sb.error(s, "looking for beginning of value")
   786  	}
   787  
   788  	if topValue && op != scanError {
   789  		sb.skipSpaces()
   790  		if !sb.isEnd() {
   791  			return sb.error(s, "after top-level value")
   792  		}
   793  	}
   794  
   795  	return op
   796  }
   797  
   798  func (s *scanner) parseString(sb *streamByte) int {
   799  	s.pushRecord(scanBeginLiteral, sb.pos)
   800  	sb.pos++ //skip "
   801  	quotePos := bytes.IndexByte((*sb.data)[sb.pos:], '"')
   802  	if quotePos < 0 {
   803  		s.err = &SyntaxError{"unexpected end of JSON input", int64(len(*sb.data))}
   804  		return scanError
   805  
   806  	}
   807  
   808  	// in case without escape symbol \". Errors inside string will be handled during object filling, with function unquote
   809  	// it's done in sake of speed
   810  	sb.pos += quotePos - 1
   811  	for sb.Peek() == '\\' { //pos on the symbol before "
   812  		//it may escape symbol "
   813  		sb.pos--
   814  		sum := 1
   815  
   816  		//checking multiple symbols \, kind of "...\\"..."
   817  		for sb.Peek() == '\\' {
   818  			sum++
   819  			sb.pos--
   820  		}
   821  		if sum%2 == 0 { //even number of \, last of them doesn't escape "; it means that current qoute pos is end of string
   822  			sb.pos += sum
   823  			break
   824  		}
   825  		//otherwise odd number of \ escapes ". Looking for the next "
   826  		sb.pos += sum + 1 // pos on "
   827  		n := bytes.IndexByte((*sb.data)[sb.pos+1:], '"')
   828  		if n < 0 {
   829  			s.err = &SyntaxError{"unexpected end of JSON input", int64(len(*sb.data))}
   830  			return scanError
   831  		}
   832  		sb.pos += n
   833  	}
   834  	//here pos is on the symbol before "
   835  	sb.pos += 2
   836  	s.pushRecord(scanEndLiteral, sb.pos-1)
   837  	return scanEndLiteral
   838  }
   839  
   840  func (s *scanner) parseNumber(sb *streamByte) int {
   841  	s.pushRecord(scanBeginLiteral, sb.pos)
   842  	cur := sb.Take()
   843  	if cur == '-' {
   844  		if sb.isEnd() {
   845  			s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   846  			return scanError
   847  		}
   848  		cur = sb.Take()
   849  	}
   850  	if sb.isEnd() {
   851  		if '0' <= cur && cur <= '9' {
   852  			s.pushRecord(scanEndLiteral, sb.pos-1)
   853  			return scanEndLiteral
   854  		} else {
   855  			sb.pos--
   856  			return sb.error(s, "in numeric literal")
   857  		}
   858  	}
   859  	if !sb.isEnd() && '1' <= cur && cur <= '9' {
   860  		sb.parseFigures()
   861  	} else {
   862  		if cur != '0' {
   863  			sb.pos--
   864  			return sb.error(s, "in numeric literal")
   865  		}
   866  	}
   867  	cur = sb.Take() //pos on the next after cur
   868  	if cur == '.' {
   869  		if op := sb.Peek(); op > '9' || op < '0' {
   870  			if sb.isEnd() {
   871  				s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   872  				return scanError
   873  			}
   874  			return sb.error(s, "after decimal point in numeric literal")
   875  		}
   876  		sb.parseFigures()
   877  		cur = sb.Take()
   878  	}
   879  	if cur == 'e' || cur == 'E' {
   880  		op := sb.Peek()
   881  		if op != '+' && op != '-' && (op < '0' || op > '9') {
   882  			if sb.isEnd() {
   883  				s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   884  				return scanError
   885  			}
   886  			return sb.error(s, "in exponent of numeric literal")
   887  		}
   888  		op = sb.Take()
   889  		if op == '-' || op == '+' {
   890  			op = sb.Peek()
   891  			if op < '0' || op > '9' {
   892  				if sb.isEnd() {
   893  					s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   894  					return scanError
   895  				}
   896  				return sb.error(s, "in exponent of numeric literal")
   897  			}
   898  		}
   899  
   900  		sb.parseFigures()
   901  
   902  	} else { //pos on the second after unknown symbol. like 123ua. pos now at a
   903  		sb.pos--
   904  	}
   905  	s.pushRecord(scanEndLiteral, sb.pos-1)
   906  	return scanEndLiteral
   907  }
   908  
   909  func (sb *streamByte) parseFigures() {
   910  	c := sb.Take()
   911  
   912  	for '0' <= c && c <= '9' {
   913  		c = sb.Take()
   914  	}
   915  	sb.pos--
   916  }
   917  
   918  func (s *scanner) parseObject(sb *streamByte) int {
   919  	s.pushRecord(scanBeginObject, sb.pos)
   920  	sb.pos++ // skip {
   921  	sb.skipSpaces()
   922  	cur := sb.Peek()
   923  	if sb.isEnd() {
   924  		s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   925  		return scanError
   926  	}
   927  
   928  	if cur != '"' && cur != '}' {
   929  		return sb.error(s, "looking for beginning of object key string")
   930  	}
   931  	for !sb.isEnd() {
   932  		sb.skipSpaces()
   933  
   934  		switch cur {
   935  		case '}':
   936  			s.pushRecord(scanEndObject, sb.pos)
   937  			sb.pos++
   938  			return scanEndObject
   939  		case '"':
   940  			op := s.parseString(sb)
   941  			if op == scanError {
   942  				return op
   943  			}
   944  			sb.skipSpaces()
   945  			if sb.isEnd() {
   946  				s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   947  				return scanError
   948  			}
   949  			cur = sb.Peek()
   950  
   951  			if cur == ':' {
   952  				sb.pos++
   953  			} else {
   954  				return sb.error(s, "after object key")
   955  			}
   956  			op = s.parseValue(sb)
   957  			if op == scanError {
   958  				return op
   959  			}
   960  			sb.skipSpaces()
   961  			if sb.isEnd() {
   962  				s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   963  				return scanError
   964  			}
   965  			cur = sb.Peek()
   966  			if cur == ',' {
   967  				sb.pos++
   968  				if sb.isEnd() {
   969  					s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   970  					return scanError
   971  				}
   972  				sb.skipSpaces()
   973  				cur = sb.Peek()
   974  			}
   975  		default:
   976  			return sb.error(s, "after object key:value pair")
   977  		}
   978  	}
   979  
   980  	s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   981  	return scanError
   982  }
   983  
   984  func (s *scanner) parseArray(sb *streamByte) int {
   985  	s.pushRecord(scanBeginArray, sb.pos)
   986  	sb.pos++
   987  	sb.skipSpaces()
   988  	if sb.isEnd() {
   989  		s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
   990  		return scanError
   991  	}
   992  	cur := sb.Peek()
   993  	if cur == ']' {
   994  		s.pushRecord(scanEndArray, sb.pos)
   995  		sb.pos++
   996  		return scanEndArray
   997  	}
   998  	op := s.parseValue(sb)
   999  	if op == scanError {
  1000  		return op
  1001  	}
  1002  	sb.skipSpaces()
  1003  
  1004  	cur = sb.Peek()
  1005  	for !sb.isEnd() {
  1006  		switch cur {
  1007  		case ']':
  1008  			s.pushRecord(scanEndArray, sb.pos)
  1009  			sb.pos++
  1010  			return scanEndArray
  1011  		case ',':
  1012  			sb.pos++
  1013  			sb.skipSpaces()
  1014  		default:
  1015  			return sb.error(s, "after array element")
  1016  		}
  1017  
  1018  		op = s.parseValue(sb)
  1019  		if op == scanError {
  1020  			return op
  1021  		}
  1022  
  1023  		sb.skipSpaces()
  1024  		cur = sb.Peek()
  1025  	}
  1026  
  1027  	//here is incomplete array
  1028  	s.err = &SyntaxError{"unexpected end of JSON input", int64(sb.pos)}
  1029  	return scanError
  1030  
  1031  }