github.com/intel-go/fastjson@v0.0.0-20170329170629-f846ae58a1ab/stream.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package fastjson
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
// A Decoder reads and decodes JSON objects from an input stream.
//
// Input read from r accumulates in buf; Decode and Token consume it
// starting at offset scanp.
type Decoder struct {
	r     io.Reader   // underlying input stream
	buf   []byte      // bytes read from r that have not been discarded by refill
	d     decodeState // decode state that Decode initializes with each value's bytes
	scanp int         // start of unread data in buf
	scan  scanner     // scanner that readValue steps to find the end of a value
	err   error       // sticky read/scan error; once set, Decode returns it immediately

	tokenState int   // current position in the token grammar (one of the token* constants)
	tokenStack []int // token states saved by Token on '[' or '{' and restored on ']' or '}'
	lastEnd    int   // index in stateRecord, where previous object in stream ends
}
    26  
    27  // NewDecoder returns a new decoder that reads from r.
    28  //
    29  // The decoder introduces its own buffering and may
    30  // read data from r beyond the JSON values requested.
    31  func NewDecoder(r io.Reader) *Decoder {
    32  	return &Decoder{r: r}
    33  }
    34  
    35  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    36  // Number instead of as a float64.
    37  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    38  
// Decode reads the next JSON-encoded value from its
// input and stores it in the value pointed to by v.
//
// See the documentation for Unmarshal for details about
// the conversion of JSON into a Go value.
//
// An error recorded in dec.err by an earlier read is returned again
// without touching the input. An error produced while unmarshaling into v
// is returned but not recorded, so later calls can continue with the
// next value.
func (dec *Decoder) Decode(v interface{}) error {
	// A previously recorded error is sticky.
	if dec.err != nil {
		return dec.err
	}

	// When Decode is interleaved with Token, step over a pending ',' or ':'.
	if err := dec.tokenPrepareForDecode(); err != nil {
		return err
	}

	if !dec.tokenValueAllowed() {
		return &SyntaxError{msg: "not at beginning of value"}
	}

	// Read whole value into buffer.
	n, err := dec.readValue()
	// Debugging aid, left disabled: dec.scan.printArrayofRecords()
	if err != nil {
		return err
	}
	// Give the decode state the n bytes of this value, plus the tail of the
	// stream scanner's stateRecord starting at lastEnd.
	// NOTE(review): presumably those are the records fillRecord produced for
	// this value; confirm against the scanner implementation.
	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
	dec.d.scan.stateRecord = dec.scan.stateRecord[dec.lastEnd:]

	dec.lastEnd = len(dec.scan.stateRecord) // remember where the next value's records begin
	dec.scanp += n

	// Don't save err from unmarshal into dec.err:
	// the connection is still usable since we read a complete JSON
	// object from it before the error happened.
	err = dec.d.unmarshal(v)

	// Fix up the token streaming state: a value has just been consumed.
	dec.tokenValueEnd()

	return err
}
    79  
    80  // Buffered returns a reader of the data remaining in the Decoder's
    81  // buffer. The reader is valid until the next call to Decode.
    82  func (dec *Decoder) Buffered() io.Reader {
    83  	return bytes.NewReader(dec.buf[dec.scanp:])
    84  }
    85  
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]. The scanner dec.scan is reset and
// then fed every buffered byte; each scanner result is passed to
// dec.scan.fillRecord together with the byte's position within the value.
// When the buffer runs out before the value ends, refill is called and
// scanning resumes where it stopped. A scan error or a read error is stored
// in dec.err and returned with a length of 0.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()
	scanp := dec.scanp // position in dec.buf of the next byte to scan
	var err error      // error from the most recent refill, reported only after its data is scanned
	scanedBytes := 0   // number of bytes of this value handed to the scanner so far
Input:
	for {
		// Tell the scanner how many buffered bytes are about to be scanned.
		dec.scan.length_data = len(dec.buf) - scanp
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			dec.scan.fillRecord(scanedBytes, v)
			scanedBytes++
			if v == scanEnd {
				// The value ended before byte i; leave that byte unread.
				scanp += i
				break Input
			}

			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// Byte i closed the top-level value, so it belongs to the value.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// Everything currently buffered has been scanned.
		scanp = len(dec.buf)

		n := scanp - dec.scanp // bytes of the value scanned so far
		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// At end of input a synthetic space may complete the value
				// (the scanner reports scanEnd one byte late).
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					dec.scan.fillRecord(n, scanEnd) // passes the length of the JSON value read
					break Input
				}
				// Input ended inside a value: only a buffer holding nothing
				// but whitespace is reported as a plain io.EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}
		err = dec.refill()
		// refill may have slid the unread data to the front of dec.buf and
		// reset dec.scanp, so recompute the scan position from it.
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
   142  
   143  func (dec *Decoder) refill() error {
   144  	// Make room to read more into the buffer.
   145  	// First slide down data already consumed.
   146  	if dec.scanp > 0 {
   147  		n := copy(dec.buf, dec.buf[dec.scanp:])
   148  		dec.buf = dec.buf[:n]
   149  		dec.scanp = 0
   150  	}
   151  
   152  	// Grow buffer if not large enough.
   153  	const minRead = 512
   154  	if cap(dec.buf)-len(dec.buf) < minRead {
   155  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   156  		copy(newBuf, dec.buf)
   157  		dec.buf = newBuf
   158  	}
   159  
   160  	// Read. Delay error for next iteration (after scan).
   161  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   162  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   163  
   164  	return err
   165  }
   166  
   167  func nonSpace(b []byte) bool {
   168  	for _, c := range b {
   169  		if !isSpace(c) {
   170  			return true
   171  		}
   172  	}
   173  	return false
   174  }
   175  
   176  // An Encoder writes JSON objects to an output stream.
   177  type Encoder struct {
   178  	w   io.Writer
   179  	err error
   180  }
   181  
   182  // NewEncoder returns a new encoder that writes to w.
   183  func NewEncoder(w io.Writer) *Encoder {
   184  	return &Encoder{w: w}
   185  }
   186  
   187  // Encode writes the JSON encoding of v to the stream,
   188  // followed by a newline character.
   189  //
   190  // See the documentation for Marshal for details about the
   191  // conversion of Go values to JSON.
   192  func (enc *Encoder) Encode(v interface{}) error {
   193  	if enc.err != nil {
   194  		return enc.err
   195  	}
   196  	e := newEncodeState()
   197  	err := e.marshal(v)
   198  	if err != nil {
   199  		return err
   200  	}
   201  
   202  	// Terminate each value with a newline.
   203  	// This makes the output look a little nicer
   204  	// when debugging, and some kind of space
   205  	// is required if the encoded value was a number,
   206  	// so that the reader knows there aren't more
   207  	// digits coming.
   208  	e.WriteByte('\n')
   209  
   210  	if _, err = enc.w.Write(e.Bytes()); err != nil {
   211  		enc.err = err
   212  	}
   213  	encodeStatePool.Put(e)
   214  	return err
   215  }
   216  
   217  // RawMessage is a raw encoded JSON object.
   218  // It implements Marshaler and Unmarshaler and can
   219  // be used to delay JSON decoding or precompute a JSON encoding.
   220  type RawMessage []byte
   221  
   222  // MarshalJSON returns *m as the JSON encoding of m.
   223  func (m *RawMessage) MarshalJSON() ([]byte, error) {
   224  	return *m, nil
   225  }
   226  
   227  // UnmarshalJSON sets *m to a copy of data.
   228  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   229  	if m == nil {
   230  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   231  	}
   232  	*m = append((*m)[0:0], data...)
   233  	return nil
   234  }
   235  
// Compile-time checks that *RawMessage implements Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
   238  
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
   249  
// Values of Decoder.tokenState: the position of the Token API within the
// JSON grammar.
const (
	tokenTopValue    = iota // zero value: a top-level value may start here
	tokenArrayStart         // just after '[': a value or ']' may follow
	tokenArrayValue         // just after ',' inside an array: a value must follow
	tokenArrayComma         // just after an array element: ',' or ']' may follow
	tokenObjectStart        // just after '{': a key string or '}' may follow
	tokenObjectKey          // just after ',' inside an object: a key string must follow
	tokenObjectColon        // just after an object key: ':' must follow
	tokenObjectValue        // just after ':': a value must follow
	tokenObjectComma        // just after an object value: ',' or '}' may follow
)
   261  
   262  // advance tokenstate from a separator state to a value state
   263  func (dec *Decoder) tokenPrepareForDecode() error {
   264  	// Note: Not calling peek before switch, to avoid
   265  	// putting peek into the standard Decode path.
   266  	// peek is only called when using the Token API.
   267  	switch dec.tokenState {
   268  	case tokenArrayComma:
   269  		c, err := dec.peek()
   270  		if err != nil {
   271  			return err
   272  		}
   273  		if c != ',' {
   274  			return &SyntaxError{"expected comma after array element", 0}
   275  		}
   276  		dec.scanp++
   277  		dec.tokenState = tokenArrayValue
   278  	case tokenObjectColon:
   279  		c, err := dec.peek()
   280  		if err != nil {
   281  			return err
   282  		}
   283  		if c != ':' {
   284  			return &SyntaxError{"expected colon after object key", 0}
   285  		}
   286  		dec.scanp++
   287  		dec.tokenState = tokenObjectValue
   288  	}
   289  	return nil
   290  }
   291  
   292  func (dec *Decoder) tokenValueAllowed() bool {
   293  	switch dec.tokenState {
   294  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   295  		return true
   296  	}
   297  	return false
   298  }
   299  
   300  func (dec *Decoder) tokenValueEnd() {
   301  	switch dec.tokenState {
   302  	case tokenArrayStart, tokenArrayValue:
   303  		dec.tokenState = tokenArrayComma
   304  	case tokenObjectValue:
   305  		dec.tokenState = tokenObjectComma
   306  	}
   307  }
   308  
   309  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   310  type Delim rune
   311  
   312  func (d Delim) String() string {
   313  	return string(d)
   314  }
   315  
   316  // Token returns the next JSON token in the input stream.
   317  // At the end of the input stream, Token returns nil, io.EOF.
   318  //
   319  // Token guarantees that the delimiters [ ] { } it returns are
   320  // properly nested and matched: if Token encounters an unexpected
   321  // delimiter in the input, it will return an error.
   322  //
   323  // The input stream consists of basic JSON values—bool, string,
   324  // number, and null—along with delimiters [ ] { } of type Delim
   325  // to mark the start and end of arrays and objects.
   326  // Commas and colons are elided.
   327  func (dec *Decoder) Token() (Token, error) {
   328  	for {
   329  		c, err := dec.peek()
   330  		if err != nil {
   331  			return nil, err
   332  		}
   333  		switch c {
   334  		case '[':
   335  			if !dec.tokenValueAllowed() {
   336  				return dec.tokenError(c)
   337  			}
   338  			dec.scanp++
   339  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   340  			dec.tokenState = tokenArrayStart
   341  			return Delim('['), nil
   342  
   343  		case ']':
   344  			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
   345  				return dec.tokenError(c)
   346  			}
   347  			dec.scanp++
   348  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   349  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   350  			dec.tokenValueEnd()
   351  			return Delim(']'), nil
   352  
   353  		case '{':
   354  			if !dec.tokenValueAllowed() {
   355  				return dec.tokenError(c)
   356  			}
   357  			dec.scanp++
   358  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   359  			dec.tokenState = tokenObjectStart
   360  			return Delim('{'), nil
   361  
   362  		case '}':
   363  			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
   364  				return dec.tokenError(c)
   365  			}
   366  			dec.scanp++
   367  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   368  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   369  			dec.tokenValueEnd()
   370  			return Delim('}'), nil
   371  
   372  		case ':':
   373  			if dec.tokenState != tokenObjectColon {
   374  				return dec.tokenError(c)
   375  			}
   376  			dec.scanp++
   377  			dec.tokenState = tokenObjectValue
   378  			continue
   379  
   380  		case ',':
   381  			if dec.tokenState == tokenArrayComma {
   382  				dec.scanp++
   383  				dec.tokenState = tokenArrayValue
   384  				continue
   385  			}
   386  			if dec.tokenState == tokenObjectComma {
   387  				dec.scanp++
   388  				dec.tokenState = tokenObjectKey
   389  				continue
   390  			}
   391  			return dec.tokenError(c)
   392  
   393  		case '"':
   394  			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
   395  				var x string
   396  				old := dec.tokenState
   397  				dec.tokenState = tokenTopValue
   398  				err := dec.Decode(&x)
   399  				dec.tokenState = old
   400  				if err != nil {
   401  					clearOffset(err)
   402  					return nil, err
   403  				}
   404  				dec.tokenState = tokenObjectColon
   405  				return x, nil
   406  			}
   407  			fallthrough
   408  
   409  		default:
   410  			if !dec.tokenValueAllowed() {
   411  				return dec.tokenError(c)
   412  			}
   413  			var x interface{}
   414  			if err := dec.Decode(&x); err != nil {
   415  				clearOffset(err)
   416  				return nil, err
   417  			}
   418  			return x, nil
   419  		}
   420  	}
   421  }
   422  
   423  func clearOffset(err error) {
   424  	if s, ok := err.(*SyntaxError); ok {
   425  		s.Offset = 0
   426  	}
   427  }
   428  
   429  func (dec *Decoder) tokenError(c byte) (Token, error) {
   430  	var context string
   431  	switch dec.tokenState {
   432  	case tokenTopValue:
   433  		context = " looking for beginning of value"
   434  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   435  		context = " looking for beginning of value"
   436  	case tokenArrayComma:
   437  		context = " after array element"
   438  	case tokenObjectKey:
   439  		context = " looking for beginning of object key string"
   440  	case tokenObjectColon:
   441  		context = " after object key"
   442  	case tokenObjectComma:
   443  		context = " after object key:value pair"
   444  	}
   445  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
   446  }
   447  
   448  // More reports whether there is another element in the
   449  // current array or object being parsed.
   450  func (dec *Decoder) More() bool {
   451  	c, err := dec.peek()
   452  	return err == nil && c != ']' && c != '}'
   453  }
   454  
   455  func (dec *Decoder) peek() (byte, error) {
   456  	var err error
   457  	for {
   458  		for i := dec.scanp; i < len(dec.buf); i++ {
   459  			c := dec.buf[i]
   460  			if isSpace(c) {
   461  				continue
   462  			}
   463  			dec.scanp = i
   464  			return c, nil
   465  		}
   466  		// buffer has been scanned, now report any error
   467  		if err != nil {
   468  			return 0, err
   469  		}
   470  		err = dec.refill()
   471  	}
   472  }
   473  
   474  /*
   475  TODO
   476  
   477  // EncodeToken writes the given JSON token to the stream.
   478  // It returns an error if the delimiters [ ] { } are not properly used.
   479  //
   480  // EncodeToken does not call Flush, because usually it is part of
   481  // a larger operation such as Encode, and those will call Flush when finished.
   482  // Callers that create an Encoder and then invoke EncodeToken directly,
   483  // without using Encode, need to call Flush when finished to ensure that
   484  // the JSON is written to the underlying writer.
   485  func (e *Encoder) EncodeToken(t Token) error  {
   486  	...
   487  }
   488  
   489  */