github.com/sean-/go@v0.0.0-20151219100004-97f854cd7bb6/src/encoding/json/stream.go (about)

     1  // Copyright 2010 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
// A Decoder reads and decodes JSON objects from an input stream.
type Decoder struct {
	// r is the underlying input; refill reads from it into buf.
	r io.Reader
	// buf holds data read from r. buf[scanp:] has not been consumed yet.
	buf []byte
	// d is initialised with the bytes of each value found by readValue and
	// then used to unmarshal that value.
	d     decodeState
	scanp int // start of unread data in buf
	// scan is the scanner readValue feeds byte by byte to find the end of
	// the next value.
	scan scanner
	// err is a sticky error recorded by readValue (scanner or read
	// failure); once set, Decode returns it without reading further.
	err error

	// tokenState is one of the token* constants and tracks what the Token
	// API expects next.
	tokenState int
	// tokenStack holds the tokenState saved when Token enters an array or
	// object; it is restored when the matching closing delimiter is read.
	tokenStack []int
}
    25  
    26  // NewDecoder returns a new decoder that reads from r.
    27  //
    28  // The decoder introduces its own buffering and may
    29  // read data from r beyond the JSON values requested.
    30  func NewDecoder(r io.Reader) *Decoder {
    31  	return &Decoder{r: r}
    32  }
    33  
    34  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    35  // Number instead of as a float64.
    36  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    37  
    38  // Decode reads the next JSON-encoded value from its
    39  // input and stores it in the value pointed to by v.
    40  //
    41  // See the documentation for Unmarshal for details about
    42  // the conversion of JSON into a Go value.
    43  func (dec *Decoder) Decode(v interface{}) error {
    44  	if dec.err != nil {
    45  		return dec.err
    46  	}
    47  
    48  	if err := dec.tokenPrepareForDecode(); err != nil {
    49  		return err
    50  	}
    51  
    52  	if !dec.tokenValueAllowed() {
    53  		return &SyntaxError{msg: "not at beginning of value"}
    54  	}
    55  
    56  	// Read whole value into buffer.
    57  	n, err := dec.readValue()
    58  	if err != nil {
    59  		return err
    60  	}
    61  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    62  	dec.scanp += n
    63  
    64  	// Don't save err from unmarshal into dec.err:
    65  	// the connection is still usable since we read a complete JSON
    66  	// object from it before the error happened.
    67  	err = dec.d.unmarshal(v)
    68  
    69  	// fixup token streaming state
    70  	dec.tokenValueEnd()
    71  
    72  	return err
    73  }
    74  
    75  // Buffered returns a reader of the data remaining in the Decoder's
    76  // buffer. The reader is valid until the next call to Decode.
    77  func (dec *Decoder) Buffered() io.Reader {
    78  	return bytes.NewReader(dec.buf[dec.scanp:])
    79  }
    80  
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The scanner is reset and then fed the buffered bytes one at a time,
// starting at dec.scanp, until it reports the end of a value. Whenever the
// buffered bytes run out first, dec.refill is called to read more input.
// The returned length is relative to dec.scanp; consuming those bytes is
// left to the caller.
//
// A scanner error, or a read error that arrives before a complete value
// was seen, is stored in dec.err and returned together with a zero length.
// io.EOF is replaced by io.ErrUnexpectedEOF when the buffer contains
// anything other than space.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local scan position; dec.scanp keeps marking the start
	// of the value being read.
	scanp := dec.scanp
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			if v == scanEnd {
				// c is the byte after the value, so it is not included.
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// c is the closing delimiter, so it is included.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// Everything currently buffered has been scanned.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// The input may end directly after a value; feeding
				// the scanner a space lets it report that end.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data down and reset dec.scanp, so
		// carry the scanned length across the call as an offset.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
   133  
   134  func (dec *Decoder) refill() error {
   135  	// Make room to read more into the buffer.
   136  	// First slide down data already consumed.
   137  	if dec.scanp > 0 {
   138  		n := copy(dec.buf, dec.buf[dec.scanp:])
   139  		dec.buf = dec.buf[:n]
   140  		dec.scanp = 0
   141  	}
   142  
   143  	// Grow buffer if not large enough.
   144  	const minRead = 512
   145  	if cap(dec.buf)-len(dec.buf) < minRead {
   146  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   147  		copy(newBuf, dec.buf)
   148  		dec.buf = newBuf
   149  	}
   150  
   151  	// Read.  Delay error for next iteration (after scan).
   152  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   153  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   154  
   155  	return err
   156  }
   157  
   158  func nonSpace(b []byte) bool {
   159  	for _, c := range b {
   160  		if !isSpace(c) {
   161  			return true
   162  		}
   163  	}
   164  	return false
   165  }
   166  
// An Encoder writes JSON objects to an output stream.
type Encoder struct {
	// w is the destination each encoded value is written to.
	w io.Writer
	// err records a failed write to w; once set, Encode returns it
	// without encoding anything.
	err error
}
   172  
   173  // NewEncoder returns a new encoder that writes to w.
   174  func NewEncoder(w io.Writer) *Encoder {
   175  	return &Encoder{w: w}
   176  }
   177  
   178  // Encode writes the JSON encoding of v to the stream,
   179  // followed by a newline character.
   180  //
   181  // See the documentation for Marshal for details about the
   182  // conversion of Go values to JSON.
   183  func (enc *Encoder) Encode(v interface{}) error {
   184  	if enc.err != nil {
   185  		return enc.err
   186  	}
   187  	e := newEncodeState()
   188  	err := e.marshal(v)
   189  	if err != nil {
   190  		return err
   191  	}
   192  
   193  	// Terminate each value with a newline.
   194  	// This makes the output look a little nicer
   195  	// when debugging, and some kind of space
   196  	// is required if the encoded value was a number,
   197  	// so that the reader knows there aren't more
   198  	// digits coming.
   199  	e.WriteByte('\n')
   200  
   201  	if _, err = enc.w.Write(e.Bytes()); err != nil {
   202  		enc.err = err
   203  	}
   204  	encodeStatePool.Put(e)
   205  	return err
   206  }
   207  
   208  // RawMessage is a raw encoded JSON object.
   209  // It implements Marshaler and Unmarshaler and can
   210  // be used to delay JSON decoding or precompute a JSON encoding.
   211  type RawMessage []byte
   212  
   213  // MarshalJSON returns *m as the JSON encoding of m.
   214  func (m *RawMessage) MarshalJSON() ([]byte, error) {
   215  	return *m, nil
   216  }
   217  
   218  // UnmarshalJSON sets *m to a copy of data.
   219  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   220  	if m == nil {
   221  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   222  	}
   223  	*m = append((*m)[0:0], data...)
   224  	return nil
   225  }
   226  
   227  var _ Marshaler = (*RawMessage)(nil)
   228  var _ Unmarshaler = (*RawMessage)(nil)
   229  
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers when the Decoder's UseNumber has been called
//	string, for JSON string literals
//	nil, for JSON null
//
type Token interface{}
   240  
// Values of Decoder.tokenState. Each names the position the Token API has
// reached and thereby what may come next in the input.
const (
	tokenTopValue    = iota // zero value: outside any array or object; a value may follow
	tokenArrayStart         // just after '[': a value or ']' may follow
	tokenArrayValue         // just after ',' in an array: a value must follow
	tokenArrayComma         // just after an array element: ',' or ']' may follow
	tokenObjectStart        // just after '{': a key string or '}' may follow
	tokenObjectKey          // just after ',' in an object: a key string must follow
	tokenObjectColon        // just after an object key: ':' must follow
	tokenObjectValue        // just after ':': a value must follow
	tokenObjectComma        // just after an object value: ',' or '}' may follow
)
   252  
   253  // advance tokenstate from a separator state to a value state
   254  func (dec *Decoder) tokenPrepareForDecode() error {
   255  	// Note: Not calling peek before switch, to avoid
   256  	// putting peek into the standard Decode path.
   257  	// peek is only called when using the Token API.
   258  	switch dec.tokenState {
   259  	case tokenArrayComma:
   260  		c, err := dec.peek()
   261  		if err != nil {
   262  			return err
   263  		}
   264  		if c != ',' {
   265  			return &SyntaxError{"expected comma after array element", 0}
   266  		}
   267  		dec.scanp++
   268  		dec.tokenState = tokenArrayValue
   269  	case tokenObjectColon:
   270  		c, err := dec.peek()
   271  		if err != nil {
   272  			return err
   273  		}
   274  		if c != ':' {
   275  			return &SyntaxError{"expected colon after object key", 0}
   276  		}
   277  		dec.scanp++
   278  		dec.tokenState = tokenObjectValue
   279  	}
   280  	return nil
   281  }
   282  
   283  func (dec *Decoder) tokenValueAllowed() bool {
   284  	switch dec.tokenState {
   285  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   286  		return true
   287  	}
   288  	return false
   289  }
   290  
   291  func (dec *Decoder) tokenValueEnd() {
   292  	switch dec.tokenState {
   293  	case tokenArrayStart, tokenArrayValue:
   294  		dec.tokenState = tokenArrayComma
   295  	case tokenObjectValue:
   296  		dec.tokenState = tokenObjectComma
   297  	}
   298  }
   299  
   300  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   301  type Delim rune
   302  
   303  func (d Delim) String() string {
   304  	return string(d)
   305  }
   306  
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Each iteration looks at the next non-space byte. Separators update
	// the state and loop again; everything else returns a token or an error.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Remember the enclosing state so ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Only valid right after '[' or after an element, not after a comma.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the array as a whole then counts
			// as one finished value in that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Remember the enclosing state so '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Only valid right after '{' or after a key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the object as a whole then counts
			// as one finished value in that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// An object key: decode it as a string. Decode rejects
				// values in these states, so switch to tokenTopValue for
				// the call and put the real state back afterwards.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode the value; Decode also advances the token state.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}
   413  
   414  func clearOffset(err error) {
   415  	if s, ok := err.(*SyntaxError); ok {
   416  		s.Offset = 0
   417  	}
   418  }
   419  
   420  func (dec *Decoder) tokenError(c byte) (Token, error) {
   421  	var context string
   422  	switch dec.tokenState {
   423  	case tokenTopValue:
   424  		context = " looking for beginning of value"
   425  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   426  		context = " looking for beginning of value"
   427  	case tokenArrayComma:
   428  		context = " after array element"
   429  	case tokenObjectKey:
   430  		context = " looking for beginning of object key string"
   431  	case tokenObjectColon:
   432  		context = " after object key"
   433  	case tokenObjectComma:
   434  		context = " after object key:value pair"
   435  	}
   436  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
   437  }
   438  
   439  // More reports whether there is another element in the
   440  // current array or object being parsed.
   441  func (dec *Decoder) More() bool {
   442  	c, err := dec.peek()
   443  	return err == nil && c != ']' && c != '}'
   444  }
   445  
   446  func (dec *Decoder) peek() (byte, error) {
   447  	var err error
   448  	for {
   449  		for i := dec.scanp; i < len(dec.buf); i++ {
   450  			c := dec.buf[i]
   451  			if isSpace(c) {
   452  				continue
   453  			}
   454  			dec.scanp = i
   455  			return c, nil
   456  		}
   457  		// buffer has been scanned, now report any error
   458  		if err != nil {
   459  			return 0, err
   460  		}
   461  		err = dec.refill()
   462  	}
   463  }
   464  
   465  /*
   466  TODO
   467  
   468  // EncodeToken writes the given JSON token to the stream.
   469  // It returns an error if the delimiters [ ] { } are not properly used.
   470  //
   471  // EncodeToken does not call Flush, because usually it is part of
   472  // a larger operation such as Encode, and those will call Flush when finished.
   473  // Callers that create an Encoder and then invoke EncodeToken directly,
   474  // without using Encode, need to call Flush when finished to ensure that
   475  // the JSON is written to the underlying writer.
   476  func (e *Encoder) EncodeToken(t Token) error  {
   477  	...
   478  }
   479  
   480  */