github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/encoding/json/stream.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
// A Decoder reads and decodes JSON objects from an input stream.
//
// Field overview:
//   - r is the underlying reader; refill reads from it into buf.
//   - buf holds buffered input; buf[scanp:] has not been consumed yet.
//   - d is the decodeState reused by Decode to unmarshal each value.
//   - scan is the scanner readValue uses to find the end of a value.
//   - err is a sticky error recorded by readValue; once set, Decode
//     returns it immediately on every later call.
//   - tokenState is the current state of the Token API, one of the
//     token* constants.
//   - tokenStack holds the states saved by Token each time it enters an
//     array or object, restored when the matching delimiter is read.
type Decoder struct {
	r     io.Reader
	buf   []byte
	d     decodeState
	scanp int // start of unread data in buf
	scan  scanner
	err   error

	tokenState int
	tokenStack []int
}
    25  
    26  // NewDecoder returns a new decoder that reads from r.
    27  //
    28  // The decoder introduces its own buffering and may
    29  // read data from r beyond the JSON values requested.
    30  func NewDecoder(r io.Reader) *Decoder {
    31  	return &Decoder{r: r}
    32  }
    33  
    34  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    35  // Number instead of as a float64.
    36  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    37  
    38  // Decode reads the next JSON-encoded value from its
    39  // input and stores it in the value pointed to by v.
    40  //
    41  // See the documentation for Unmarshal for details about
    42  // the conversion of JSON into a Go value.
    43  func (dec *Decoder) Decode(v interface{}) error {
    44  	if dec.err != nil {
    45  		return dec.err
    46  	}
    47  
    48  	if err := dec.tokenPrepareForDecode(); err != nil {
    49  		return err
    50  	}
    51  
    52  	if !dec.tokenValueAllowed() {
    53  		return &SyntaxError{msg: "not at beginning of value"}
    54  	}
    55  
    56  	// Read whole value into buffer.
    57  	n, err := dec.readValue()
    58  	if err != nil {
    59  		return err
    60  	}
    61  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    62  	dec.scanp += n
    63  
    64  	// Don't save err from unmarshal into dec.err:
    65  	// the connection is still usable since we read a complete JSON
    66  	// object from it before the error happened.
    67  	err = dec.d.unmarshal(v)
    68  
    69  	// fixup token streaming state
    70  	dec.tokenValueEnd()
    71  
    72  	return err
    73  }
    74  
    75  // Buffered returns a reader of the data remaining in the Decoder's
    76  // buffer. The reader is valid until the next call to Decode.
    77  func (dec *Decoder) Buffered() io.Reader {
    78  	return bytes.NewReader(dec.buf[dec.scanp:])
    79  }
    80  
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]; dec.scanp itself is not
// advanced here (the caller does that), although refill may reset it
// to 0 when it slides the buffer. On a scanner error or a read error
// the error is also stored in dec.err, making it sticky for Decode.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor: everything in dec.buf[dec.scanp:scanp]
	// has already been fed to the scanner.
	scanp := dec.scanp
	// err holds the result of the most recent refill; it is examined only
	// after the bytes that refill delivered have been scanned.
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			if v == scanEnd {
				// c is the byte after the value, so the value ends at scanp+i.
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// The closing delimiter c is part of the value: include it.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// The whole buffer has been scanned without finding the end.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// Input ended: feed a synthetic space so a value that is
				// complete except for its delayed scanEnd is still accepted.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Non-space bytes left in the buffer mean a value was cut
				// short; a buffer of only whitespace stays a plain io.EOF.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may move the unread data to the front of the buffer and
		// reset dec.scanp, so keep the cursor as an offset across the call.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
   133  
   134  func (dec *Decoder) refill() error {
   135  	// Make room to read more into the buffer.
   136  	// First slide down data already consumed.
   137  	if dec.scanp > 0 {
   138  		n := copy(dec.buf, dec.buf[dec.scanp:])
   139  		dec.buf = dec.buf[:n]
   140  		dec.scanp = 0
   141  	}
   142  
   143  	// Grow buffer if not large enough.
   144  	const minRead = 512
   145  	if cap(dec.buf)-len(dec.buf) < minRead {
   146  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   147  		copy(newBuf, dec.buf)
   148  		dec.buf = newBuf
   149  	}
   150  
   151  	// Read. Delay error for next iteration (after scan).
   152  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   153  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   154  
   155  	return err
   156  }
   157  
   158  func nonSpace(b []byte) bool {
   159  	for _, c := range b {
   160  		if !isSpace(c) {
   161  			return true
   162  		}
   163  	}
   164  	return false
   165  }
   166  
// An Encoder writes JSON objects to an output stream.
//
// Field overview:
//   - w is the destination writer.
//   - err is a sticky error: Encode records a failed write here and
//     returns it immediately on every later call.
//   - indentBuf is nil until Indent is called; afterwards Encode uses it
//     as scratch space for the indented form of each value.
//   - indentPrefix and indentValue are the prefix and indent strings
//     passed to Indent, forwarded to the package-level Indent function.
type Encoder struct {
	w   io.Writer
	err error

	indentBuf    *bytes.Buffer
	indentPrefix string
	indentValue  string
}
   176  
   177  // NewEncoder returns a new encoder that writes to w.
   178  func NewEncoder(w io.Writer) *Encoder {
   179  	return &Encoder{w: w}
   180  }
   181  
   182  // Encode writes the JSON encoding of v to the stream,
   183  // followed by a newline character.
   184  //
   185  // See the documentation for Marshal for details about the
   186  // conversion of Go values to JSON.
   187  func (enc *Encoder) Encode(v interface{}) error {
   188  	if enc.err != nil {
   189  		return enc.err
   190  	}
   191  	e := newEncodeState()
   192  	err := e.marshal(v)
   193  	if err != nil {
   194  		return err
   195  	}
   196  
   197  	// Terminate each value with a newline.
   198  	// This makes the output look a little nicer
   199  	// when debugging, and some kind of space
   200  	// is required if the encoded value was a number,
   201  	// so that the reader knows there aren't more
   202  	// digits coming.
   203  	e.WriteByte('\n')
   204  
   205  	b := e.Bytes()
   206  	if enc.indentBuf != nil {
   207  		enc.indentBuf.Reset()
   208  		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
   209  		if err != nil {
   210  			return err
   211  		}
   212  		b = enc.indentBuf.Bytes()
   213  	}
   214  	if _, err = enc.w.Write(b); err != nil {
   215  		enc.err = err
   216  	}
   217  	encodeStatePool.Put(e)
   218  	return err
   219  }
   220  
   221  // Indent sets the encoder to format each encoded object with Indent.
   222  func (enc *Encoder) Indent(prefix, indent string) {
   223  	enc.indentBuf = new(bytes.Buffer)
   224  	enc.indentPrefix = prefix
   225  	enc.indentValue = indent
   226  }
   227  
   228  // RawMessage is a raw encoded JSON object.
   229  // It implements Marshaler and Unmarshaler and can
   230  // be used to delay JSON decoding or precompute a JSON encoding.
   231  type RawMessage []byte
   232  
   233  // MarshalJSON returns *m as the JSON encoding of m.
   234  func (m *RawMessage) MarshalJSON() ([]byte, error) {
   235  	return *m, nil
   236  }
   237  
   238  // UnmarshalJSON sets *m to a copy of data.
   239  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   240  	if m == nil {
   241  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   242  	}
   243  	*m = append((*m)[0:0], data...)
   244  	return nil
   245  }
   246  
// Compile-time checks that *RawMessage satisfies both interfaces.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
   249  
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers
//	string, for JSON string literals
//	nil, for JSON null
//
// Tokens are produced by Decoder.Token, which obtains non-delimiter
// values by decoding into an interface{}.
type Token interface{}
   260  
// States of the Token API, stored in Decoder.tokenState and saved on
// Decoder.tokenStack while inside a nested array or object.
const (
	// tokenTopValue is the zero value: outside any array or object,
	// where a value may start.
	tokenTopValue = iota
	// tokenArrayStart: a '[' was just read; a value or ']' may follow.
	tokenArrayStart
	// tokenArrayValue: a ',' was just read inside an array; a value may start.
	tokenArrayValue
	// tokenArrayComma: an array element just ended; ',' or ']' may follow.
	tokenArrayComma
	// tokenObjectStart: a '{' was just read; a key string or '}' may follow.
	tokenObjectStart
	// tokenObjectKey: a ',' was just read inside an object; a key string may follow.
	tokenObjectKey
	// tokenObjectColon: an object key was just read; ':' must follow.
	tokenObjectColon
	// tokenObjectValue: a ':' was just read; a value may start.
	tokenObjectValue
	// tokenObjectComma: an object value just ended; ',' or '}' may follow.
	tokenObjectComma
)
   272  
   273  // advance tokenstate from a separator state to a value state
   274  func (dec *Decoder) tokenPrepareForDecode() error {
   275  	// Note: Not calling peek before switch, to avoid
   276  	// putting peek into the standard Decode path.
   277  	// peek is only called when using the Token API.
   278  	switch dec.tokenState {
   279  	case tokenArrayComma:
   280  		c, err := dec.peek()
   281  		if err != nil {
   282  			return err
   283  		}
   284  		if c != ',' {
   285  			return &SyntaxError{"expected comma after array element", 0}
   286  		}
   287  		dec.scanp++
   288  		dec.tokenState = tokenArrayValue
   289  	case tokenObjectColon:
   290  		c, err := dec.peek()
   291  		if err != nil {
   292  			return err
   293  		}
   294  		if c != ':' {
   295  			return &SyntaxError{"expected colon after object key", 0}
   296  		}
   297  		dec.scanp++
   298  		dec.tokenState = tokenObjectValue
   299  	}
   300  	return nil
   301  }
   302  
   303  func (dec *Decoder) tokenValueAllowed() bool {
   304  	switch dec.tokenState {
   305  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   306  		return true
   307  	}
   308  	return false
   309  }
   310  
   311  func (dec *Decoder) tokenValueEnd() {
   312  	switch dec.tokenState {
   313  	case tokenArrayStart, tokenArrayValue:
   314  		dec.tokenState = tokenArrayComma
   315  	case tokenObjectValue:
   316  		dec.tokenState = tokenObjectComma
   317  	}
   318  }
   319  
   320  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   321  type Delim rune
   322  
   323  func (d Delim) String() string {
   324  	return string(d)
   325  }
   326  
   327  // Token returns the next JSON token in the input stream.
   328  // At the end of the input stream, Token returns nil, io.EOF.
   329  //
   330  // Token guarantees that the delimiters [ ] { } it returns are
   331  // properly nested and matched: if Token encounters an unexpected
   332  // delimiter in the input, it will return an error.
   333  //
   334  // The input stream consists of basic JSON values—bool, string,
   335  // number, and null—along with delimiters [ ] { } of type Delim
   336  // to mark the start and end of arrays and objects.
   337  // Commas and colons are elided.
   338  func (dec *Decoder) Token() (Token, error) {
   339  	for {
   340  		c, err := dec.peek()
   341  		if err != nil {
   342  			return nil, err
   343  		}
   344  		switch c {
   345  		case '[':
   346  			if !dec.tokenValueAllowed() {
   347  				return dec.tokenError(c)
   348  			}
   349  			dec.scanp++
   350  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   351  			dec.tokenState = tokenArrayStart
   352  			return Delim('['), nil
   353  
   354  		case ']':
   355  			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
   356  				return dec.tokenError(c)
   357  			}
   358  			dec.scanp++
   359  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   360  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   361  			dec.tokenValueEnd()
   362  			return Delim(']'), nil
   363  
   364  		case '{':
   365  			if !dec.tokenValueAllowed() {
   366  				return dec.tokenError(c)
   367  			}
   368  			dec.scanp++
   369  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   370  			dec.tokenState = tokenObjectStart
   371  			return Delim('{'), nil
   372  
   373  		case '}':
   374  			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
   375  				return dec.tokenError(c)
   376  			}
   377  			dec.scanp++
   378  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   379  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   380  			dec.tokenValueEnd()
   381  			return Delim('}'), nil
   382  
   383  		case ':':
   384  			if dec.tokenState != tokenObjectColon {
   385  				return dec.tokenError(c)
   386  			}
   387  			dec.scanp++
   388  			dec.tokenState = tokenObjectValue
   389  			continue
   390  
   391  		case ',':
   392  			if dec.tokenState == tokenArrayComma {
   393  				dec.scanp++
   394  				dec.tokenState = tokenArrayValue
   395  				continue
   396  			}
   397  			if dec.tokenState == tokenObjectComma {
   398  				dec.scanp++
   399  				dec.tokenState = tokenObjectKey
   400  				continue
   401  			}
   402  			return dec.tokenError(c)
   403  
   404  		case '"':
   405  			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
   406  				var x string
   407  				old := dec.tokenState
   408  				dec.tokenState = tokenTopValue
   409  				err := dec.Decode(&x)
   410  				dec.tokenState = old
   411  				if err != nil {
   412  					clearOffset(err)
   413  					return nil, err
   414  				}
   415  				dec.tokenState = tokenObjectColon
   416  				return x, nil
   417  			}
   418  			fallthrough
   419  
   420  		default:
   421  			if !dec.tokenValueAllowed() {
   422  				return dec.tokenError(c)
   423  			}
   424  			var x interface{}
   425  			if err := dec.Decode(&x); err != nil {
   426  				clearOffset(err)
   427  				return nil, err
   428  			}
   429  			return x, nil
   430  		}
   431  	}
   432  }
   433  
   434  func clearOffset(err error) {
   435  	if s, ok := err.(*SyntaxError); ok {
   436  		s.Offset = 0
   437  	}
   438  }
   439  
   440  func (dec *Decoder) tokenError(c byte) (Token, error) {
   441  	var context string
   442  	switch dec.tokenState {
   443  	case tokenTopValue:
   444  		context = " looking for beginning of value"
   445  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   446  		context = " looking for beginning of value"
   447  	case tokenArrayComma:
   448  		context = " after array element"
   449  	case tokenObjectKey:
   450  		context = " looking for beginning of object key string"
   451  	case tokenObjectColon:
   452  		context = " after object key"
   453  	case tokenObjectComma:
   454  		context = " after object key:value pair"
   455  	}
   456  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
   457  }
   458  
   459  // More reports whether there is another element in the
   460  // current array or object being parsed.
   461  func (dec *Decoder) More() bool {
   462  	c, err := dec.peek()
   463  	return err == nil && c != ']' && c != '}'
   464  }
   465  
   466  func (dec *Decoder) peek() (byte, error) {
   467  	var err error
   468  	for {
   469  		for i := dec.scanp; i < len(dec.buf); i++ {
   470  			c := dec.buf[i]
   471  			if isSpace(c) {
   472  				continue
   473  			}
   474  			dec.scanp = i
   475  			return c, nil
   476  		}
   477  		// buffer has been scanned, now report any error
   478  		if err != nil {
   479  			return 0, err
   480  		}
   481  		err = dec.refill()
   482  	}
   483  }
   484  
   485  /*
   486  TODO
   487  
   488  // EncodeToken writes the given JSON token to the stream.
   489  // It returns an error if the delimiters [ ] { } are not properly used.
   490  //
   491  // EncodeToken does not call Flush, because usually it is part of
   492  // a larger operation such as Encode, and those will call Flush when finished.
   493  // Callers that create an Encoder and then invoke EncodeToken directly,
   494  // without using Encode, need to call Flush when finished to ensure that
   495  // the JSON is written to the underlying writer.
   496  func (e *Encoder) EncodeToken(t Token) error  {
   497  	...
   498  }
   499  
   500  */