github.com/panjjo/go@v0.0.0-20161104043856-d62b31386338/src/encoding/json/stream.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
// A Decoder reads and decodes JSON values from an input stream.
//
// Input is pulled from r into buf on demand; buf[scanp:] is the portion
// that has been read but not yet consumed by Decode or Token.
type Decoder struct {
	r     io.Reader   // underlying input, read by refill
	buf   []byte      // data read from r so far and not yet discarded
	d     decodeState // unmarshaling state, re-initialized by Decode for every value
	scanp int         // start of unread data in buf
	scan  scanner     // scanner used by readValue to find the end of a value
	err   error       // sticky error recorded by readValue; Decode returns it from then on

	tokenState int   // current position in the token grammar (one of the token* constants)
	tokenStack []int // tokenState values saved by Token when entering an array or object
}
    25  
    26  // NewDecoder returns a new decoder that reads from r.
    27  //
    28  // The decoder introduces its own buffering and may
    29  // read data from r beyond the JSON values requested.
    30  func NewDecoder(r io.Reader) *Decoder {
    31  	return &Decoder{r: r}
    32  }
    33  
    34  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    35  // Number instead of as a float64.
    36  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    37  
    38  // Decode reads the next JSON-encoded value from its
    39  // input and stores it in the value pointed to by v.
    40  //
    41  // See the documentation for Unmarshal for details about
    42  // the conversion of JSON into a Go value.
    43  func (dec *Decoder) Decode(v interface{}) error {
    44  	if dec.err != nil {
    45  		return dec.err
    46  	}
    47  
    48  	if err := dec.tokenPrepareForDecode(); err != nil {
    49  		return err
    50  	}
    51  
    52  	if !dec.tokenValueAllowed() {
    53  		return &SyntaxError{msg: "not at beginning of value"}
    54  	}
    55  
    56  	// Read whole value into buffer.
    57  	n, err := dec.readValue()
    58  	if err != nil {
    59  		return err
    60  	}
    61  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    62  	dec.scanp += n
    63  
    64  	// Don't save err from unmarshal into dec.err:
    65  	// the connection is still usable since we read a complete JSON
    66  	// object from it before the error happened.
    67  	err = dec.d.unmarshal(v)
    68  
    69  	// fixup token streaming state
    70  	dec.tokenValueEnd()
    71  
    72  	return err
    73  }
    74  
    75  // Buffered returns a reader of the data remaining in the Decoder's
    76  // buffer. The reader is valid until the next call to Decode.
    77  func (dec *Decoder) Buffered() io.Reader {
    78  	return bytes.NewReader(dec.buf[dec.scanp:])
    79  }
    80  
    81  // readValue reads a JSON value into dec.buf.
    82  // It returns the length of the encoding.
    83  func (dec *Decoder) readValue() (int, error) {
    84  	dec.scan.reset()
    85  
    86  	scanp := dec.scanp
    87  	var err error
    88  Input:
    89  	for {
    90  		// Look in the buffer for a new value.
    91  		for i, c := range dec.buf[scanp:] {
    92  			dec.scan.bytes++
    93  			v := dec.scan.step(&dec.scan, c)
    94  			if v == scanEnd {
    95  				scanp += i
    96  				break Input
    97  			}
    98  			// scanEnd is delayed one byte.
    99  			// We might block trying to get that byte from src,
   100  			// so instead invent a space byte.
   101  			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
   102  				scanp += i + 1
   103  				break Input
   104  			}
   105  			if v == scanError {
   106  				dec.err = dec.scan.err
   107  				return 0, dec.scan.err
   108  			}
   109  		}
   110  		scanp = len(dec.buf)
   111  
   112  		// Did the last read have an error?
   113  		// Delayed until now to allow buffer scan.
   114  		if err != nil {
   115  			if err == io.EOF {
   116  				if dec.scan.step(&dec.scan, ' ') == scanEnd {
   117  					break Input
   118  				}
   119  				if nonSpace(dec.buf) {
   120  					err = io.ErrUnexpectedEOF
   121  				}
   122  			}
   123  			dec.err = err
   124  			return 0, err
   125  		}
   126  
   127  		n := scanp - dec.scanp
   128  		err = dec.refill()
   129  		scanp = dec.scanp + n
   130  	}
   131  	return scanp - dec.scanp, nil
   132  }
   133  
   134  func (dec *Decoder) refill() error {
   135  	// Make room to read more into the buffer.
   136  	// First slide down data already consumed.
   137  	if dec.scanp > 0 {
   138  		n := copy(dec.buf, dec.buf[dec.scanp:])
   139  		dec.buf = dec.buf[:n]
   140  		dec.scanp = 0
   141  	}
   142  
   143  	// Grow buffer if not large enough.
   144  	const minRead = 512
   145  	if cap(dec.buf)-len(dec.buf) < minRead {
   146  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   147  		copy(newBuf, dec.buf)
   148  		dec.buf = newBuf
   149  	}
   150  
   151  	// Read. Delay error for next iteration (after scan).
   152  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   153  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   154  
   155  	return err
   156  }
   157  
   158  func nonSpace(b []byte) bool {
   159  	for _, c := range b {
   160  		if !isSpace(c) {
   161  			return true
   162  		}
   163  	}
   164  	return false
   165  }
   166  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination for encoded values
	err        error     // sticky error from a failed Write; returned by later Encode calls
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; true by default

	indentBuf    *bytes.Buffer // scratch buffer reused by Encode when indentation is enabled
	indentPrefix string        // prefix argument for Indent, set by SetIndent
	indentValue  string        // indent argument for Indent, set by SetIndent
}
   177  
   178  // NewEncoder returns a new encoder that writes to w.
   179  func NewEncoder(w io.Writer) *Encoder {
   180  	return &Encoder{w: w, escapeHTML: true}
   181  }
   182  
   183  // Encode writes the JSON encoding of v to the stream,
   184  // followed by a newline character.
   185  //
   186  // See the documentation for Marshal for details about the
   187  // conversion of Go values to JSON.
   188  func (enc *Encoder) Encode(v interface{}) error {
   189  	if enc.err != nil {
   190  		return enc.err
   191  	}
   192  	e := newEncodeState()
   193  	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   194  	if err != nil {
   195  		return err
   196  	}
   197  
   198  	// Terminate each value with a newline.
   199  	// This makes the output look a little nicer
   200  	// when debugging, and some kind of space
   201  	// is required if the encoded value was a number,
   202  	// so that the reader knows there aren't more
   203  	// digits coming.
   204  	e.WriteByte('\n')
   205  
   206  	b := e.Bytes()
   207  	if enc.indentPrefix != "" || enc.indentValue != "" {
   208  		if enc.indentBuf == nil {
   209  			enc.indentBuf = new(bytes.Buffer)
   210  		}
   211  		enc.indentBuf.Reset()
   212  		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
   213  		if err != nil {
   214  			return err
   215  		}
   216  		b = enc.indentBuf.Bytes()
   217  	}
   218  	if _, err = enc.w.Write(b); err != nil {
   219  		enc.err = err
   220  	}
   221  	encodeStatePool.Put(e)
   222  	return err
   223  }
   224  
   225  // SetIndent instructs the encoder to format each subsequent encoded
   226  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   227  // Calling SetIndent("", "") disables indentation.
   228  func (enc *Encoder) SetIndent(prefix, indent string) {
   229  	enc.indentPrefix = prefix
   230  	enc.indentValue = indent
   231  }
   232  
   233  // SetEscapeHTML specifies whether problematic HTML characters
   234  // should be escaped inside JSON quoted strings.
   235  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   236  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   237  //
   238  // In non-HTML settings where the escaping interferes with the readability
   239  // of the output, SetEscapeHTML(false) disables this behavior.
   240  func (enc *Encoder) SetEscapeHTML(on bool) {
   241  	enc.escapeHTML = on
   242  }
   243  
   244  // RawMessage is a raw encoded JSON value.
   245  // It implements Marshaler and Unmarshaler and can
   246  // be used to delay JSON decoding or precompute a JSON encoding.
   247  type RawMessage []byte
   248  
   249  // MarshalJSON returns m as the JSON encoding of m.
   250  func (m RawMessage) MarshalJSON() ([]byte, error) {
   251  	if m == nil {
   252  		return []byte("null"), nil
   253  	}
   254  	return m, nil
   255  }
   256  
   257  // UnmarshalJSON sets *m to a copy of data.
   258  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   259  	if m == nil {
   260  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   261  	}
   262  	*m = append((*m)[0:0], data...)
   263  	return nil
   264  }
   265  
// Compile-time checks that *RawMessage satisfies Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
   268  
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called on the Decoder)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
   279  
// Values of Decoder.tokenState. They record where the Token API is
// within the surrounding array or object, and so which input is
// acceptable next.
const (
	// Not inside any array or object (the zero state); a value may start here.
	tokenTopValue = iota
	// Just after '['; a value or ']' may follow.
	tokenArrayStart
	// Just after a ',' inside an array; a value may start here.
	tokenArrayValue
	// Just after an array element; ',' or ']' may follow.
	tokenArrayComma
	// Just after '{'; a key string or '}' may follow.
	tokenObjectStart
	// Just after a ',' inside an object; a key string is read next.
	tokenObjectKey
	// Just after an object key; ':' is expected.
	tokenObjectColon
	// Just after ':'; a value may start here.
	tokenObjectValue
	// Just after an object value; ',' or '}' may follow.
	tokenObjectComma
)
   291  
   292  // advance tokenstate from a separator state to a value state
   293  func (dec *Decoder) tokenPrepareForDecode() error {
   294  	// Note: Not calling peek before switch, to avoid
   295  	// putting peek into the standard Decode path.
   296  	// peek is only called when using the Token API.
   297  	switch dec.tokenState {
   298  	case tokenArrayComma:
   299  		c, err := dec.peek()
   300  		if err != nil {
   301  			return err
   302  		}
   303  		if c != ',' {
   304  			return &SyntaxError{"expected comma after array element", 0}
   305  		}
   306  		dec.scanp++
   307  		dec.tokenState = tokenArrayValue
   308  	case tokenObjectColon:
   309  		c, err := dec.peek()
   310  		if err != nil {
   311  			return err
   312  		}
   313  		if c != ':' {
   314  			return &SyntaxError{"expected colon after object key", 0}
   315  		}
   316  		dec.scanp++
   317  		dec.tokenState = tokenObjectValue
   318  	}
   319  	return nil
   320  }
   321  
   322  func (dec *Decoder) tokenValueAllowed() bool {
   323  	switch dec.tokenState {
   324  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   325  		return true
   326  	}
   327  	return false
   328  }
   329  
   330  func (dec *Decoder) tokenValueEnd() {
   331  	switch dec.tokenState {
   332  	case tokenArrayStart, tokenArrayValue:
   333  		dec.tokenState = tokenArrayComma
   334  	case tokenObjectValue:
   335  		dec.tokenState = tokenObjectComma
   336  	}
   337  }
   338  
   339  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   340  type Delim rune
   341  
   342  func (d Delim) String() string {
   343  	return string(d)
   344  }
   345  
   346  // Token returns the next JSON token in the input stream.
   347  // At the end of the input stream, Token returns nil, io.EOF.
   348  //
   349  // Token guarantees that the delimiters [ ] { } it returns are
   350  // properly nested and matched: if Token encounters an unexpected
   351  // delimiter in the input, it will return an error.
   352  //
   353  // The input stream consists of basic JSON values—bool, string,
   354  // number, and null—along with delimiters [ ] { } of type Delim
   355  // to mark the start and end of arrays and objects.
   356  // Commas and colons are elided.
   357  func (dec *Decoder) Token() (Token, error) {
   358  	for {
   359  		c, err := dec.peek()
   360  		if err != nil {
   361  			return nil, err
   362  		}
   363  		switch c {
   364  		case '[':
   365  			if !dec.tokenValueAllowed() {
   366  				return dec.tokenError(c)
   367  			}
   368  			dec.scanp++
   369  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   370  			dec.tokenState = tokenArrayStart
   371  			return Delim('['), nil
   372  
   373  		case ']':
   374  			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
   375  				return dec.tokenError(c)
   376  			}
   377  			dec.scanp++
   378  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   379  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   380  			dec.tokenValueEnd()
   381  			return Delim(']'), nil
   382  
   383  		case '{':
   384  			if !dec.tokenValueAllowed() {
   385  				return dec.tokenError(c)
   386  			}
   387  			dec.scanp++
   388  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   389  			dec.tokenState = tokenObjectStart
   390  			return Delim('{'), nil
   391  
   392  		case '}':
   393  			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
   394  				return dec.tokenError(c)
   395  			}
   396  			dec.scanp++
   397  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   398  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   399  			dec.tokenValueEnd()
   400  			return Delim('}'), nil
   401  
   402  		case ':':
   403  			if dec.tokenState != tokenObjectColon {
   404  				return dec.tokenError(c)
   405  			}
   406  			dec.scanp++
   407  			dec.tokenState = tokenObjectValue
   408  			continue
   409  
   410  		case ',':
   411  			if dec.tokenState == tokenArrayComma {
   412  				dec.scanp++
   413  				dec.tokenState = tokenArrayValue
   414  				continue
   415  			}
   416  			if dec.tokenState == tokenObjectComma {
   417  				dec.scanp++
   418  				dec.tokenState = tokenObjectKey
   419  				continue
   420  			}
   421  			return dec.tokenError(c)
   422  
   423  		case '"':
   424  			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
   425  				var x string
   426  				old := dec.tokenState
   427  				dec.tokenState = tokenTopValue
   428  				err := dec.Decode(&x)
   429  				dec.tokenState = old
   430  				if err != nil {
   431  					clearOffset(err)
   432  					return nil, err
   433  				}
   434  				dec.tokenState = tokenObjectColon
   435  				return x, nil
   436  			}
   437  			fallthrough
   438  
   439  		default:
   440  			if !dec.tokenValueAllowed() {
   441  				return dec.tokenError(c)
   442  			}
   443  			var x interface{}
   444  			if err := dec.Decode(&x); err != nil {
   445  				clearOffset(err)
   446  				return nil, err
   447  			}
   448  			return x, nil
   449  		}
   450  	}
   451  }
   452  
   453  func clearOffset(err error) {
   454  	if s, ok := err.(*SyntaxError); ok {
   455  		s.Offset = 0
   456  	}
   457  }
   458  
   459  func (dec *Decoder) tokenError(c byte) (Token, error) {
   460  	var context string
   461  	switch dec.tokenState {
   462  	case tokenTopValue:
   463  		context = " looking for beginning of value"
   464  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   465  		context = " looking for beginning of value"
   466  	case tokenArrayComma:
   467  		context = " after array element"
   468  	case tokenObjectKey:
   469  		context = " looking for beginning of object key string"
   470  	case tokenObjectColon:
   471  		context = " after object key"
   472  	case tokenObjectComma:
   473  		context = " after object key:value pair"
   474  	}
   475  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
   476  }
   477  
   478  // More reports whether there is another element in the
   479  // current array or object being parsed.
   480  func (dec *Decoder) More() bool {
   481  	c, err := dec.peek()
   482  	return err == nil && c != ']' && c != '}'
   483  }
   484  
   485  func (dec *Decoder) peek() (byte, error) {
   486  	var err error
   487  	for {
   488  		for i := dec.scanp; i < len(dec.buf); i++ {
   489  			c := dec.buf[i]
   490  			if isSpace(c) {
   491  				continue
   492  			}
   493  			dec.scanp = i
   494  			return c, nil
   495  		}
   496  		// buffer has been scanned, now report any error
   497  		if err != nil {
   498  			return 0, err
   499  		}
   500  		err = dec.refill()
   501  	}
   502  }
   503  
   504  /*
   505  TODO
   506  
   507  // EncodeToken writes the given JSON token to the stream.
   508  // It returns an error if the delimiters [ ] { } are not properly used.
   509  //
   510  // EncodeToken does not call Flush, because usually it is part of
   511  // a larger operation such as Encode, and those will call Flush when finished.
   512  // Callers that create an Encoder and then invoke EncodeToken directly,
   513  // without using Encode, need to call Flush when finished to ensure that
   514  // the JSON is written to the underlying writer.
   515  func (e *Encoder) EncodeToken(t Token) error  {
   516  	...
   517  }
   518  
   519  */