github.com/mh-cbon/go@v0.0.0-20160603070303-9e112a3fe4c0/src/encoding/json/stream.go (about)

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
    13  // A Decoder reads and decodes JSON values from an input stream.
    14  type Decoder struct {
    15  	r     io.Reader
    16  	buf   []byte
    17  	d     decodeState
    18  	scanp int // start of unread data in buf
    19  	scan  scanner
    20  	err   error
    21  
    22  	tokenState int
    23  	tokenStack []int
    24  }
    25  
    26  // NewDecoder returns a new decoder that reads from r.
    27  //
    28  // The decoder introduces its own buffering and may
    29  // read data from r beyond the JSON values requested.
    30  func NewDecoder(r io.Reader) *Decoder {
    31  	return &Decoder{r: r}
    32  }
    33  
    34  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    35  // Number instead of as a float64.
    36  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    37  
    38  // Decode reads the next JSON-encoded value from its
    39  // input and stores it in the value pointed to by v.
    40  //
    41  // See the documentation for Unmarshal for details about
    42  // the conversion of JSON into a Go value.
    43  func (dec *Decoder) Decode(v interface{}) error {
    44  	if dec.err != nil {
    45  		return dec.err
    46  	}
    47  
    48  	if err := dec.tokenPrepareForDecode(); err != nil {
    49  		return err
    50  	}
    51  
    52  	if !dec.tokenValueAllowed() {
    53  		return &SyntaxError{msg: "not at beginning of value"}
    54  	}
    55  
    56  	// Read whole value into buffer.
    57  	n, err := dec.readValue()
    58  	if err != nil {
    59  		return err
    60  	}
    61  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    62  	dec.scanp += n
    63  
    64  	// Don't save err from unmarshal into dec.err:
    65  	// the connection is still usable since we read a complete JSON
    66  	// object from it before the error happened.
    67  	err = dec.d.unmarshal(v)
    68  
    69  	// fixup token streaming state
    70  	dec.tokenValueEnd()
    71  
    72  	return err
    73  }
    74  
    75  // Buffered returns a reader of the data remaining in the Decoder's
    76  // buffer. The reader is valid until the next call to Decode.
    77  func (dec *Decoder) Buffered() io.Reader {
    78  	return bytes.NewReader(dec.buf[dec.scanp:])
    79  }
    80  
    81  // readValue reads a JSON value into dec.buf.
    82  // It returns the length of the encoding.
    83  func (dec *Decoder) readValue() (int, error) {
    84  	dec.scan.reset()
    85  
    86  	scanp := dec.scanp
    87  	var err error
    88  Input:
    89  	for {
    90  		// Look in the buffer for a new value.
    91  		for i, c := range dec.buf[scanp:] {
    92  			dec.scan.bytes++
    93  			v := dec.scan.step(&dec.scan, c)
    94  			if v == scanEnd {
    95  				scanp += i
    96  				break Input
    97  			}
    98  			// scanEnd is delayed one byte.
    99  			// We might block trying to get that byte from src,
   100  			// so instead invent a space byte.
   101  			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
   102  				scanp += i + 1
   103  				break Input
   104  			}
   105  			if v == scanError {
   106  				dec.err = dec.scan.err
   107  				return 0, dec.scan.err
   108  			}
   109  		}
   110  		scanp = len(dec.buf)
   111  
   112  		// Did the last read have an error?
   113  		// Delayed until now to allow buffer scan.
   114  		if err != nil {
   115  			if err == io.EOF {
   116  				if dec.scan.step(&dec.scan, ' ') == scanEnd {
   117  					break Input
   118  				}
   119  				if nonSpace(dec.buf) {
   120  					err = io.ErrUnexpectedEOF
   121  				}
   122  			}
   123  			dec.err = err
   124  			return 0, err
   125  		}
   126  
   127  		n := scanp - dec.scanp
   128  		err = dec.refill()
   129  		scanp = dec.scanp + n
   130  	}
   131  	return scanp - dec.scanp, nil
   132  }
   133  
   134  func (dec *Decoder) refill() error {
   135  	// Make room to read more into the buffer.
   136  	// First slide down data already consumed.
   137  	if dec.scanp > 0 {
   138  		n := copy(dec.buf, dec.buf[dec.scanp:])
   139  		dec.buf = dec.buf[:n]
   140  		dec.scanp = 0
   141  	}
   142  
   143  	// Grow buffer if not large enough.
   144  	const minRead = 512
   145  	if cap(dec.buf)-len(dec.buf) < minRead {
   146  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   147  		copy(newBuf, dec.buf)
   148  		dec.buf = newBuf
   149  	}
   150  
   151  	// Read. Delay error for next iteration (after scan).
   152  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   153  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   154  
   155  	return err
   156  }
   157  
   158  func nonSpace(b []byte) bool {
   159  	for _, c := range b {
   160  		if !isSpace(c) {
   161  			return true
   162  		}
   163  	}
   164  	return false
   165  }
   166  
   167  // An Encoder writes JSON values to an output stream.
   168  type Encoder struct {
   169  	w          io.Writer
   170  	err        error
   171  	escapeHTML bool
   172  
   173  	indentBuf    *bytes.Buffer
   174  	indentPrefix string
   175  	indentValue  string
   176  }
   177  
   178  // NewEncoder returns a new encoder that writes to w.
   179  func NewEncoder(w io.Writer) *Encoder {
   180  	return &Encoder{w: w, escapeHTML: true}
   181  }
   182  
   183  // Encode writes the JSON encoding of v to the stream,
   184  // followed by a newline character.
   185  //
   186  // See the documentation for Marshal for details about the
   187  // conversion of Go values to JSON.
   188  func (enc *Encoder) Encode(v interface{}) error {
   189  	if enc.err != nil {
   190  		return enc.err
   191  	}
   192  	e := newEncodeState()
   193  	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   194  	if err != nil {
   195  		return err
   196  	}
   197  
   198  	// Terminate each value with a newline.
   199  	// This makes the output look a little nicer
   200  	// when debugging, and some kind of space
   201  	// is required if the encoded value was a number,
   202  	// so that the reader knows there aren't more
   203  	// digits coming.
   204  	e.WriteByte('\n')
   205  
   206  	b := e.Bytes()
   207  	if enc.indentPrefix != "" || enc.indentValue != "" {
   208  		if enc.indentBuf == nil {
   209  			enc.indentBuf = new(bytes.Buffer)
   210  		}
   211  		enc.indentBuf.Reset()
   212  		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
   213  		if err != nil {
   214  			return err
   215  		}
   216  		b = enc.indentBuf.Bytes()
   217  	}
   218  	if _, err = enc.w.Write(b); err != nil {
   219  		enc.err = err
   220  	}
   221  	encodeStatePool.Put(e)
   222  	return err
   223  }
   224  
   225  // SetIndent instructs the encoder to format each subsequent encoded
   226  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   227  // Calling SetIndent("", "") disables indentation.
   228  func (enc *Encoder) SetIndent(prefix, indent string) {
   229  	enc.indentPrefix = prefix
   230  	enc.indentValue = indent
   231  }
   232  
   233  // SetEscapeHTML specifies whether problematic HTML characters
   234  // should be escaped inside JSON quoted strings.
   235  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   236  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   237  //
   238  // In non-HTML settings where the escaping interferes with the readability
   239  // of the output, SetEscapeHTML(false) disables this behavior.
   240  func (enc *Encoder) SetEscapeHTML(on bool) {
   241  	enc.escapeHTML = on
   242  }
   243  
   244  // RawMessage is a raw encoded JSON value.
   245  // It implements Marshaler and Unmarshaler and can
   246  // be used to delay JSON decoding or precompute a JSON encoding.
   247  type RawMessage []byte
   248  
   249  // MarshalJSON returns *m as the JSON encoding of m.
   250  func (m *RawMessage) MarshalJSON() ([]byte, error) {
   251  	return *m, nil
   252  }
   253  
   254  // UnmarshalJSON sets *m to a copy of data.
   255  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   256  	if m == nil {
   257  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   258  	}
   259  	*m = append((*m)[0:0], data...)
   260  	return nil
   261  }
   262  
   263  var _ Marshaler = (*RawMessage)(nil)
   264  var _ Unmarshaler = (*RawMessage)(nil)
   265  
   266  // A Token holds a value of one of these types:
   267  //
   268  //	Delim, for the four JSON delimiters [ ] { }
   269  //	bool, for JSON booleans
   270  //	float64, for JSON numbers
   271  //	Number, for JSON numbers
   272  //	string, for JSON string literals
   273  //	nil, for JSON null
   274  //
   275  type Token interface{}
   276  
   277  const (
   278  	tokenTopValue = iota
   279  	tokenArrayStart
   280  	tokenArrayValue
   281  	tokenArrayComma
   282  	tokenObjectStart
   283  	tokenObjectKey
   284  	tokenObjectColon
   285  	tokenObjectValue
   286  	tokenObjectComma
   287  )
   288  
   289  // advance tokenstate from a separator state to a value state
   290  func (dec *Decoder) tokenPrepareForDecode() error {
   291  	// Note: Not calling peek before switch, to avoid
   292  	// putting peek into the standard Decode path.
   293  	// peek is only called when using the Token API.
   294  	switch dec.tokenState {
   295  	case tokenArrayComma:
   296  		c, err := dec.peek()
   297  		if err != nil {
   298  			return err
   299  		}
   300  		if c != ',' {
   301  			return &SyntaxError{"expected comma after array element", 0}
   302  		}
   303  		dec.scanp++
   304  		dec.tokenState = tokenArrayValue
   305  	case tokenObjectColon:
   306  		c, err := dec.peek()
   307  		if err != nil {
   308  			return err
   309  		}
   310  		if c != ':' {
   311  			return &SyntaxError{"expected colon after object key", 0}
   312  		}
   313  		dec.scanp++
   314  		dec.tokenState = tokenObjectValue
   315  	}
   316  	return nil
   317  }
   318  
   319  func (dec *Decoder) tokenValueAllowed() bool {
   320  	switch dec.tokenState {
   321  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   322  		return true
   323  	}
   324  	return false
   325  }
   326  
   327  func (dec *Decoder) tokenValueEnd() {
   328  	switch dec.tokenState {
   329  	case tokenArrayStart, tokenArrayValue:
   330  		dec.tokenState = tokenArrayComma
   331  	case tokenObjectValue:
   332  		dec.tokenState = tokenObjectComma
   333  	}
   334  }
   335  
   336  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   337  type Delim rune
   338  
   339  func (d Delim) String() string {
   340  	return string(d)
   341  }
   342  
   343  // Token returns the next JSON token in the input stream.
   344  // At the end of the input stream, Token returns nil, io.EOF.
   345  //
   346  // Token guarantees that the delimiters [ ] { } it returns are
   347  // properly nested and matched: if Token encounters an unexpected
   348  // delimiter in the input, it will return an error.
   349  //
   350  // The input stream consists of basic JSON values—bool, string,
   351  // number, and null—along with delimiters [ ] { } of type Delim
   352  // to mark the start and end of arrays and objects.
   353  // Commas and colons are elided.
   354  func (dec *Decoder) Token() (Token, error) {
   355  	for {
   356  		c, err := dec.peek()
   357  		if err != nil {
   358  			return nil, err
   359  		}
   360  		switch c {
   361  		case '[':
   362  			if !dec.tokenValueAllowed() {
   363  				return dec.tokenError(c)
   364  			}
   365  			dec.scanp++
   366  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   367  			dec.tokenState = tokenArrayStart
   368  			return Delim('['), nil
   369  
   370  		case ']':
   371  			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
   372  				return dec.tokenError(c)
   373  			}
   374  			dec.scanp++
   375  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   376  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   377  			dec.tokenValueEnd()
   378  			return Delim(']'), nil
   379  
   380  		case '{':
   381  			if !dec.tokenValueAllowed() {
   382  				return dec.tokenError(c)
   383  			}
   384  			dec.scanp++
   385  			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
   386  			dec.tokenState = tokenObjectStart
   387  			return Delim('{'), nil
   388  
   389  		case '}':
   390  			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
   391  				return dec.tokenError(c)
   392  			}
   393  			dec.scanp++
   394  			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
   395  			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
   396  			dec.tokenValueEnd()
   397  			return Delim('}'), nil
   398  
   399  		case ':':
   400  			if dec.tokenState != tokenObjectColon {
   401  				return dec.tokenError(c)
   402  			}
   403  			dec.scanp++
   404  			dec.tokenState = tokenObjectValue
   405  			continue
   406  
   407  		case ',':
   408  			if dec.tokenState == tokenArrayComma {
   409  				dec.scanp++
   410  				dec.tokenState = tokenArrayValue
   411  				continue
   412  			}
   413  			if dec.tokenState == tokenObjectComma {
   414  				dec.scanp++
   415  				dec.tokenState = tokenObjectKey
   416  				continue
   417  			}
   418  			return dec.tokenError(c)
   419  
   420  		case '"':
   421  			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
   422  				var x string
   423  				old := dec.tokenState
   424  				dec.tokenState = tokenTopValue
   425  				err := dec.Decode(&x)
   426  				dec.tokenState = old
   427  				if err != nil {
   428  					clearOffset(err)
   429  					return nil, err
   430  				}
   431  				dec.tokenState = tokenObjectColon
   432  				return x, nil
   433  			}
   434  			fallthrough
   435  
   436  		default:
   437  			if !dec.tokenValueAllowed() {
   438  				return dec.tokenError(c)
   439  			}
   440  			var x interface{}
   441  			if err := dec.Decode(&x); err != nil {
   442  				clearOffset(err)
   443  				return nil, err
   444  			}
   445  			return x, nil
   446  		}
   447  	}
   448  }
   449  
   450  func clearOffset(err error) {
   451  	if s, ok := err.(*SyntaxError); ok {
   452  		s.Offset = 0
   453  	}
   454  }
   455  
   456  func (dec *Decoder) tokenError(c byte) (Token, error) {
   457  	var context string
   458  	switch dec.tokenState {
   459  	case tokenTopValue:
   460  		context = " looking for beginning of value"
   461  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   462  		context = " looking for beginning of value"
   463  	case tokenArrayComma:
   464  		context = " after array element"
   465  	case tokenObjectKey:
   466  		context = " looking for beginning of object key string"
   467  	case tokenObjectColon:
   468  		context = " after object key"
   469  	case tokenObjectComma:
   470  		context = " after object key:value pair"
   471  	}
   472  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
   473  }
   474  
   475  // More reports whether there is another element in the
   476  // current array or object being parsed.
   477  func (dec *Decoder) More() bool {
   478  	c, err := dec.peek()
   479  	return err == nil && c != ']' && c != '}'
   480  }
   481  
   482  func (dec *Decoder) peek() (byte, error) {
   483  	var err error
   484  	for {
   485  		for i := dec.scanp; i < len(dec.buf); i++ {
   486  			c := dec.buf[i]
   487  			if isSpace(c) {
   488  				continue
   489  			}
   490  			dec.scanp = i
   491  			return c, nil
   492  		}
   493  		// buffer has been scanned, now report any error
   494  		if err != nil {
   495  			return 0, err
   496  		}
   497  		err = dec.refill()
   498  	}
   499  }
   500  
   501  /*
   502  TODO
   503  
   504  // EncodeToken writes the given JSON token to the stream.
   505  // It returns an error if the delimiters [ ] { } are not properly used.
   506  //
   507  // EncodeToken does not call Flush, because usually it is part of
   508  // a larger operation such as Encode, and those will call Flush when finished.
   509  // Callers that create an Encoder and then invoke EncodeToken directly,
   510  // without using Encode, need to call Flush when finished to ensure that
   511  // the JSON is written to the underlying writer.
   512  func (e *Encoder) EncodeToken(t Token) error  {
   513  	...
   514  }
   515  
   516  */