github.com/rakyll/go@v0.0.0-20170216000551-64c02460d703/src/encoding/json/stream.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 ) 12 13 // A Decoder reads and decodes JSON values from an input stream. 14 type Decoder struct { 15 r io.Reader 16 buf []byte 17 d decodeState 18 scanp int // start of unread data in buf 19 scan scanner 20 err error 21 22 tokenState int 23 tokenStack []int 24 } 25 26 // NewDecoder returns a new decoder that reads from r. 27 // 28 // The decoder introduces its own buffering and may 29 // read data from r beyond the JSON values requested. 30 func NewDecoder(r io.Reader) *Decoder { 31 return &Decoder{r: r} 32 } 33 34 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a 35 // Number instead of as a float64. 36 func (dec *Decoder) UseNumber() { dec.d.useNumber = true } 37 38 // Decode reads the next JSON-encoded value from its 39 // input and stores it in the value pointed to by v. 40 // 41 // See the documentation for Unmarshal for details about 42 // the conversion of JSON into a Go value. 43 func (dec *Decoder) Decode(v interface{}) error { 44 if dec.err != nil { 45 return dec.err 46 } 47 48 if err := dec.tokenPrepareForDecode(); err != nil { 49 return err 50 } 51 52 if !dec.tokenValueAllowed() { 53 return &SyntaxError{msg: "not at beginning of value"} 54 } 55 56 // Read whole value into buffer. 57 n, err := dec.readValue() 58 if err != nil { 59 return err 60 } 61 dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) 62 dec.scanp += n 63 64 // Don't save err from unmarshal into dec.err: 65 // the connection is still usable since we read a complete JSON 66 // object from it before the error happened. 67 err = dec.d.unmarshal(v) 68 69 // fixup token streaming state 70 dec.tokenValueEnd() 71 72 return err 73 } 74 75 // Buffered returns a reader of the data remaining in the Decoder's 76 // buffer. The reader is valid until the next call to Decode. 77 func (dec *Decoder) Buffered() io.Reader { 78 return bytes.NewReader(dec.buf[dec.scanp:]) 79 } 80 81 // readValue reads a JSON value into dec.buf. 82 // It returns the length of the encoding. 83 func (dec *Decoder) readValue() (int, error) { 84 dec.scan.reset() 85 86 scanp := dec.scanp 87 var err error 88 Input: 89 for { 90 // Look in the buffer for a new value. 91 for i, c := range dec.buf[scanp:] { 92 dec.scan.bytes++ 93 v := dec.scan.step(&dec.scan, c) 94 if v == scanEnd { 95 scanp += i 96 break Input 97 } 98 // scanEnd is delayed one byte. 99 // We might block trying to get that byte from src, 100 // so instead invent a space byte. 101 if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd { 102 scanp += i + 1 103 break Input 104 } 105 if v == scanError { 106 dec.err = dec.scan.err 107 return 0, dec.scan.err 108 } 109 } 110 scanp = len(dec.buf) 111 112 // Did the last read have an error? 113 // Delayed until now to allow buffer scan. 114 if err != nil { 115 if err == io.EOF { 116 if dec.scan.step(&dec.scan, ' ') == scanEnd { 117 break Input 118 } 119 if nonSpace(dec.buf) { 120 err = io.ErrUnexpectedEOF 121 } 122 } 123 dec.err = err 124 return 0, err 125 } 126 127 n := scanp - dec.scanp 128 err = dec.refill() 129 scanp = dec.scanp + n 130 } 131 return scanp - dec.scanp, nil 132 } 133 134 func (dec *Decoder) refill() error { 135 // Make room to read more into the buffer. 136 // First slide down data already consumed. 137 if dec.scanp > 0 { 138 n := copy(dec.buf, dec.buf[dec.scanp:]) 139 dec.buf = dec.buf[:n] 140 dec.scanp = 0 141 } 142 143 // Grow buffer if not large enough. 144 const minRead = 512 145 if cap(dec.buf)-len(dec.buf) < minRead { 146 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 147 copy(newBuf, dec.buf) 148 dec.buf = newBuf 149 } 150 151 // Read. Delay error for next iteration (after scan). 152 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 153 dec.buf = dec.buf[0 : len(dec.buf)+n] 154 155 return err 156 } 157 158 func nonSpace(b []byte) bool { 159 for _, c := range b { 160 if !isSpace(c) { 161 return true 162 } 163 } 164 return false 165 } 166 167 // An Encoder writes JSON values to an output stream. 168 type Encoder struct { 169 w io.Writer 170 err error 171 escapeHTML bool 172 173 indentBuf *bytes.Buffer 174 indentPrefix string 175 indentValue string 176 } 177 178 // NewEncoder returns a new encoder that writes to w. 179 func NewEncoder(w io.Writer) *Encoder { 180 return &Encoder{w: w, escapeHTML: true} 181 } 182 183 // Encode writes the JSON encoding of v to the stream, 184 // followed by a newline character. 185 // 186 // See the documentation for Marshal for details about the 187 // conversion of Go values to JSON. 188 func (enc *Encoder) Encode(v interface{}) error { 189 if enc.err != nil { 190 return enc.err 191 } 192 e := newEncodeState() 193 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 194 if err != nil { 195 return err 196 } 197 198 // Terminate each value with a newline. 199 // This makes the output look a little nicer 200 // when debugging, and some kind of space 201 // is required if the encoded value was a number, 202 // so that the reader knows there aren't more 203 // digits coming. 204 e.WriteByte('\n') 205 206 b := e.Bytes() 207 if enc.indentPrefix != "" || enc.indentValue != "" { 208 if enc.indentBuf == nil { 209 enc.indentBuf = new(bytes.Buffer) 210 } 211 enc.indentBuf.Reset() 212 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) 213 if err != nil { 214 return err 215 } 216 b = enc.indentBuf.Bytes() 217 } 218 if _, err = enc.w.Write(b); err != nil { 219 enc.err = err 220 } 221 encodeStatePool.Put(e) 222 return err 223 } 224 225 // SetIndent instructs the encoder to format each subsequent encoded 226 // value as if indented by the package-level function Indent(dst, src, prefix, indent). 227 // Calling SetIndent("", "") disables indentation. 228 func (enc *Encoder) SetIndent(prefix, indent string) { 229 enc.indentPrefix = prefix 230 enc.indentValue = indent 231 } 232 233 // SetEscapeHTML specifies whether problematic HTML characters 234 // should be escaped inside JSON quoted strings. 235 // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 236 // to avoid certain safety problems that can arise when embedding JSON in HTML. 237 // 238 // In non-HTML settings where the escaping interferes with the readability 239 // of the output, SetEscapeHTML(false) disables this behavior. 240 func (enc *Encoder) SetEscapeHTML(on bool) { 241 enc.escapeHTML = on 242 } 243 244 // RawMessage is a raw encoded JSON value. 245 // It implements Marshaler and Unmarshaler and can 246 // be used to delay JSON decoding or precompute a JSON encoding. 247 type RawMessage []byte 248 249 // MarshalJSON returns m as the JSON encoding of m. 250 func (m RawMessage) MarshalJSON() ([]byte, error) { 251 if m == nil { 252 return []byte("null"), nil 253 } 254 return m, nil 255 } 256 257 // UnmarshalJSON sets *m to a copy of data. 258 func (m *RawMessage) UnmarshalJSON(data []byte) error { 259 if m == nil { 260 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 261 } 262 *m = append((*m)[0:0], data...) 263 return nil 264 } 265 266 var _ Marshaler = (*RawMessage)(nil) 267 var _ Unmarshaler = (*RawMessage)(nil) 268 269 // A Token holds a value of one of these types: 270 // 271 // Delim, for the four JSON delimiters [ ] { } 272 // bool, for JSON booleans 273 // float64, for JSON numbers 274 // Number, for JSON numbers 275 // string, for JSON string literals 276 // nil, for JSON null 277 // 278 type Token interface{} 279 280 const ( 281 tokenTopValue = iota 282 tokenArrayStart 283 tokenArrayValue 284 tokenArrayComma 285 tokenObjectStart 286 tokenObjectKey 287 tokenObjectColon 288 tokenObjectValue 289 tokenObjectComma 290 ) 291 292 // advance tokenstate from a separator state to a value state 293 func (dec *Decoder) tokenPrepareForDecode() error { 294 // Note: Not calling peek before switch, to avoid 295 // putting peek into the standard Decode path. 296 // peek is only called when using the Token API. 297 switch dec.tokenState { 298 case tokenArrayComma: 299 c, err := dec.peek() 300 if err != nil { 301 return err 302 } 303 if c != ',' { 304 return &SyntaxError{"expected comma after array element", 0} 305 } 306 dec.scanp++ 307 dec.tokenState = tokenArrayValue 308 case tokenObjectColon: 309 c, err := dec.peek() 310 if err != nil { 311 return err 312 } 313 if c != ':' { 314 return &SyntaxError{"expected colon after object key", 0} 315 } 316 dec.scanp++ 317 dec.tokenState = tokenObjectValue 318 } 319 return nil 320 } 321 322 func (dec *Decoder) tokenValueAllowed() bool { 323 switch dec.tokenState { 324 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 325 return true 326 } 327 return false 328 } 329 330 func (dec *Decoder) tokenValueEnd() { 331 switch dec.tokenState { 332 case tokenArrayStart, tokenArrayValue: 333 dec.tokenState = tokenArrayComma 334 case tokenObjectValue: 335 dec.tokenState = tokenObjectComma 336 } 337 } 338 339 // A Delim is a JSON array or object delimiter, one of [ ] { or }. 340 type Delim rune 341 342 func (d Delim) String() string { 343 return string(d) 344 } 345 346 // Token returns the next JSON token in the input stream. 347 // At the end of the input stream, Token returns nil, io.EOF. 348 // 349 // Token guarantees that the delimiters [ ] { } it returns are 350 // properly nested and matched: if Token encounters an unexpected 351 // delimiter in the input, it will return an error. 352 // 353 // The input stream consists of basic JSON values—bool, string, 354 // number, and null—along with delimiters [ ] { } of type Delim 355 // to mark the start and end of arrays and objects. 356 // Commas and colons are elided. 357 func (dec *Decoder) Token() (Token, error) { 358 for { 359 c, err := dec.peek() 360 if err != nil { 361 return nil, err 362 } 363 switch c { 364 case '[': 365 if !dec.tokenValueAllowed() { 366 return dec.tokenError(c) 367 } 368 dec.scanp++ 369 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 370 dec.tokenState = tokenArrayStart 371 return Delim('['), nil 372 373 case ']': 374 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 375 return dec.tokenError(c) 376 } 377 dec.scanp++ 378 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 379 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 380 dec.tokenValueEnd() 381 return Delim(']'), nil 382 383 case '{': 384 if !dec.tokenValueAllowed() { 385 return dec.tokenError(c) 386 } 387 dec.scanp++ 388 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 389 dec.tokenState = tokenObjectStart 390 return Delim('{'), nil 391 392 case '}': 393 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 394 return dec.tokenError(c) 395 } 396 dec.scanp++ 397 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 398 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 399 dec.tokenValueEnd() 400 return Delim('}'), nil 401 402 case ':': 403 if dec.tokenState != tokenObjectColon { 404 return dec.tokenError(c) 405 } 406 dec.scanp++ 407 dec.tokenState = tokenObjectValue 408 continue 409 410 case ',': 411 if dec.tokenState == tokenArrayComma { 412 dec.scanp++ 413 dec.tokenState = tokenArrayValue 414 continue 415 } 416 if dec.tokenState == tokenObjectComma { 417 dec.scanp++ 418 dec.tokenState = tokenObjectKey 419 continue 420 } 421 return dec.tokenError(c) 422 423 case '"': 424 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 425 var x string 426 old := dec.tokenState 427 dec.tokenState = tokenTopValue 428 err := dec.Decode(&x) 429 dec.tokenState = old 430 if err != nil { 431 clearOffset(err) 432 return nil, err 433 } 434 dec.tokenState = tokenObjectColon 435 return x, nil 436 } 437 fallthrough 438 439 default: 440 if !dec.tokenValueAllowed() { 441 return dec.tokenError(c) 442 } 443 var x interface{} 444 if err := dec.Decode(&x); err != nil { 445 clearOffset(err) 446 return nil, err 447 } 448 return x, nil 449 } 450 } 451 } 452 453 func clearOffset(err error) { 454 if s, ok := err.(*SyntaxError); ok { 455 s.Offset = 0 456 } 457 } 458 459 func (dec *Decoder) tokenError(c byte) (Token, error) { 460 var context string 461 switch dec.tokenState { 462 case tokenTopValue: 463 context = " looking for beginning of value" 464 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 465 context = " looking for beginning of value" 466 case tokenArrayComma: 467 context = " after array element" 468 case tokenObjectKey: 469 context = " looking for beginning of object key string" 470 case tokenObjectColon: 471 context = " after object key" 472 case tokenObjectComma: 473 context = " after object key:value pair" 474 } 475 return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0} 476 } 477 478 // More reports whether there is another element in the 479 // current array or object being parsed. 480 func (dec *Decoder) More() bool { 481 c, err := dec.peek() 482 return err == nil && c != ']' && c != '}' 483 } 484 485 func (dec *Decoder) peek() (byte, error) { 486 var err error 487 for { 488 for i := dec.scanp; i < len(dec.buf); i++ { 489 c := dec.buf[i] 490 if isSpace(c) { 491 continue 492 } 493 dec.scanp = i 494 return c, nil 495 } 496 // buffer has been scanned, now report any error 497 if err != nil { 498 return 0, err 499 } 500 err = dec.refill() 501 } 502 } 503 504 /* 505 TODO 506 507 // EncodeToken writes the given JSON token to the stream. 508 // It returns an error if the delimiters [ ] { } are not properly used. 509 // 510 // EncodeToken does not call Flush, because usually it is part of 511 // a larger operation such as Encode, and those will call Flush when finished. 512 // Callers that create an Encoder and then invoke EncodeToken directly, 513 // without using Encode, need to call Flush when finished to ensure that 514 // the JSON is written to the underlying writer. 515 func (e *Encoder) EncodeToken(t Token) error { 516 ... 517 } 518 519 */