github.com/gocuntian/go@v0.0.0-20160610041250-fee02d270bf8/src/encoding/json/stream.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 ) 12 13 // A Decoder reads and decodes JSON values from an input stream. 14 type Decoder struct { 15 r io.Reader 16 buf []byte 17 d decodeState 18 scanp int // start of unread data in buf 19 scan scanner 20 err error 21 22 tokenState int 23 tokenStack []int 24 } 25 26 // NewDecoder returns a new decoder that reads from r. 27 // 28 // The decoder introduces its own buffering and may 29 // read data from r beyond the JSON values requested. 30 func NewDecoder(r io.Reader) *Decoder { 31 return &Decoder{r: r} 32 } 33 34 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a 35 // Number instead of as a float64. 36 func (dec *Decoder) UseNumber() { dec.d.useNumber = true } 37 38 // Decode reads the next JSON-encoded value from its 39 // input and stores it in the value pointed to by v. 40 // 41 // See the documentation for Unmarshal for details about 42 // the conversion of JSON into a Go value. 43 func (dec *Decoder) Decode(v interface{}) error { 44 if dec.err != nil { 45 return dec.err 46 } 47 48 if err := dec.tokenPrepareForDecode(); err != nil { 49 return err 50 } 51 52 if !dec.tokenValueAllowed() { 53 return &SyntaxError{msg: "not at beginning of value"} 54 } 55 56 // Read whole value into buffer. 57 n, err := dec.readValue() 58 if err != nil { 59 return err 60 } 61 dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) 62 dec.scanp += n 63 64 // Don't save err from unmarshal into dec.err: 65 // the connection is still usable since we read a complete JSON 66 // object from it before the error happened. 67 err = dec.d.unmarshal(v) 68 69 // fixup token streaming state 70 dec.tokenValueEnd() 71 72 return err 73 } 74 75 // Buffered returns a reader of the data remaining in the Decoder's 76 // buffer. The reader is valid until the next call to Decode. 77 func (dec *Decoder) Buffered() io.Reader { 78 return bytes.NewReader(dec.buf[dec.scanp:]) 79 } 80 81 // readValue reads a JSON value into dec.buf. 82 // It returns the length of the encoding. 83 func (dec *Decoder) readValue() (int, error) { 84 dec.scan.reset() 85 86 scanp := dec.scanp 87 var err error 88 Input: 89 for { 90 // Look in the buffer for a new value. 91 for i, c := range dec.buf[scanp:] { 92 dec.scan.bytes++ 93 v := dec.scan.step(&dec.scan, c) 94 if v == scanEnd { 95 scanp += i 96 break Input 97 } 98 // scanEnd is delayed one byte. 99 // We might block trying to get that byte from src, 100 // so instead invent a space byte. 101 if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd { 102 scanp += i + 1 103 break Input 104 } 105 if v == scanError { 106 dec.err = dec.scan.err 107 return 0, dec.scan.err 108 } 109 } 110 scanp = len(dec.buf) 111 112 // Did the last read have an error? 113 // Delayed until now to allow buffer scan. 114 if err != nil { 115 if err == io.EOF { 116 if dec.scan.step(&dec.scan, ' ') == scanEnd { 117 break Input 118 } 119 if nonSpace(dec.buf) { 120 err = io.ErrUnexpectedEOF 121 } 122 } 123 dec.err = err 124 return 0, err 125 } 126 127 n := scanp - dec.scanp 128 err = dec.refill() 129 scanp = dec.scanp + n 130 } 131 return scanp - dec.scanp, nil 132 } 133 134 func (dec *Decoder) refill() error { 135 // Make room to read more into the buffer. 136 // First slide down data already consumed. 137 if dec.scanp > 0 { 138 n := copy(dec.buf, dec.buf[dec.scanp:]) 139 dec.buf = dec.buf[:n] 140 dec.scanp = 0 141 } 142 143 // Grow buffer if not large enough. 144 const minRead = 512 145 if cap(dec.buf)-len(dec.buf) < minRead { 146 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 147 copy(newBuf, dec.buf) 148 dec.buf = newBuf 149 } 150 151 // Read. Delay error for next iteration (after scan). 152 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 153 dec.buf = dec.buf[0 : len(dec.buf)+n] 154 155 return err 156 } 157 158 func nonSpace(b []byte) bool { 159 for _, c := range b { 160 if !isSpace(c) { 161 return true 162 } 163 } 164 return false 165 } 166 167 // An Encoder writes JSON values to an output stream. 168 type Encoder struct { 169 w io.Writer 170 err error 171 escapeHTML bool 172 173 indentBuf *bytes.Buffer 174 indentPrefix string 175 indentValue string 176 } 177 178 // NewEncoder returns a new encoder that writes to w. 179 func NewEncoder(w io.Writer) *Encoder { 180 return &Encoder{w: w, escapeHTML: true} 181 } 182 183 // Encode writes the JSON encoding of v to the stream, 184 // followed by a newline character. 185 // 186 // See the documentation for Marshal for details about the 187 // conversion of Go values to JSON. 188 func (enc *Encoder) Encode(v interface{}) error { 189 if enc.err != nil { 190 return enc.err 191 } 192 e := newEncodeState() 193 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 194 if err != nil { 195 return err 196 } 197 198 // Terminate each value with a newline. 199 // This makes the output look a little nicer 200 // when debugging, and some kind of space 201 // is required if the encoded value was a number, 202 // so that the reader knows there aren't more 203 // digits coming. 204 e.WriteByte('\n') 205 206 b := e.Bytes() 207 if enc.indentPrefix != "" || enc.indentValue != "" { 208 if enc.indentBuf == nil { 209 enc.indentBuf = new(bytes.Buffer) 210 } 211 enc.indentBuf.Reset() 212 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) 213 if err != nil { 214 return err 215 } 216 b = enc.indentBuf.Bytes() 217 } 218 if _, err = enc.w.Write(b); err != nil { 219 enc.err = err 220 } 221 encodeStatePool.Put(e) 222 return err 223 } 224 225 // SetIndent instructs the encoder to format each subsequent encoded 226 // value as if indented by the package-level function Indent(dst, src, prefix, indent). 227 // Calling SetIndent("", "") disables indentation. 228 func (enc *Encoder) SetIndent(prefix, indent string) { 229 enc.indentPrefix = prefix 230 enc.indentValue = indent 231 } 232 233 // SetEscapeHTML specifies whether problematic HTML characters 234 // should be escaped inside JSON quoted strings. 235 // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 236 // to avoid certain safety problems that can arise when embedding JSON in HTML. 237 // 238 // In non-HTML settings where the escaping interferes with the readability 239 // of the output, SetEscapeHTML(false) disables this behavior. 240 func (enc *Encoder) SetEscapeHTML(on bool) { 241 enc.escapeHTML = on 242 } 243 244 // RawMessage is a raw encoded JSON value. 245 // It implements Marshaler and Unmarshaler and can 246 // be used to delay JSON decoding or precompute a JSON encoding. 247 type RawMessage []byte 248 249 // MarshalJSON returns *m as the JSON encoding of m. 250 func (m *RawMessage) MarshalJSON() ([]byte, error) { 251 return *m, nil 252 } 253 254 // UnmarshalJSON sets *m to a copy of data. 255 func (m *RawMessage) UnmarshalJSON(data []byte) error { 256 if m == nil { 257 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 258 } 259 *m = append((*m)[0:0], data...) 260 return nil 261 } 262 263 var _ Marshaler = (*RawMessage)(nil) 264 var _ Unmarshaler = (*RawMessage)(nil) 265 266 // A Token holds a value of one of these types: 267 // 268 // Delim, for the four JSON delimiters [ ] { } 269 // bool, for JSON booleans 270 // float64, for JSON numbers 271 // Number, for JSON numbers 272 // string, for JSON string literals 273 // nil, for JSON null 274 // 275 type Token interface{} 276 277 const ( 278 tokenTopValue = iota 279 tokenArrayStart 280 tokenArrayValue 281 tokenArrayComma 282 tokenObjectStart 283 tokenObjectKey 284 tokenObjectColon 285 tokenObjectValue 286 tokenObjectComma 287 ) 288 289 // advance tokenstate from a separator state to a value state 290 func (dec *Decoder) tokenPrepareForDecode() error { 291 // Note: Not calling peek before switch, to avoid 292 // putting peek into the standard Decode path. 293 // peek is only called when using the Token API. 294 switch dec.tokenState { 295 case tokenArrayComma: 296 c, err := dec.peek() 297 if err != nil { 298 return err 299 } 300 if c != ',' { 301 return &SyntaxError{"expected comma after array element", 0} 302 } 303 dec.scanp++ 304 dec.tokenState = tokenArrayValue 305 case tokenObjectColon: 306 c, err := dec.peek() 307 if err != nil { 308 return err 309 } 310 if c != ':' { 311 return &SyntaxError{"expected colon after object key", 0} 312 } 313 dec.scanp++ 314 dec.tokenState = tokenObjectValue 315 } 316 return nil 317 } 318 319 func (dec *Decoder) tokenValueAllowed() bool { 320 switch dec.tokenState { 321 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 322 return true 323 } 324 return false 325 } 326 327 func (dec *Decoder) tokenValueEnd() { 328 switch dec.tokenState { 329 case tokenArrayStart, tokenArrayValue: 330 dec.tokenState = tokenArrayComma 331 case tokenObjectValue: 332 dec.tokenState = tokenObjectComma 333 } 334 } 335 336 // A Delim is a JSON array or object delimiter, one of [ ] { or }. 337 type Delim rune 338 339 func (d Delim) String() string { 340 return string(d) 341 } 342 343 // Token returns the next JSON token in the input stream. 344 // At the end of the input stream, Token returns nil, io.EOF. 345 // 346 // Token guarantees that the delimiters [ ] { } it returns are 347 // properly nested and matched: if Token encounters an unexpected 348 // delimiter in the input, it will return an error. 349 // 350 // The input stream consists of basic JSON values—bool, string, 351 // number, and null—along with delimiters [ ] { } of type Delim 352 // to mark the start and end of arrays and objects. 353 // Commas and colons are elided. 354 func (dec *Decoder) Token() (Token, error) { 355 for { 356 c, err := dec.peek() 357 if err != nil { 358 return nil, err 359 } 360 switch c { 361 case '[': 362 if !dec.tokenValueAllowed() { 363 return dec.tokenError(c) 364 } 365 dec.scanp++ 366 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 367 dec.tokenState = tokenArrayStart 368 return Delim('['), nil 369 370 case ']': 371 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 372 return dec.tokenError(c) 373 } 374 dec.scanp++ 375 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 376 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 377 dec.tokenValueEnd() 378 return Delim(']'), nil 379 380 case '{': 381 if !dec.tokenValueAllowed() { 382 return dec.tokenError(c) 383 } 384 dec.scanp++ 385 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 386 dec.tokenState = tokenObjectStart 387 return Delim('{'), nil 388 389 case '}': 390 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 391 return dec.tokenError(c) 392 } 393 dec.scanp++ 394 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 395 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 396 dec.tokenValueEnd() 397 return Delim('}'), nil 398 399 case ':': 400 if dec.tokenState != tokenObjectColon { 401 return dec.tokenError(c) 402 } 403 dec.scanp++ 404 dec.tokenState = tokenObjectValue 405 continue 406 407 case ',': 408 if dec.tokenState == tokenArrayComma { 409 dec.scanp++ 410 dec.tokenState = tokenArrayValue 411 continue 412 } 413 if dec.tokenState == tokenObjectComma { 414 dec.scanp++ 415 dec.tokenState = tokenObjectKey 416 continue 417 } 418 return dec.tokenError(c) 419 420 case '"': 421 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 422 var x string 423 old := dec.tokenState 424 dec.tokenState = tokenTopValue 425 err := dec.Decode(&x) 426 dec.tokenState = old 427 if err != nil { 428 clearOffset(err) 429 return nil, err 430 } 431 dec.tokenState = tokenObjectColon 432 return x, nil 433 } 434 fallthrough 435 436 default: 437 if !dec.tokenValueAllowed() { 438 return dec.tokenError(c) 439 } 440 var x interface{} 441 if err := dec.Decode(&x); err != nil { 442 clearOffset(err) 443 return nil, err 444 } 445 return x, nil 446 } 447 } 448 } 449 450 func clearOffset(err error) { 451 if s, ok := err.(*SyntaxError); ok { 452 s.Offset = 0 453 } 454 } 455 456 func (dec *Decoder) tokenError(c byte) (Token, error) { 457 var context string 458 switch dec.tokenState { 459 case tokenTopValue: 460 context = " looking for beginning of value" 461 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 462 context = " looking for beginning of value" 463 case tokenArrayComma: 464 context = " after array element" 465 case tokenObjectKey: 466 context = " looking for beginning of object key string" 467 case tokenObjectColon: 468 context = " after object key" 469 case tokenObjectComma: 470 context = " after object key:value pair" 471 } 472 return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0} 473 } 474 475 // More reports whether there is another element in the 476 // current array or object being parsed. 477 func (dec *Decoder) More() bool { 478 c, err := dec.peek() 479 return err == nil && c != ']' && c != '}' 480 } 481 482 func (dec *Decoder) peek() (byte, error) { 483 var err error 484 for { 485 for i := dec.scanp; i < len(dec.buf); i++ { 486 c := dec.buf[i] 487 if isSpace(c) { 488 continue 489 } 490 dec.scanp = i 491 return c, nil 492 } 493 // buffer has been scanned, now report any error 494 if err != nil { 495 return 0, err 496 } 497 err = dec.refill() 498 } 499 } 500 501 /* 502 TODO 503 504 // EncodeToken writes the given JSON token to the stream. 505 // It returns an error if the delimiters [ ] { } are not properly used. 506 // 507 // EncodeToken does not call Flush, because usually it is part of 508 // a larger operation such as Encode, and those will call Flush when finished. 509 // Callers that create an Encoder and then invoke EncodeToken directly, 510 // without using Encode, need to call Flush when finished to ensure that 511 // the JSON is written to the underlying writer. 512 func (e *Encoder) EncodeToken(t Token) error { 513 ... 514 } 515 516 */