github.com/yaegashi/msgraph.go@v0.1.4/jsonx/stream.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package jsonx 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 ) 12 13 // A Decoder reads and decodes JSON values from an input stream. 14 type Decoder struct { 15 r io.Reader 16 buf []byte 17 d decodeState 18 scanp int // start of unread data in buf 19 scanned int64 // amount of data already scanned 20 scan scanner 21 err error 22 23 tokenState int 24 tokenStack []int 25 } 26 27 // NewDecoder returns a new decoder that reads from r. 28 // 29 // The decoder introduces its own buffering and may 30 // read data from r beyond the JSON values requested. 31 func NewDecoder(r io.Reader) *Decoder { 32 return &Decoder{r: r} 33 } 34 35 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a 36 // Number instead of as a float64. 37 func (dec *Decoder) UseNumber() { dec.d.useNumber = true } 38 39 // DisallowUnknownFields causes the Decoder to return an error when the destination 40 // is a struct and the input contains object keys which do not match any 41 // non-ignored, exported fields in the destination. 42 func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } 43 44 // Decode reads the next JSON-encoded value from its 45 // input and stores it in the value pointed to by v. 46 // 47 // See the documentation for Unmarshal for details about 48 // the conversion of JSON into a Go value. 49 func (dec *Decoder) Decode(v interface{}) error { 50 if dec.err != nil { 51 return dec.err 52 } 53 54 if err := dec.tokenPrepareForDecode(); err != nil { 55 return err 56 } 57 58 if !dec.tokenValueAllowed() { 59 return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()} 60 } 61 62 // Read whole value into buffer. 63 n, err := dec.readValue() 64 if err != nil { 65 return err 66 } 67 dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) 68 dec.scanp += n 69 70 // Don't save err from unmarshal into dec.err: 71 // the connection is still usable since we read a complete JSON 72 // object from it before the error happened. 73 err = dec.d.unmarshal(v) 74 75 // fixup token streaming state 76 dec.tokenValueEnd() 77 78 return err 79 } 80 81 // Buffered returns a reader of the data remaining in the Decoder's 82 // buffer. The reader is valid until the next call to Decode. 83 func (dec *Decoder) Buffered() io.Reader { 84 return bytes.NewReader(dec.buf[dec.scanp:]) 85 } 86 87 // readValue reads a JSON value into dec.buf. 88 // It returns the length of the encoding. 89 func (dec *Decoder) readValue() (int, error) { 90 dec.scan.reset() 91 92 scanp := dec.scanp 93 var err error 94 Input: 95 // help the compiler see that scanp is never negative, so it can remove 96 // some bounds checks below. 97 for scanp >= 0 { 98 99 // Look in the buffer for a new value. 100 for ; scanp < len(dec.buf); scanp++ { 101 c := dec.buf[scanp] 102 dec.scan.bytes++ 103 switch dec.scan.step(&dec.scan, c) { 104 case scanEnd: 105 break Input 106 case scanEndObject, scanEndArray: 107 // scanEnd is delayed one byte. 108 // We might block trying to get that byte from src, 109 // so instead invent a space byte. 110 if stateEndValue(&dec.scan, ' ') == scanEnd { 111 scanp++ 112 break Input 113 } 114 case scanError: 115 dec.err = dec.scan.err 116 return 0, dec.scan.err 117 } 118 } 119 120 // Did the last read have an error? 121 // Delayed until now to allow buffer scan. 122 if err != nil { 123 if err == io.EOF { 124 if dec.scan.step(&dec.scan, ' ') == scanEnd { 125 break Input 126 } 127 if nonSpace(dec.buf) { 128 err = io.ErrUnexpectedEOF 129 } 130 } 131 dec.err = err 132 return 0, err 133 } 134 135 n := scanp - dec.scanp 136 err = dec.refill() 137 scanp = dec.scanp + n 138 } 139 return scanp - dec.scanp, nil 140 } 141 142 func (dec *Decoder) refill() error { 143 // Make room to read more into the buffer. 144 // First slide down data already consumed. 145 if dec.scanp > 0 { 146 dec.scanned += int64(dec.scanp) 147 n := copy(dec.buf, dec.buf[dec.scanp:]) 148 dec.buf = dec.buf[:n] 149 dec.scanp = 0 150 } 151 152 // Grow buffer if not large enough. 153 const minRead = 512 154 if cap(dec.buf)-len(dec.buf) < minRead { 155 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 156 copy(newBuf, dec.buf) 157 dec.buf = newBuf 158 } 159 160 // Read. Delay error for next iteration (after scan). 161 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 162 dec.buf = dec.buf[0 : len(dec.buf)+n] 163 164 return err 165 } 166 167 func nonSpace(b []byte) bool { 168 for _, c := range b { 169 if !isSpace(c) { 170 return true 171 } 172 } 173 return false 174 } 175 176 // An Encoder writes JSON values to an output stream. 177 type Encoder struct { 178 w io.Writer 179 err error 180 escapeHTML bool 181 182 indentBuf *bytes.Buffer 183 indentPrefix string 184 indentValue string 185 } 186 187 // NewEncoder returns a new encoder that writes to w. 188 func NewEncoder(w io.Writer) *Encoder { 189 return &Encoder{w: w, escapeHTML: true} 190 } 191 192 // Encode writes the JSON encoding of v to the stream, 193 // followed by a newline character. 194 // 195 // See the documentation for Marshal for details about the 196 // conversion of Go values to JSON. 197 func (enc *Encoder) Encode(v interface{}) error { 198 if enc.err != nil { 199 return enc.err 200 } 201 e := newEncodeState() 202 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 203 if err != nil { 204 return err 205 } 206 207 // Terminate each value with a newline. 208 // This makes the output look a little nicer 209 // when debugging, and some kind of space 210 // is required if the encoded value was a number, 211 // so that the reader knows there aren't more 212 // digits coming. 213 e.WriteByte('\n') 214 215 b := e.Bytes() 216 if enc.indentPrefix != "" || enc.indentValue != "" { 217 if enc.indentBuf == nil { 218 enc.indentBuf = new(bytes.Buffer) 219 } 220 enc.indentBuf.Reset() 221 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) 222 if err != nil { 223 return err 224 } 225 b = enc.indentBuf.Bytes() 226 } 227 if _, err = enc.w.Write(b); err != nil { 228 enc.err = err 229 } 230 encodeStatePool.Put(e) 231 return err 232 } 233 234 // SetIndent instructs the encoder to format each subsequent encoded 235 // value as if indented by the package-level function Indent(dst, src, prefix, indent). 236 // Calling SetIndent("", "") disables indentation. 237 func (enc *Encoder) SetIndent(prefix, indent string) { 238 enc.indentPrefix = prefix 239 enc.indentValue = indent 240 } 241 242 // SetEscapeHTML specifies whether problematic HTML characters 243 // should be escaped inside JSON quoted strings. 244 // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 245 // to avoid certain safety problems that can arise when embedding JSON in HTML. 246 // 247 // In non-HTML settings where the escaping interferes with the readability 248 // of the output, SetEscapeHTML(false) disables this behavior. 249 func (enc *Encoder) SetEscapeHTML(on bool) { 250 enc.escapeHTML = on 251 } 252 253 // RawMessage is a raw encoded JSON value. 254 // It implements Marshaler and Unmarshaler and can 255 // be used to delay JSON decoding or precompute a JSON encoding. 256 type RawMessage []byte 257 258 // MarshalJSON returns m as the JSON encoding of m. 259 func (m RawMessage) MarshalJSON() ([]byte, error) { 260 if m == nil { 261 return []byte("null"), nil 262 } 263 return m, nil 264 } 265 266 // UnmarshalJSON sets *m to a copy of data. 267 func (m *RawMessage) UnmarshalJSON(data []byte) error { 268 if m == nil { 269 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 270 } 271 *m = append((*m)[0:0], data...) 272 return nil 273 } 274 275 var _ Marshaler = (*RawMessage)(nil) 276 var _ Unmarshaler = (*RawMessage)(nil) 277 278 // A Token holds a value of one of these types: 279 // 280 // Delim, for the four JSON delimiters [ ] { } 281 // bool, for JSON booleans 282 // float64, for JSON numbers 283 // Number, for JSON numbers 284 // string, for JSON string literals 285 // nil, for JSON null 286 // 287 type Token interface{} 288 289 const ( 290 tokenTopValue = iota 291 tokenArrayStart 292 tokenArrayValue 293 tokenArrayComma 294 tokenObjectStart 295 tokenObjectKey 296 tokenObjectColon 297 tokenObjectValue 298 tokenObjectComma 299 ) 300 301 // advance tokenstate from a separator state to a value state 302 func (dec *Decoder) tokenPrepareForDecode() error { 303 // Note: Not calling peek before switch, to avoid 304 // putting peek into the standard Decode path. 305 // peek is only called when using the Token API. 306 switch dec.tokenState { 307 case tokenArrayComma: 308 c, err := dec.peek() 309 if err != nil { 310 return err 311 } 312 if c != ',' { 313 return &SyntaxError{"expected comma after array element", dec.offset()} 314 } 315 dec.scanp++ 316 dec.tokenState = tokenArrayValue 317 case tokenObjectColon: 318 c, err := dec.peek() 319 if err != nil { 320 return err 321 } 322 if c != ':' { 323 return &SyntaxError{"expected colon after object key", dec.offset()} 324 } 325 dec.scanp++ 326 dec.tokenState = tokenObjectValue 327 } 328 return nil 329 } 330 331 func (dec *Decoder) tokenValueAllowed() bool { 332 switch dec.tokenState { 333 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 334 return true 335 } 336 return false 337 } 338 339 func (dec *Decoder) tokenValueEnd() { 340 switch dec.tokenState { 341 case tokenArrayStart, tokenArrayValue: 342 dec.tokenState = tokenArrayComma 343 case tokenObjectValue: 344 dec.tokenState = tokenObjectComma 345 } 346 } 347 348 // A Delim is a JSON array or object delimiter, one of [ ] { or }. 349 type Delim rune 350 351 func (d Delim) String() string { 352 return string(d) 353 } 354 355 // Token returns the next JSON token in the input stream. 356 // At the end of the input stream, Token returns nil, io.EOF. 357 // 358 // Token guarantees that the delimiters [ ] { } it returns are 359 // properly nested and matched: if Token encounters an unexpected 360 // delimiter in the input, it will return an error. 361 // 362 // The input stream consists of basic JSON values—bool, string, 363 // number, and null—along with delimiters [ ] { } of type Delim 364 // to mark the start and end of arrays and objects. 365 // Commas and colons are elided. 366 func (dec *Decoder) Token() (Token, error) { 367 for { 368 c, err := dec.peek() 369 if err != nil { 370 return nil, err 371 } 372 switch c { 373 case '[': 374 if !dec.tokenValueAllowed() { 375 return dec.tokenError(c) 376 } 377 dec.scanp++ 378 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 379 dec.tokenState = tokenArrayStart 380 return Delim('['), nil 381 382 case ']': 383 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 384 return dec.tokenError(c) 385 } 386 dec.scanp++ 387 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 388 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 389 dec.tokenValueEnd() 390 return Delim(']'), nil 391 392 case '{': 393 if !dec.tokenValueAllowed() { 394 return dec.tokenError(c) 395 } 396 dec.scanp++ 397 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 398 dec.tokenState = tokenObjectStart 399 return Delim('{'), nil 400 401 case '}': 402 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 403 return dec.tokenError(c) 404 } 405 dec.scanp++ 406 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 407 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 408 dec.tokenValueEnd() 409 return Delim('}'), nil 410 411 case ':': 412 if dec.tokenState != tokenObjectColon { 413 return dec.tokenError(c) 414 } 415 dec.scanp++ 416 dec.tokenState = tokenObjectValue 417 continue 418 419 case ',': 420 if dec.tokenState == tokenArrayComma { 421 dec.scanp++ 422 dec.tokenState = tokenArrayValue 423 continue 424 } 425 if dec.tokenState == tokenObjectComma { 426 dec.scanp++ 427 dec.tokenState = tokenObjectKey 428 continue 429 } 430 return dec.tokenError(c) 431 432 case '"': 433 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 434 var x string 435 old := dec.tokenState 436 dec.tokenState = tokenTopValue 437 err := dec.Decode(&x) 438 dec.tokenState = old 439 if err != nil { 440 return nil, err 441 } 442 dec.tokenState = tokenObjectColon 443 return x, nil 444 } 445 fallthrough 446 447 default: 448 if !dec.tokenValueAllowed() { 449 return dec.tokenError(c) 450 } 451 var x interface{} 452 if err := dec.Decode(&x); err != nil { 453 return nil, err 454 } 455 return x, nil 456 } 457 } 458 } 459 460 func (dec *Decoder) tokenError(c byte) (Token, error) { 461 var context string 462 switch dec.tokenState { 463 case tokenTopValue: 464 context = " looking for beginning of value" 465 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 466 context = " looking for beginning of value" 467 case tokenArrayComma: 468 context = " after array element" 469 case tokenObjectKey: 470 context = " looking for beginning of object key string" 471 case tokenObjectColon: 472 context = " after object key" 473 case tokenObjectComma: 474 context = " after object key:value pair" 475 } 476 return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.offset()} 477 } 478 479 // More reports whether there is another element in the 480 // current array or object being parsed. 481 func (dec *Decoder) More() bool { 482 c, err := dec.peek() 483 return err == nil && c != ']' && c != '}' 484 } 485 486 func (dec *Decoder) peek() (byte, error) { 487 var err error 488 for { 489 for i := dec.scanp; i < len(dec.buf); i++ { 490 c := dec.buf[i] 491 if isSpace(c) { 492 continue 493 } 494 dec.scanp = i 495 return c, nil 496 } 497 // buffer has been scanned, now report any error 498 if err != nil { 499 return 0, err 500 } 501 err = dec.refill() 502 } 503 } 504 505 func (dec *Decoder) offset() int64 { 506 return dec.scanned + int64(dec.scanp) 507 }