github.com/sagernet/sing@v0.4.0-beta.19.0.20240518125136-f67a0988a636/common/json/internal/contextjson_120/stream.go (about) 1 // Copyright 2010 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 ) 12 13 // A Decoder reads and decodes JSON values from an input stream. 14 type Decoder struct { 15 r io.Reader 16 buf []byte 17 d decodeState 18 scanp int // start of unread data in buf 19 scanned int64 // amount of data already scanned 20 scan scanner 21 err error 22 23 tokenState int 24 tokenStack []int 25 } 26 27 // NewDecoder returns a new decoder that reads from r. 28 // 29 // The decoder introduces its own buffering and may 30 // read data from r beyond the JSON values requested. 31 func NewDecoder(r io.Reader) *Decoder { 32 return &Decoder{r: r} 33 } 34 35 // UseNumber causes the Decoder to unmarshal a number into an interface{} as a 36 // Number instead of as a float64. 37 func (dec *Decoder) UseNumber() { dec.d.useNumber = true } 38 39 // DisallowUnknownFields causes the Decoder to return an error when the destination 40 // is a struct and the input contains object keys which do not match any 41 // non-ignored, exported fields in the destination. 42 func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } 43 44 // Decode reads the next JSON-encoded value from its 45 // input and stores it in the value pointed to by v. 46 // 47 // See the documentation for Unmarshal for details about 48 // the conversion of JSON into a Go value. 49 func (dec *Decoder) Decode(v any) error { 50 if dec.err != nil { 51 return dec.err 52 } 53 54 if err := dec.tokenPrepareForDecode(); err != nil { 55 return err 56 } 57 58 if !dec.tokenValueAllowed() { 59 return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} 60 } 61 62 // Read whole value into buffer. 63 n, err := dec.readValue() 64 if err != nil { 65 return err 66 } 67 dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) 68 dec.scanp += n 69 70 // Don't save err from unmarshal into dec.err: 71 // the connection is still usable since we read a complete JSON 72 // object from it before the error happened. 73 err = dec.d.unmarshal(v) 74 75 // fixup token streaming state 76 dec.tokenValueEnd() 77 78 return err 79 } 80 81 // Buffered returns a reader of the data remaining in the Decoder's 82 // buffer. The reader is valid until the next call to Decode. 83 func (dec *Decoder) Buffered() io.Reader { 84 return bytes.NewReader(dec.buf[dec.scanp:]) 85 } 86 87 // readValue reads a JSON value into dec.buf. 88 // It returns the length of the encoding. 89 func (dec *Decoder) readValue() (int, error) { 90 dec.scan.reset() 91 92 scanp := dec.scanp 93 var err error 94 Input: 95 // help the compiler see that scanp is never negative, so it can remove 96 // some bounds checks below. 97 for scanp >= 0 { 98 99 // Look in the buffer for a new value. 100 for ; scanp < len(dec.buf); scanp++ { 101 c := dec.buf[scanp] 102 dec.scan.bytes++ 103 switch dec.scan.step(&dec.scan, c) { 104 case scanEnd: 105 // scanEnd is delayed one byte so we decrement 106 // the scanner bytes count by 1 to ensure that 107 // this value is correct in the next call of Decode. 108 dec.scan.bytes-- 109 break Input 110 case scanEndObject, scanEndArray: 111 // scanEnd is delayed one byte. 112 // We might block trying to get that byte from src, 113 // so instead invent a space byte. 114 if stateEndValue(&dec.scan, ' ') == scanEnd { 115 scanp++ 116 break Input 117 } 118 case scanError: 119 dec.err = dec.scan.err 120 return 0, dec.scan.err 121 } 122 } 123 124 // Did the last read have an error? 125 // Delayed until now to allow buffer scan. 126 if err != nil { 127 if err == io.EOF { 128 if dec.scan.step(&dec.scan, ' ') == scanEnd { 129 break Input 130 } 131 if nonSpace(dec.buf) { 132 err = io.ErrUnexpectedEOF 133 } 134 } 135 dec.err = err 136 return 0, err 137 } 138 139 n := scanp - dec.scanp 140 err = dec.refill() 141 scanp = dec.scanp + n 142 } 143 return scanp - dec.scanp, nil 144 } 145 146 func (dec *Decoder) refill() error { 147 // Make room to read more into the buffer. 148 // First slide down data already consumed. 149 if dec.scanp > 0 { 150 dec.scanned += int64(dec.scanp) 151 n := copy(dec.buf, dec.buf[dec.scanp:]) 152 dec.buf = dec.buf[:n] 153 dec.scanp = 0 154 } 155 156 // Grow buffer if not large enough. 157 const minRead = 512 158 if cap(dec.buf)-len(dec.buf) < minRead { 159 newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) 160 copy(newBuf, dec.buf) 161 dec.buf = newBuf 162 } 163 164 // Read. Delay error for next iteration (after scan). 165 n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) 166 dec.buf = dec.buf[0 : len(dec.buf)+n] 167 168 return err 169 } 170 171 func nonSpace(b []byte) bool { 172 for _, c := range b { 173 if !isSpace(c) { 174 return true 175 } 176 } 177 return false 178 } 179 180 // An Encoder writes JSON values to an output stream. 181 type Encoder struct { 182 w io.Writer 183 err error 184 escapeHTML bool 185 186 indentBuf *bytes.Buffer 187 indentPrefix string 188 indentValue string 189 } 190 191 // NewEncoder returns a new encoder that writes to w. 192 func NewEncoder(w io.Writer) *Encoder { 193 return &Encoder{w: w, escapeHTML: true} 194 } 195 196 // Encode writes the JSON encoding of v to the stream, 197 // followed by a newline character. 198 // 199 // See the documentation for Marshal for details about the 200 // conversion of Go values to JSON. 201 func (enc *Encoder) Encode(v any) error { 202 if enc.err != nil { 203 return enc.err 204 } 205 206 e := newEncodeState() 207 defer encodeStatePool.Put(e) 208 209 err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) 210 if err != nil { 211 return err 212 } 213 214 // Terminate each value with a newline. 215 // This makes the output look a little nicer 216 // when debugging, and some kind of space 217 // is required if the encoded value was a number, 218 // so that the reader knows there aren't more 219 // digits coming. 220 e.WriteByte('\n') 221 222 b := e.Bytes() 223 if enc.indentPrefix != "" || enc.indentValue != "" { 224 if enc.indentBuf == nil { 225 enc.indentBuf = new(bytes.Buffer) 226 } 227 enc.indentBuf.Reset() 228 err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue) 229 if err != nil { 230 return err 231 } 232 b = enc.indentBuf.Bytes() 233 } 234 if _, err = enc.w.Write(b); err != nil { 235 enc.err = err 236 } 237 return err 238 } 239 240 // SetIndent instructs the encoder to format each subsequent encoded 241 // value as if indented by the package-level function Indent(dst, src, prefix, indent). 242 // Calling SetIndent("", "") disables indentation. 243 func (enc *Encoder) SetIndent(prefix, indent string) { 244 enc.indentPrefix = prefix 245 enc.indentValue = indent 246 } 247 248 // SetEscapeHTML specifies whether problematic HTML characters 249 // should be escaped inside JSON quoted strings. 250 // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e 251 // to avoid certain safety problems that can arise when embedding JSON in HTML. 252 // 253 // In non-HTML settings where the escaping interferes with the readability 254 // of the output, SetEscapeHTML(false) disables this behavior. 255 func (enc *Encoder) SetEscapeHTML(on bool) { 256 enc.escapeHTML = on 257 } 258 259 // RawMessage is a raw encoded JSON value. 260 // It implements Marshaler and Unmarshaler and can 261 // be used to delay JSON decoding or precompute a JSON encoding. 262 type RawMessage []byte 263 264 // MarshalJSON returns m as the JSON encoding of m. 265 func (m RawMessage) MarshalJSON() ([]byte, error) { 266 if m == nil { 267 return []byte("null"), nil 268 } 269 return m, nil 270 } 271 272 // UnmarshalJSON sets *m to a copy of data. 273 func (m *RawMessage) UnmarshalJSON(data []byte) error { 274 if m == nil { 275 return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") 276 } 277 *m = append((*m)[0:0], data...) 278 return nil 279 } 280 281 var ( 282 _ Marshaler = (*RawMessage)(nil) 283 _ Unmarshaler = (*RawMessage)(nil) 284 ) 285 286 // A Token holds a value of one of these types: 287 // 288 // Delim, for the four JSON delimiters [ ] { } 289 // bool, for JSON booleans 290 // float64, for JSON numbers 291 // Number, for JSON numbers 292 // string, for JSON string literals 293 // nil, for JSON null 294 type Token any 295 296 const ( 297 tokenTopValue = iota 298 tokenArrayStart 299 tokenArrayValue 300 tokenArrayComma 301 tokenObjectStart 302 tokenObjectKey 303 tokenObjectColon 304 tokenObjectValue 305 tokenObjectComma 306 ) 307 308 // advance tokenstate from a separator state to a value state 309 func (dec *Decoder) tokenPrepareForDecode() error { 310 // Note: Not calling peek before switch, to avoid 311 // putting peek into the standard Decode path. 312 // peek is only called when using the Token API. 313 switch dec.tokenState { 314 case tokenArrayComma: 315 c, err := dec.peek() 316 if err != nil { 317 return err 318 } 319 if c != ',' { 320 return &SyntaxError{"expected comma after array element", dec.InputOffset()} 321 } 322 dec.scanp++ 323 dec.tokenState = tokenArrayValue 324 case tokenObjectColon: 325 c, err := dec.peek() 326 if err != nil { 327 return err 328 } 329 if c != ':' { 330 return &SyntaxError{"expected colon after object key", dec.InputOffset()} 331 } 332 dec.scanp++ 333 dec.tokenState = tokenObjectValue 334 } 335 return nil 336 } 337 338 func (dec *Decoder) tokenValueAllowed() bool { 339 switch dec.tokenState { 340 case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: 341 return true 342 } 343 return false 344 } 345 346 func (dec *Decoder) tokenValueEnd() { 347 switch dec.tokenState { 348 case tokenArrayStart, tokenArrayValue: 349 dec.tokenState = tokenArrayComma 350 case tokenObjectValue: 351 dec.tokenState = tokenObjectComma 352 } 353 } 354 355 // A Delim is a JSON array or object delimiter, one of [ ] { or }. 356 type Delim rune 357 358 func (d Delim) String() string { 359 return string(d) 360 } 361 362 // Token returns the next JSON token in the input stream. 363 // At the end of the input stream, Token returns nil, io.EOF. 364 // 365 // Token guarantees that the delimiters [ ] { } it returns are 366 // properly nested and matched: if Token encounters an unexpected 367 // delimiter in the input, it will return an error. 368 // 369 // The input stream consists of basic JSON values—bool, string, 370 // number, and null—along with delimiters [ ] { } of type Delim 371 // to mark the start and end of arrays and objects. 372 // Commas and colons are elided. 373 func (dec *Decoder) Token() (Token, error) { 374 for { 375 c, err := dec.peek() 376 if err != nil { 377 return nil, err 378 } 379 switch c { 380 case '[': 381 if !dec.tokenValueAllowed() { 382 return dec.tokenError(c) 383 } 384 dec.scanp++ 385 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 386 dec.tokenState = tokenArrayStart 387 return Delim('['), nil 388 389 case ']': 390 if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { 391 return dec.tokenError(c) 392 } 393 dec.scanp++ 394 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 395 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 396 dec.tokenValueEnd() 397 return Delim(']'), nil 398 399 case '{': 400 if !dec.tokenValueAllowed() { 401 return dec.tokenError(c) 402 } 403 dec.scanp++ 404 dec.tokenStack = append(dec.tokenStack, dec.tokenState) 405 dec.tokenState = tokenObjectStart 406 return Delim('{'), nil 407 408 case '}': 409 if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { 410 return dec.tokenError(c) 411 } 412 dec.scanp++ 413 dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] 414 dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] 415 dec.tokenValueEnd() 416 return Delim('}'), nil 417 418 case ':': 419 if dec.tokenState != tokenObjectColon { 420 return dec.tokenError(c) 421 } 422 dec.scanp++ 423 dec.tokenState = tokenObjectValue 424 continue 425 426 case ',': 427 if dec.tokenState == tokenArrayComma { 428 dec.scanp++ 429 dec.tokenState = tokenArrayValue 430 continue 431 } 432 if dec.tokenState == tokenObjectComma { 433 dec.scanp++ 434 dec.tokenState = tokenObjectKey 435 continue 436 } 437 return dec.tokenError(c) 438 439 case '"': 440 if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { 441 var x string 442 old := dec.tokenState 443 dec.tokenState = tokenTopValue 444 err := dec.Decode(&x) 445 dec.tokenState = old 446 if err != nil { 447 return nil, err 448 } 449 dec.tokenState = tokenObjectColon 450 return x, nil 451 } 452 fallthrough 453 454 default: 455 if !dec.tokenValueAllowed() { 456 return dec.tokenError(c) 457 } 458 var x any 459 if err := dec.Decode(&x); err != nil { 460 return nil, err 461 } 462 return x, nil 463 } 464 } 465 } 466 467 func (dec *Decoder) tokenError(c byte) (Token, error) { 468 var context string 469 switch dec.tokenState { 470 case tokenTopValue: 471 context = " looking for beginning of value" 472 case tokenArrayStart, tokenArrayValue, tokenObjectValue: 473 context = " looking for beginning of value" 474 case tokenArrayComma: 475 context = " after array element" 476 case tokenObjectKey: 477 context = " looking for beginning of object key string" 478 case tokenObjectColon: 479 context = " after object key" 480 case tokenObjectComma: 481 context = " after object key:value pair" 482 } 483 return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} 484 } 485 486 // More reports whether there is another element in the 487 // current array or object being parsed. 488 func (dec *Decoder) More() bool { 489 c, err := dec.peek() 490 return err == nil && c != ']' && c != '}' 491 } 492 493 func (dec *Decoder) peek() (byte, error) { 494 var err error 495 for { 496 for i := dec.scanp; i < len(dec.buf); i++ { 497 c := dec.buf[i] 498 if isSpace(c) { 499 continue 500 } 501 dec.scanp = i 502 return c, nil 503 } 504 // buffer has been scanned, now report any error 505 if err != nil { 506 return 0, err 507 } 508 err = dec.refill() 509 } 510 } 511 512 // InputOffset returns the input stream byte offset of the current decoder position. 513 // The offset gives the location of the end of the most recently returned token 514 // and the beginning of the next token. 515 func (dec *Decoder) InputOffset() int64 { 516 return dec.scanned + int64(dec.scanp) 517 }