// k8s.io/kube-openapi@v0.0.0-20240228011516-70dd3763d340/pkg/internal/third_party/go-json-experiment/json/decode.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package json

import (
	"bytes"
	"errors"
	"io"
	"math"
	"strconv"
	"unicode/utf16"
	"unicode/utf8"
)

// NOTE: The logic for decoding is complicated by the fact that reading from
// an io.Reader into a temporary buffer means that the buffer may contain a
// truncated portion of some valid input, requiring the need to fetch more data.
//
// This file is structured in the following way:
//
//   - consumeXXX functions parse an exact JSON token from a []byte.
//     If the buffer appears truncated, then it returns io.ErrUnexpectedEOF.
//     The consumeSimpleXXX functions are so named because they only handle
//     a subset of the grammar for the JSON token being parsed.
//     They do not handle the full grammar to keep these functions inlineable.
//
//   - Decoder.consumeXXX methods parse the next JSON token from Decoder.buf,
//     automatically fetching more input if necessary. These methods take
//     a position relative to the start of Decoder.buf as an argument and
//     return the end of the consumed JSON token as a position,
//     also relative to the start of Decoder.buf.
//
//   - In the event of I/O errors or state machine violations,
//     the implementation avoids mutating the state of Decoder
//     (aside from the book-keeping needed to implement Decoder.fetch).
//     For this reason, only Decoder.ReadToken and Decoder.ReadValue are
//     responsible for updating Decoder.prevStart and Decoder.prevEnd.
//
//   - For performance, much of the implementation uses the pattern of calling
//     the inlineable consumeXXX functions first, and if more work is necessary,
//     then it calls the slower Decoder.consumeXXX methods.
//     TODO: Revisit this pattern if the Go compiler provides finer control
//     over exactly which calls are inlined or not.

// DecodeOptions configures how JSON decoding operates.
// The zero value is equivalent to the default settings,
// which is compliant with both RFC 7493 and RFC 8259.
type DecodeOptions struct {
	// NOTE(review): the two embedded types below are declared elsewhere in
	// the package; presumably they force keyed composite literals and make
	// the struct non-comparable — confirm against their definitions.
	requireKeyedLiterals
	nonComparable

	// AllowDuplicateNames specifies that JSON objects may contain
	// duplicate member names. Disabling the duplicate name check may provide
	// computational and performance benefits, but breaks compliance with
	// RFC 7493, section 2.3. The input will still be compliant with RFC 8259,
	// which leaves the handling of duplicate names as unspecified behavior.
	AllowDuplicateNames bool

	// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
	// which will be mangled as the Unicode replacement character, U+FFFD.
	// This causes the decoder to break compliance with
	// RFC 7493, section 2.1, and RFC 8259, section 8.1.
	AllowInvalidUTF8 bool
}

// Decoder is a streaming decoder for raw JSON tokens and values.
// It is used to read a stream of top-level JSON values,
// each separated by optional whitespace characters.
//
// ReadToken and ReadValue calls may be interleaved.
// For example, the following JSON value:
//
//	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be parsed with the following calls (ignoring errors for brevity):
//
//	d.ReadToken() // {
//	d.ReadToken() // "name"
//	d.ReadToken() // "value"
//	d.ReadValue() // "array"
//	d.ReadToken() // [
//	d.ReadToken() // null
//	d.ReadToken() // false
//	d.ReadValue() // true
//	d.ReadToken() // 3.14159
//	d.ReadToken() // ]
//	d.ReadValue() // "object"
//	d.ReadValue() // {"k":"v"}
//	d.ReadToken() // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call ReadToken to obtain a
// string token for object names.
type Decoder struct {
	state        // embedded JSON state machine (provides tokens, names, namespaces)
	decodeBuffer // embedded input buffer and read-position bookkeeping
	options      DecodeOptions

	stringCache *stringCache // only used when unmarshaling
}

// decodeBuffer is a buffer split into 4 segments:
//
//   - buf[0:prevEnd]         // already read portion of the buffer
//   - buf[prevStart:prevEnd] // previously read value
//   - buf[prevEnd:len(buf)]  // unread portion of the buffer
//   - buf[len(buf):cap(buf)] // unused portion of the buffer
//
// Invariants:
//
//	0 ≤ prevStart ≤ prevEnd ≤ len(buf) ≤ cap(buf)
type decodeBuffer struct {
	// peekPos and peekErr cache the outcome of the last PeekKind call.
	// PeekKind stores -1 in peekPos together with a non-nil peekErr on failure;
	// a peekPos of zero means that nothing is cached.
	peekPos int   // non-zero if valid offset into buf for start of next token
	peekErr error // implies peekPos is -1

	buf       []byte // may alias rd if it is a bytes.Buffer
	prevStart int
	prevEnd   int

	// baseOffset is added to prevStart and prevEnd to obtain
	// the absolute offset relative to the start of io.Reader stream.
	baseOffset int64

	rd io.Reader
}

// NewDecoder constructs a new streaming decoder reading from r.
//
// If r is a bytes.Buffer, then the decoder parses directly from the buffer
// without first copying the contents to an intermediate buffer.
// Additional writes to the buffer must not occur while the decoder is in use.
func NewDecoder(r io.Reader) *Decoder {
	return DecodeOptions{}.NewDecoder(r)
}

// NewDecoder constructs a new streaming decoder reading from r
// configured with the provided options.
func (o DecodeOptions) NewDecoder(r io.Reader) *Decoder {
	d := new(Decoder)
	o.ResetDecoder(d, r)
	return d
}

// ResetDecoder resets a decoder such that it is reading afresh from r and
// configured with the provided options.
// It panics if either d or r is nil.
func (o DecodeOptions) ResetDecoder(d *Decoder, r io.Reader) {
	if d == nil {
		panic("json: invalid nil Decoder")
	}
	if r == nil {
		panic("json: invalid nil io.Reader")
	}
	d.reset(nil, r, o)
}

// reset reinitializes the state machine, replaces the buffer bookkeeping
// with a fresh decodeBuffer over b and r, and stores o as the active options.
func (d *Decoder) reset(b []byte, r io.Reader, o DecodeOptions) {
	d.state.reset()
	d.decodeBuffer = decodeBuffer{buf: b, rd: r}
	d.options = o
}

// Reset resets a decoder such that it is reading afresh from r but
// keeps any pre-existing decoder options.
func (d *Decoder) Reset(r io.Reader) {
	d.options.ResetDecoder(d, r)
}

// errBufferWriteAfterNext is reported by fetch when a bytes.Buffer source
// holds new data even though its contents were already taken over by the decoder.
var errBufferWriteAfterNext = errors.New("invalid bytes.Buffer.Write call after calling bytes.Buffer.Next")

// fetch reads at least 1 byte from the underlying io.Reader.
// It returns io.ErrUnexpectedEOF if zero bytes were read and io.EOF was seen.
// Any other read failure is returned wrapped in an *ioError.
//
// fetch may reallocate or shift d.buf and resets prevStart to zero, so any
// position into d.buf held by the caller must be re-derived from its
// absolute offset (baseOffset + pos) after the call.
func (d *Decoder) fetch() error {
	if d.rd == nil {
		return io.ErrUnexpectedEOF
	}

	// Inform objectNameStack that we are about to fetch new buffer content.
	d.names.copyQuotedBuffer(d.buf)

	// Specialize bytes.Buffer for better performance.
	if bb, ok := d.rd.(*bytes.Buffer); ok {
		switch {
		case bb.Len() == 0:
			return io.ErrUnexpectedEOF
		case len(d.buf) == 0:
			d.buf = bb.Next(bb.Len()) // "read" all data in the buffer
			return nil
		default:
			// This only occurs if a partially filled bytes.Buffer was provided
			// and more data is written to it while Decoder is reading from it.
			// This practice will lead to data corruption since future writes
			// may overwrite the contents of the current buffer.
			//
			// The user is trying to use a bytes.Buffer as a pipe,
			// but a bytes.Buffer is a poor implementation of a pipe,
			// the purpose-built io.Pipe should be used instead.
			return &ioError{action: "read", err: errBufferWriteAfterNext}
		}
	}

	// Allocate initial buffer if empty.
	if cap(d.buf) == 0 {
		d.buf = make([]byte, 0, 64)
	}

	// Check whether to grow the buffer.
	const maxBufferSize = 4 << 10
	const growthSizeFactor = 2 // higher value is faster
	const growthRateFactor = 2 // higher value is slower
	// By default, grow if below the maximum buffer size.
	grow := cap(d.buf) <= maxBufferSize/growthSizeFactor
	// Growing can be expensive, so only grow
	// if a sufficient number of bytes have been processed.
	grow = grow && int64(cap(d.buf)) < d.previousOffsetEnd()/growthRateFactor
	// If prevStart==0, then fetch was called in order to fetch more data
	// to finish consuming a large JSON value contiguously.
	// Grow if less than 25% of the remaining capacity is available.
	// Note that this may cause the input buffer to exceed maxBufferSize.
	grow = grow || (d.prevStart == 0 && len(d.buf) >= 3*cap(d.buf)/4)

	if grow {
		// Allocate a new buffer and copy the contents of the old buffer over.
		// TODO: Provide a hard limit on the maximum internal buffer size?
		buf := make([]byte, 0, cap(d.buf)*growthSizeFactor)
		d.buf = append(buf, d.buf[d.prevStart:]...)
	} else {
		// Move unread portion of the data to the front.
		n := copy(d.buf[:cap(d.buf)], d.buf[d.prevStart:])
		d.buf = d.buf[:n]
	}
	// Everything before prevStart was dropped above, so fold it into
	// baseOffset to keep absolute offsets stable.
	d.baseOffset += int64(d.prevStart)
	d.prevEnd -= d.prevStart
	d.prevStart = 0

	// Read more data into the internal buffer.
	for {
		n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)])
		switch {
		case n > 0:
			d.buf = d.buf[:len(d.buf)+n]
			return nil // ignore errors if any bytes are read
		case err == io.EOF:
			return io.ErrUnexpectedEOF
		case err != nil:
			return &ioError{action: "read", err: err}
		default:
			continue // Read returned (0, nil)
		}
	}
}

const invalidateBufferByte = '#' // invalid starting character for JSON grammar

// invalidatePreviousRead invalidates buffers returned by Peek and Read calls
// so that the first byte is an invalid character.
// This Hyrum-proofs the API against faulty application code that assumes
// values returned by ReadValue remain valid past subsequent Read calls.
func (d *decodeBuffer) invalidatePreviousRead() {
	// Avoid mutating the buffer if d.rd is nil which implies that d.buf
	// is provided by the user code and may not expect mutations.
	// The buffer is likewise left untouched when rd is a *bytes.Buffer,
	// in which case d.buf may alias the bytes.Buffer contents.
	isBytesBuffer := func(r io.Reader) bool {
		_, ok := r.(*bytes.Buffer)
		return ok
	}
	if d.rd != nil && !isBytesBuffer(d.rd) && d.prevStart < d.prevEnd && uint(d.prevStart) < uint(len(d.buf)) {
		d.buf[d.prevStart] = invalidateBufferByte
		d.prevStart = d.prevEnd // collapse the previous value so it is only invalidated once
	}
}

// needMore reports whether there are no more unread bytes.
func (d *decodeBuffer) needMore(pos int) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlineable.
	return pos == len(d.buf)
}

// injectSyntacticErrorWithPosition wraps a SyntacticError with the position,
// otherwise it returns the error as is.
// It takes a position relative to the start of d.buf.
func (d *decodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) error {
	if serr, ok := err.(*SyntacticError); ok {
		// Convert the buffer-relative position into an absolute stream offset.
		return serr.withOffset(d.baseOffset + int64(pos))
	}
	return err
}

// The helpers below expose the previously read value and the unread
// remainder, either as absolute stream offsets or as sub-slices of d.buf.
func (d *decodeBuffer) previousOffsetStart() int64 { return d.baseOffset + int64(d.prevStart) }
func (d *decodeBuffer) previousOffsetEnd() int64   { return d.baseOffset + int64(d.prevEnd) }
func (d *decodeBuffer) previousBuffer() []byte     { return d.buf[d.prevStart:d.prevEnd] }
func (d *decodeBuffer) unreadBuffer() []byte       { return d.buf[d.prevEnd:len(d.buf)] }

// PeekKind retrieves the next token kind, but does not advance the read offset.
// It returns 0 if there are no more tokens.
//
// The outcome is cached in peekPos and peekErr so that a following
// ReadToken or ReadValue call can reuse it; on failure the error is
// stored in peekErr and invalidKind is returned.
func (d *Decoder) PeekKind() Kind {
	// Check whether we have a cached peek result.
	if d.peekPos > 0 {
		return Kind(d.buf[d.peekPos]).normalize()
	}

	var err error
	d.invalidatePreviousRead()
	pos := d.prevEnd

	// Consume leading whitespace.
	pos += consumeWhitespace(d.buf[pos:])
	if d.needMore(pos) {
		if pos, err = d.consumeWhitespace(pos); err != nil {
			if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
				err = io.EOF // EOF possibly if no Tokens present after top-level value
			}
			d.peekPos, d.peekErr = -1, err
			return invalidKind
		}
	}

	// Consume colon or comma.
	var delim byte
	if c := d.buf[pos]; c == ':' || c == ',' {
		delim = c
		pos += 1
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				d.peekPos, d.peekErr = -1, err
				return invalidKind
			}
		}
	}
	next := Kind(d.buf[pos]).normalize()
	if d.tokens.needDelim(next) != delim {
		pos = d.prevEnd // restore position to right after leading whitespace
		pos += consumeWhitespace(d.buf[pos:])
		err = d.tokens.checkDelim(delim, next)
		err = d.injectSyntacticErrorWithPosition(err, pos)
		d.peekPos, d.peekErr = -1, err
		return invalidKind
	}

	// This may set peekPos to zero, which is indistinguishable from
	// the uninitialized state. While a small hit to performance, it is correct
	// since ReadValue and ReadToken will disregard the cached result and
	// recompute the next kind.
	d.peekPos, d.peekErr = pos, nil
	return next
}

// SkipValue is semantically equivalent to calling ReadValue and discarding
// the result except that memory is not wasted trying to hold the entire result.
func (d *Decoder) SkipValue() error {
	switch d.PeekKind() {
	case '{', '[':
		// For JSON objects and arrays, keep skipping all tokens
		// until the depth matches the starting depth.
		depth := d.tokens.depth()
		for {
			if _, err := d.ReadToken(); err != nil {
				return err
			}
			if depth >= d.tokens.depth() {
				return nil
			}
		}
	default:
		// Trying to skip a value when the next token is a '}' or ']'
		// will result in an error being returned here.
		if _, err := d.ReadValue(); err != nil {
			return err
		}
		return nil
	}
}

// ReadToken reads the next Token, advancing the read offset.
// The returned token is only valid until the next Peek, Read, or Skip call.
// It returns io.EOF if there are no more tokens.
func (d *Decoder) ReadToken() (Token, error) {
	// Determine the next kind.
	var err error
	var next Kind
	pos := d.peekPos
	if pos != 0 {
		// Use cached peek result.
		if d.peekErr != nil {
			err := d.peekErr
			d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
			return Token{}, err
		}
		next = Kind(d.buf[pos]).normalize()
		d.peekPos = 0 // reset cache
	} else {
		d.invalidatePreviousRead()
		pos = d.prevEnd

		// Consume leading whitespace.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
					err = io.EOF // EOF possibly if no Tokens present after top-level value
				}
				return Token{}, err
			}
		}

		// Consume colon or comma.
		var delim byte
		if c := d.buf[pos]; c == ':' || c == ',' {
			delim = c
			pos += 1
			pos += consumeWhitespace(d.buf[pos:])
			if d.needMore(pos) {
				if pos, err = d.consumeWhitespace(pos); err != nil {
					return Token{}, err
				}
			}
		}
		next = Kind(d.buf[pos]).normalize()
		if d.tokens.needDelim(next) != delim {
			pos = d.prevEnd // restore position to right after leading whitespace
			pos += consumeWhitespace(d.buf[pos:])
			err = d.tokens.checkDelim(delim, next)
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
	}

	// Handle the next token.
	// Each case first tries the inlineable fast path and only falls back to
	// the slower Decoder method (which may fetch more input) when that fails.
	var n int
	switch next {
	case 'n':
		if consumeNull(d.buf[pos:]) == 0 {
			pos, err = d.consumeLiteral(pos, "null")
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += len("null")
		}
		if err = d.tokens.appendLiteral(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("null")) // report position at start of literal
		}
		d.prevStart, d.prevEnd = pos, pos
		return Null, nil

	case 'f':
		if consumeFalse(d.buf[pos:]) == 0 {
			pos, err = d.consumeLiteral(pos, "false")
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += len("false")
		}
		if err = d.tokens.appendLiteral(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("false")) // report position at start of literal
		}
		d.prevStart, d.prevEnd = pos, pos
		return False, nil

	case 't':
		if consumeTrue(d.buf[pos:]) == 0 {
			pos, err = d.consumeLiteral(pos, "true")
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += len("true")
		}
		if err = d.tokens.appendLiteral(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("true")) // report position at start of literal
		}
		d.prevStart, d.prevEnd = pos, pos
		return True, nil

	case '"':
		var flags valueFlags // TODO: Preserve this in Token?
		if n = consumeSimpleString(d.buf[pos:]); n == 0 {
			// consumeString may fetch and thereby shift d.buf, so the length
			// is computed from absolute offsets rather than from pos directly.
			oldAbsPos := d.baseOffset + int64(pos)
			pos, err = d.consumeString(&flags, pos)
			newAbsPos := d.baseOffset + int64(pos)
			n = int(newAbsPos - oldAbsPos)
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += n
		}
		if !d.options.AllowDuplicateNames && d.tokens.last.needObjectName() {
			if !d.tokens.last.isValidNamespace() {
				return Token{}, errInvalidNamespace
			}
			if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) {
				err = &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string
			}
			d.names.replaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
		}
		if err = d.tokens.appendString(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string
		}
		d.prevStart, d.prevEnd = pos-n, pos
		return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil

	case '0':
		// NOTE: Since JSON numbers are not self-terminating,
		// we need to make sure that the next byte is not part of a number.
		if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
			// As for strings, derive the length from absolute offsets
			// because consumeNumber may fetch and shift d.buf.
			oldAbsPos := d.baseOffset + int64(pos)
			pos, err = d.consumeNumber(pos)
			newAbsPos := d.baseOffset + int64(pos)
			n = int(newAbsPos - oldAbsPos)
			if err != nil {
				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
			}
		} else {
			pos += n
		}
		if err = d.tokens.appendNumber(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of number
		}
		d.prevStart, d.prevEnd = pos-n, pos
		return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil

	case '{':
		if err = d.tokens.pushObject(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		if !d.options.AllowDuplicateNames {
			d.names.push()
			d.namespaces.push()
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ObjectStart, nil

	case '}':
		if err = d.tokens.popObject(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		if !d.options.AllowDuplicateNames {
			d.names.pop()
			d.namespaces.pop()
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ObjectEnd, nil

	case '[':
		if err = d.tokens.pushArray(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ArrayStart, nil

	case ']':
		if err = d.tokens.popArray(); err != nil {
			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
		}
		pos += 1
		d.prevStart, d.prevEnd = pos, pos
		return ArrayEnd, nil

	default:
		err = newInvalidCharacterError(d.buf[pos:], "at start of token")
		return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
	}
}

// valueFlags is a bit set recording properties observed while consuming a value.
type valueFlags uint

const (
	_ valueFlags = (1 << iota) / 2 // powers of two starting with zero

	stringNonVerbatim  // string cannot be naively treated as valid UTF-8
	stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2.
	// TODO: Track whether a number is a non-integer?
)

// set ORs f2 into f; the is* methods report that the corresponding "non" bit is clear.
func (f *valueFlags) set(f2 valueFlags) { *f |= f2 }
func (f valueFlags) isVerbatim() bool   { return f&stringNonVerbatim == 0 }
func (f valueFlags) isCanonical() bool  { return f&stringNonCanonical == 0 }

// ReadValue returns the next raw JSON value, advancing the read offset.
// The value is stripped of any leading or trailing whitespace.
// The returned value is only valid until the next Peek, Read, or Skip call and
// may not be mutated while the Decoder remains in use.
// If the decoder is currently at the end token for an object or array,
// then it reports a SyntacticError and the internal state remains unchanged.
// It returns io.EOF if there are no more values.
func (d *Decoder) ReadValue() (RawValue, error) {
	var flags valueFlags
	return d.readValue(&flags)
}

// readValue implements ReadValue and additionally records in flags
// the properties observed while consuming the value.
func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) {
	// Determine the next kind.
	var err error
	var next Kind
	pos := d.peekPos
	if pos != 0 {
		// Use cached peek result.
		if d.peekErr != nil {
			err := d.peekErr
			d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
			return nil, err
		}
		next = Kind(d.buf[pos]).normalize()
		d.peekPos = 0 // reset cache
	} else {
		d.invalidatePreviousRead()
		pos = d.prevEnd

		// Consume leading whitespace.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
					err = io.EOF // EOF possibly if no Tokens present after top-level value
				}
				return nil, err
			}
		}

		// Consume colon or comma.
		var delim byte
		if c := d.buf[pos]; c == ':' || c == ',' {
			delim = c
			pos += 1
			pos += consumeWhitespace(d.buf[pos:])
			if d.needMore(pos) {
				if pos, err = d.consumeWhitespace(pos); err != nil {
					return nil, err
				}
			}
		}
		next = Kind(d.buf[pos]).normalize()
		if d.tokens.needDelim(next) != delim {
			pos = d.prevEnd // restore position to right after leading whitespace
			pos += consumeWhitespace(d.buf[pos:])
			err = d.tokens.checkDelim(delim, next)
			return nil, d.injectSyntacticErrorWithPosition(err, pos)
		}
	}

	// Handle the next value.
	// consumeValue may fetch and shift d.buf, so the value length n
	// is computed from absolute offsets.
	oldAbsPos := d.baseOffset + int64(pos)
	pos, err = d.consumeValue(flags, pos)
	newAbsPos := d.baseOffset + int64(pos)
	n := int(newAbsPos - oldAbsPos)
	if err != nil {
		return nil, d.injectSyntacticErrorWithPosition(err, pos)
	}
	// Update the token state machine only after the whole value was consumed.
	switch next {
	case 'n', 't', 'f':
		err = d.tokens.appendLiteral()
	case '"':
		if !d.options.AllowDuplicateNames && d.tokens.last.needObjectName() {
			if !d.tokens.last.isValidNamespace() {
				err = errInvalidNamespace
				break
			}
			if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) {
				err = &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
				break
			}
			d.names.replaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
		}
		err = d.tokens.appendString()
	case '0':
		err = d.tokens.appendNumber()
	case '{':
		// A complete object was consumed, so it is pushed and immediately popped.
		if err = d.tokens.pushObject(); err != nil {
			break
		}
		if err = d.tokens.popObject(); err != nil {
			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
		}
	case '[':
		// A complete array was consumed, so it is pushed and immediately popped.
		if err = d.tokens.pushArray(); err != nil {
			break
		}
		if err = d.tokens.popArray(); err != nil {
			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
		}
	}
	if err != nil {
		return nil, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of value
	}
	d.prevEnd = pos
	d.prevStart = pos - n
	// The capacity is capped at pos so that appending to the returned slice
	// cannot overwrite the unread portion of d.buf.
	return d.buf[pos-n : pos : pos], nil
}

// checkEOF verifies that the input has no more data.
// It returns nil if only whitespace remains, an invalid character error
// if another byte follows, or the underlying error otherwise.
func (d *Decoder) checkEOF() error {
	switch pos, err := d.consumeWhitespace(d.prevEnd); err {
	case nil:
		return newInvalidCharacterError(d.buf[pos:], "after top-level value")
	case io.ErrUnexpectedEOF:
		return nil
	default:
		return err
	}
}

// consumeWhitespace consumes all whitespace starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the last whitespace.
// If it returns nil, there is guaranteed to at least be one unread byte.
//
// The following pattern is common in this implementation:
//
//	pos += consumeWhitespace(d.buf[pos:])
//	if d.needMore(pos) {
//		if pos, err = d.consumeWhitespace(pos); err != nil {
//			return ...
//		}
//	}
//
// It is difficult to simplify this without sacrificing performance since
// consumeWhitespace must be inlined. The body of the if statement is
// executed only in rare situations where we need to fetch more data.
// Since fetching may return an error, we also need to check the error.
func (d *Decoder) consumeWhitespace(pos int) (newPos int, err error) {
	for {
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			absPos := d.baseOffset + int64(pos)
			err = d.fetch() // will mutate d.buf and invalidate pos
			pos = int(absPos - d.baseOffset)
			if err != nil {
				return pos, err
			}
			continue
		}
		return pos, nil
	}
}

// consumeValue consumes a single JSON value starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the value.
743 func (d *Decoder) consumeValue(flags *valueFlags, pos int) (newPos int, err error) { 744 for { 745 var n int 746 var err error 747 switch next := Kind(d.buf[pos]).normalize(); next { 748 case 'n': 749 if n = consumeNull(d.buf[pos:]); n == 0 { 750 n, err = consumeLiteral(d.buf[pos:], "null") 751 } 752 case 'f': 753 if n = consumeFalse(d.buf[pos:]); n == 0 { 754 n, err = consumeLiteral(d.buf[pos:], "false") 755 } 756 case 't': 757 if n = consumeTrue(d.buf[pos:]); n == 0 { 758 n, err = consumeLiteral(d.buf[pos:], "true") 759 } 760 case '"': 761 if n = consumeSimpleString(d.buf[pos:]); n == 0 { 762 return d.consumeString(flags, pos) 763 } 764 case '0': 765 // NOTE: Since JSON numbers are not self-terminating, 766 // we need to make sure that the next byte is not part of a number. 767 if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) { 768 return d.consumeNumber(pos) 769 } 770 case '{': 771 return d.consumeObject(flags, pos) 772 case '[': 773 return d.consumeArray(flags, pos) 774 default: 775 return pos, newInvalidCharacterError(d.buf[pos:], "at start of value") 776 } 777 if err == io.ErrUnexpectedEOF { 778 absPos := d.baseOffset + int64(pos) 779 err = d.fetch() // will mutate d.buf and invalidate pos 780 pos = int(absPos - d.baseOffset) 781 if err != nil { 782 return pos, err 783 } 784 continue 785 } 786 return pos + n, err 787 } 788 } 789 790 // consumeLiteral consumes a single JSON literal starting at d.buf[pos:]. 791 // It returns the new position in d.buf immediately after the literal. 
func (d *Decoder) consumeLiteral(pos int, lit string) (newPos int, err error) {
	for {
		n, err := consumeLiteral(d.buf[pos:], lit)
		if err == io.ErrUnexpectedEOF {
			absPos := d.baseOffset + int64(pos)
			err = d.fetch() // will mutate d.buf and invalidate pos
			pos = int(absPos - d.baseOffset)
			if err != nil {
				return pos, err
			}
			continue
		}
		return pos + n, err
	}
}

// consumeString consumes a single JSON string starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the string.
func (d *Decoder) consumeString(flags *valueFlags, pos int) (newPos int, err error) {
	// n carries the resume offset across iterations, so that parsing
	// continues where it stopped once more input has been fetched.
	var n int
	for {
		n, err = consumeStringResumable(flags, d.buf[pos:], n, !d.options.AllowInvalidUTF8)
		if err == io.ErrUnexpectedEOF {
			absPos := d.baseOffset + int64(pos)
			err = d.fetch() // will mutate d.buf and invalidate pos
			pos = int(absPos - d.baseOffset)
			if err != nil {
				return pos, err
			}
			continue
		}
		return pos + n, err
	}
}

// consumeNumber consumes a single JSON number starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the number.
func (d *Decoder) consumeNumber(pos int) (newPos int, err error) {
	// n and state carry the resume point across iterations.
	var n int
	var state consumeNumberState
	for {
		n, state, err = consumeNumberResumable(d.buf[pos:], n, state)
		// NOTE: Since JSON numbers are not self-terminating,
		// we need to make sure that the next byte is not part of a number.
		if err == io.ErrUnexpectedEOF || d.needMore(pos+n) {
			// If parsing succeeded and only the look-ahead byte is missing,
			// then hitting the end of input simply terminates the number.
			mayTerminate := err == nil
			absPos := d.baseOffset + int64(pos)
			err = d.fetch() // will mutate d.buf and invalidate pos
			pos = int(absPos - d.baseOffset)
			if err != nil {
				if mayTerminate && err == io.ErrUnexpectedEOF {
					return pos + n, nil
				}
				return pos, err
			}
			continue
		}
		return pos + n, err
	}
}

// consumeObject consumes a single JSON object starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the object.
// Unless AllowDuplicateNames is set, a duplicate member name is
// reported as a SyntacticError.
func (d *Decoder) consumeObject(flags *valueFlags, pos int) (newPos int, err error) {
	var n int
	var names *objectNamespace
	if !d.options.AllowDuplicateNames {
		// Track the names of this object in a namespace that is
		// discarded again when the function returns.
		d.namespaces.push()
		defer d.namespaces.pop()
		names = d.namespaces.last()
	}

	// Handle before start.
	if d.buf[pos] != '{' {
		panic("BUG: consumeObject must be called with a buffer that starts with '{'")
	}
	pos++

	// Handle after start.
	pos += consumeWhitespace(d.buf[pos:])
	if d.needMore(pos) {
		if pos, err = d.consumeWhitespace(pos); err != nil {
			return pos, err
		}
	}
	if d.buf[pos] == '}' {
		pos++
		return pos, nil
	}

	for {
		// Handle before name.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		var flags2 valueFlags
		if n = consumeSimpleString(d.buf[pos:]); n == 0 {
			// consumeString may fetch and shift d.buf, so the name length
			// is computed from absolute offsets.
			oldAbsPos := d.baseOffset + int64(pos)
			pos, err = d.consumeString(&flags2, pos)
			newAbsPos := d.baseOffset + int64(pos)
			n = int(newAbsPos - oldAbsPos)
			flags.set(flags2)
			if err != nil {
				return pos, err
			}
		} else {
			pos += n
		}
		if !d.options.AllowDuplicateNames && !names.insertQuoted(d.buf[pos-n:pos], flags2.isVerbatim()) {
			return pos - n, &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
		}

		// Handle after name.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		if d.buf[pos] != ':' {
			return pos, newInvalidCharacterError(d.buf[pos:], "after object name (expecting ':')")
		}
		pos++

		// Handle before value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		pos, err = d.consumeValue(flags, pos)
		if err != nil {
			return pos, err
		}

		// Handle after value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		switch d.buf[pos] {
		case ',':
			pos++
			continue
		case '}':
			pos++
			return pos, nil
		default:
			return pos, newInvalidCharacterError(d.buf[pos:], "after object value (expecting ',' or '}')")
		}
	}
}

// consumeArray consumes a single JSON array starting at d.buf[pos:].
// It returns the new position in d.buf immediately after the array.
func (d *Decoder) consumeArray(flags *valueFlags, pos int) (newPos int, err error) {
	// Handle before start.
	if d.buf[pos] != '[' {
		panic("BUG: consumeArray must be called with a buffer that starts with '['")
	}
	pos++

	// Handle after start.
	pos += consumeWhitespace(d.buf[pos:])
	if d.needMore(pos) {
		if pos, err = d.consumeWhitespace(pos); err != nil {
			return pos, err
		}
	}
	if d.buf[pos] == ']' {
		pos++
		return pos, nil
	}

	for {
		// Handle before value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		pos, err = d.consumeValue(flags, pos)
		if err != nil {
			return pos, err
		}

		// Handle after value.
		pos += consumeWhitespace(d.buf[pos:])
		if d.needMore(pos) {
			if pos, err = d.consumeWhitespace(pos); err != nil {
				return pos, err
			}
		}
		switch d.buf[pos] {
		case ',':
			pos++
			continue
		case ']':
			pos++
			return pos, nil
		default:
			return pos, newInvalidCharacterError(d.buf[pos:], "after array value (expecting ',' or ']')")
		}
	}
}

// InputOffset returns the current input byte offset. It gives the location
// of the next byte immediately after the most recently returned token or value.
// The number of bytes actually read from the underlying io.Reader may be more
// than this offset due to internal buffering effects.
func (d *Decoder) InputOffset() int64 {
	return d.previousOffsetEnd()
}

// UnreadBuffer returns the data remaining in the unread buffer,
// which may contain zero or more bytes.
// The returned buffer must not be mutated while Decoder continues to be used.
// The buffer contents are valid until the next Peek, Read, or Skip call.
func (d *Decoder) UnreadBuffer() []byte {
	return d.unreadBuffer()
}

// StackDepth returns the depth of the state machine for read JSON data.
1022 // Each level on the stack represents a nested JSON object or array. 1023 // It is incremented whenever an ObjectStart or ArrayStart token is encountered 1024 // and decremented whenever an ObjectEnd or ArrayEnd token is encountered. 1025 // The depth is zero-indexed, where zero represents the top-level JSON value. 1026 func (d *Decoder) StackDepth() int { 1027 // NOTE: Keep in sync with Encoder.StackDepth. 1028 return d.tokens.depth() - 1 1029 } 1030 1031 // StackIndex returns information about the specified stack level. 1032 // It must be a number between 0 and StackDepth, inclusive. 1033 // For each level, it reports the kind: 1034 // 1035 // - 0 for a level of zero, 1036 // - '{' for a level representing a JSON object, and 1037 // - '[' for a level representing a JSON array. 1038 // 1039 // It also reports the length of that JSON object or array. 1040 // Each name and value in a JSON object is counted separately, 1041 // so the effective number of members would be half the length. 1042 // A complete JSON object must have an even length. 1043 func (d *Decoder) StackIndex(i int) (Kind, int) { 1044 // NOTE: Keep in sync with Encoder.StackIndex. 1045 switch s := d.tokens.index(i); { 1046 case i > 0 && s.isObject(): 1047 return '{', s.length() 1048 case i > 0 && s.isArray(): 1049 return '[', s.length() 1050 default: 1051 return 0, s.length() 1052 } 1053 } 1054 1055 // StackPointer returns a JSON Pointer (RFC 6901) to the most recently read value. 1056 // Object names are only present if AllowDuplicateNames is false, otherwise 1057 // object members are represented using their index within the object. 1058 func (d *Decoder) StackPointer() string { 1059 d.names.copyQuotedBuffer(d.buf) 1060 return string(d.appendStackPointer(nil)) 1061 } 1062 1063 // consumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2. 1064 func consumeWhitespace(b []byte) (n int) { 1065 // NOTE: The arguments and logic are kept simple to keep this inlineable. 
1066 for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') { 1067 n++ 1068 } 1069 return n 1070 } 1071 1072 // consumeNull consumes the next JSON null literal per RFC 7159, section 3. 1073 // It returns 0 if it is invalid, in which case consumeLiteral should be used. 1074 func consumeNull(b []byte) int { 1075 // NOTE: The arguments and logic are kept simple to keep this inlineable. 1076 const literal = "null" 1077 if len(b) >= len(literal) && string(b[:len(literal)]) == literal { 1078 return len(literal) 1079 } 1080 return 0 1081 } 1082 1083 // consumeFalse consumes the next JSON false literal per RFC 7159, section 3. 1084 // It returns 0 if it is invalid, in which case consumeLiteral should be used. 1085 func consumeFalse(b []byte) int { 1086 // NOTE: The arguments and logic are kept simple to keep this inlineable. 1087 const literal = "false" 1088 if len(b) >= len(literal) && string(b[:len(literal)]) == literal { 1089 return len(literal) 1090 } 1091 return 0 1092 } 1093 1094 // consumeTrue consumes the next JSON true literal per RFC 7159, section 3. 1095 // It returns 0 if it is invalid, in which case consumeLiteral should be used. 1096 func consumeTrue(b []byte) int { 1097 // NOTE: The arguments and logic are kept simple to keep this inlineable. 1098 const literal = "true" 1099 if len(b) >= len(literal) && string(b[:len(literal)]) == literal { 1100 return len(literal) 1101 } 1102 return 0 1103 } 1104 1105 // consumeLiteral consumes the next JSON literal per RFC 7159, section 3. 1106 // If the input appears truncated, it returns io.ErrUnexpectedEOF. 
func consumeLiteral(b []byte, lit string) (n int, err error) {
	// Compare byte by byte over the common prefix so that a mismatch is
	// reported at the exact offending byte, even when b is shorter than lit.
	for i := 0; i < len(b) && i < len(lit); i++ {
		if b[i] != lit[i] {
			return i, newInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
		}
	}
	// Everything present matched, but the literal is not complete yet.
	if len(b) < len(lit) {
		return len(b), io.ErrUnexpectedEOF
	}
	return len(lit), nil
}

// consumeSimpleString consumes the next JSON string per RFC 7159, section 7
// but is limited to the grammar for an ASCII string without escape sequences.
// It returns 0 if it is invalid or more complicated than a simple string,
// in which case consumeString should be called.
// On success the returned length includes both surrounding double quotes.
func consumeSimpleString(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlineable.
	if len(b) > 0 && b[0] == '"' {
		n++
		// Skip printable ASCII other than the backslash and the double quote.
		for len(b) > n && (' ' <= b[n] && b[n] != '\\' && b[n] != '"' && b[n] < utf8.RuneSelf) {
			n++
		}
		// Only a closing quote makes this a complete simple string; a
		// backslash, control character, non-ASCII byte, or the end of b does not.
		if len(b) > n && b[n] == '"' {
			n++
			return n
		}
	}
	return 0
}

// consumeString consumes the next JSON string per RFC 7159, section 7.
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
// characters within the string itself.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
func consumeString(flags *valueFlags, b []byte, validateUTF8 bool) (n int, err error) {
	return consumeStringResumable(flags, b, 0, validateUTF8)
}

// consumeStringResumable is identical to consumeString but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// resumeOffset is the offset in b at which scanning continues; zero means
// the leading double quote has not been consumed yet. When the input ends
// inside an escape sequence, the returned offset is the start of that
// escape sequence, so that a resumed call re-reads it in full.
//
// Along the way flags is updated: stringNonVerbatim is set when an escape
// sequence or invalid UTF-8 is seen, and stringNonCanonical when the
// spelling is not the one required by RFC 8785, section 3.2.2.2.
func consumeStringResumable(flags *valueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
	// Consume the leading double quote.
	switch {
	case resumeOffset > 0:
		n = resumeOffset // already handled the leading quote
	case uint(len(b)) == 0:
		return n, io.ErrUnexpectedEOF
	case b[0] == '"':
		n++
	default:
		return n, newInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
	}

	// Consume every character in the string.
	for uint(len(b)) > uint(n) {
		// Optimize for long sequences of unescaped characters.
		noEscape := func(c byte) bool {
			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
		}
		for uint(len(b)) > uint(n) && noEscape(b[n]) {
			n++
		}
		if uint(len(b)) <= uint(n) {
			return n, io.ErrUnexpectedEOF
		}

		// Check for terminating double quote.
		if b[n] == '"' {
			n++
			return n, nil
		}

		switch r, rn := utf8.DecodeRune(b[n:]); {
		// Handle UTF-8 encoded byte sequence.
		// Due to specialized handling of ASCII above, we know that
		// all normal sequences at this point must be 2 bytes or larger.
		case rn > 1:
			n += rn
		// Handle escape sequence.
		case r == '\\':
			flags.set(stringNonVerbatim)
			// Remember where the escape sequence starts so that truncated
			// input is resumed from the backslash.
			resumeOffset = n
			if uint(len(b)) < uint(n+2) {
				return resumeOffset, io.ErrUnexpectedEOF
			}
			switch r := b[n+1]; r {
			case '/':
				// Forward slash is the only character with 3 representations.
				// Per RFC 8785, section 3.2.2.2., this must not be escaped.
				flags.set(stringNonCanonical)
				n += 2
			case '"', '\\', 'b', 'f', 'n', 'r', 't':
				n += 2
			case 'u':
				// Fewer than six bytes remain: either the input is merely
				// truncated, or what is present already cannot be a \uXXXX escape.
				if uint(len(b)) < uint(n+6) {
					if !hasEscapeSequencePrefix(b[n:]) {
						flags.set(stringNonCanonical)
						return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:])) + " within string"}
					}
					return resumeOffset, io.ErrUnexpectedEOF
				}
				v1, ok := parseHexUint16(b[n+2 : n+6])
				if !ok {
					flags.set(stringNonCanonical)
					return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+6])) + " within string"}
				}
				// Only certain control characters can use the \uFFFF notation
				// for canonical formatting (per RFC 8785, section 3.2.2.2.).
				switch v1 {
				// \uFFFF notation not permitted for these characters.
				case '\b', '\f', '\n', '\r', '\t':
					flags.set(stringNonCanonical)
				default:
					// \uFFFF notation only permitted for control characters.
					if v1 >= ' ' {
						flags.set(stringNonCanonical)
					} else {
						// \uFFFF notation must be lower case.
						for _, c := range b[n+2 : n+6] {
							if 'A' <= c && c <= 'F' {
								flags.set(stringNonCanonical)
							}
						}
					}
				}
				n += 6

				// With UTF-8 validation enabled, a surrogate half must be
				// followed at once by a second \uXXXX escape that completes the pair.
				if validateUTF8 && utf16.IsSurrogate(rune(v1)) {
					if uint(len(b)) >= uint(n+2) && (b[n] != '\\' || b[n+1] != 'u') {
						return n, &SyntacticError{str: "invalid unpaired surrogate half within string"}
					}
					if uint(len(b)) < uint(n+6) {
						if !hasEscapeSequencePrefix(b[n:]) {
							flags.set(stringNonCanonical)
							return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:])) + " within string"}
						}
						// Resume from the first escape of the pair, not the second.
						return resumeOffset, io.ErrUnexpectedEOF
					}
					v2, ok := parseHexUint16(b[n+2 : n+6])
					if !ok {
						return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+6])) + " within string"}
					}
					if utf16.DecodeRune(rune(v1), rune(v2)) == utf8.RuneError {
						return n, &SyntacticError{str: "invalid surrogate pair in string"}
					}
					n += 6
				}
			default:
				flags.set(stringNonCanonical)
				return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+2])) + " within string"}
			}
		// Handle invalid UTF-8.
		case r == utf8.RuneError:
			// An incomplete multi-byte sequence at the end of b may simply
			// be truncated input rather than invalid UTF-8.
			if !utf8.FullRune(b[n:]) {
				return n, io.ErrUnexpectedEOF
			}
			flags.set(stringNonVerbatim | stringNonCanonical)
			if validateUTF8 {
				return n, &SyntacticError{str: "invalid UTF-8 within string"}
			}
			// Invalid UTF-8 is tolerated: step over the single offending byte.
			n++
		// Handle invalid control characters.
		case r < ' ':
			flags.set(stringNonVerbatim | stringNonCanonical)
			return n, newInvalidCharacterError(b[n:], "within string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + quoteRune(b[n:]))
		}
	}
	// The buffer ended before the closing double quote was seen.
	return n, io.ErrUnexpectedEOF
}

// hasEscapeSequencePrefix reports whether b is possibly
// the truncated prefix of a \uFFFF escape sequence.
1283 func hasEscapeSequencePrefix(b []byte) bool { 1284 for i, c := range b { 1285 switch { 1286 case i == 0 && c != '\\': 1287 return false 1288 case i == 1 && c != 'u': 1289 return false 1290 case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'): 1291 return false 1292 } 1293 } 1294 return true 1295 } 1296 1297 // unescapeString appends the unescaped form of a JSON string in src to dst. 1298 // Any invalid UTF-8 within the string will be replaced with utf8.RuneError. 1299 // The input must be an entire JSON string with no surrounding whitespace. 1300 func unescapeString(dst, src []byte) (v []byte, ok bool) { 1301 // Consume leading double quote. 1302 if uint(len(src)) == 0 || src[0] != '"' { 1303 return dst, false 1304 } 1305 i, n := 1, 1 1306 1307 // Consume every character until completion. 1308 for uint(len(src)) > uint(n) { 1309 // Optimize for long sequences of unescaped characters. 1310 noEscape := func(c byte) bool { 1311 return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"' 1312 } 1313 for uint(len(src)) > uint(n) && noEscape(src[n]) { 1314 n++ 1315 } 1316 if uint(len(src)) <= uint(n) { 1317 break 1318 } 1319 1320 // Check for terminating double quote. 1321 if src[n] == '"' { 1322 dst = append(dst, src[i:n]...) 1323 n++ 1324 return dst, len(src) == n 1325 } 1326 1327 switch r, rn := utf8.DecodeRune(src[n:]); { 1328 // Handle UTF-8 encoded byte sequence. 1329 // Due to specialized handling of ASCII above, we know that 1330 // all normal sequences at this point must be 2 bytes or larger. 1331 case rn > 1: 1332 n += rn 1333 // Handle escape sequence. 1334 case r == '\\': 1335 dst = append(dst, src[i:n]...) 1336 if r < ' ' { 1337 return dst, false // invalid control character or unescaped quote 1338 } 1339 1340 // Handle escape sequence. 
1341 if uint(len(src)) < uint(n+2) { 1342 return dst, false // truncated escape sequence 1343 } 1344 switch r := src[n+1]; r { 1345 case '"', '\\', '/': 1346 dst = append(dst, r) 1347 n += 2 1348 case 'b': 1349 dst = append(dst, '\b') 1350 n += 2 1351 case 'f': 1352 dst = append(dst, '\f') 1353 n += 2 1354 case 'n': 1355 dst = append(dst, '\n') 1356 n += 2 1357 case 'r': 1358 dst = append(dst, '\r') 1359 n += 2 1360 case 't': 1361 dst = append(dst, '\t') 1362 n += 2 1363 case 'u': 1364 if uint(len(src)) < uint(n+6) { 1365 return dst, false // truncated escape sequence 1366 } 1367 v1, ok := parseHexUint16(src[n+2 : n+6]) 1368 if !ok { 1369 return dst, false // invalid escape sequence 1370 } 1371 n += 6 1372 1373 // Check whether this is a surrogate half. 1374 r := rune(v1) 1375 if utf16.IsSurrogate(r) { 1376 r = utf8.RuneError // assume failure unless the following succeeds 1377 if uint(len(src)) >= uint(n+6) && src[n+0] == '\\' && src[n+1] == 'u' { 1378 if v2, ok := parseHexUint16(src[n+2 : n+6]); ok { 1379 if r = utf16.DecodeRune(rune(v1), rune(v2)); r != utf8.RuneError { 1380 n += 6 1381 } 1382 } 1383 } 1384 } 1385 1386 dst = utf8.AppendRune(dst, r) 1387 default: 1388 return dst, false // invalid escape sequence 1389 } 1390 i = n 1391 // Handle invalid UTF-8. 1392 case r == utf8.RuneError: 1393 // NOTE: An unescaped string may be longer than the escaped string 1394 // because invalid UTF-8 bytes are being replaced. 1395 dst = append(dst, src[i:n]...) 1396 dst = append(dst, "\uFFFD"...) 1397 n += rn 1398 i = n 1399 // Handle invalid control characters. 1400 case r < ' ': 1401 dst = append(dst, src[i:n]...) 1402 return dst, false // invalid control character or unescaped quote 1403 default: 1404 panic("BUG: unhandled character " + quoteRune(src[n:])) 1405 } 1406 } 1407 dst = append(dst, src[i:n]...) 1408 return dst, false // truncated input 1409 } 1410 1411 // unescapeStringMayCopy returns the unescaped form of b. 
1412 // If there are no escaped characters, the output is simply a subslice of 1413 // the input with the surrounding quotes removed. 1414 // Otherwise, a new buffer is allocated for the output. 1415 func unescapeStringMayCopy(b []byte, isVerbatim bool) []byte { 1416 // NOTE: The arguments and logic are kept simple to keep this inlineable. 1417 if isVerbatim { 1418 return b[len(`"`) : len(b)-len(`"`)] 1419 } 1420 b, _ = unescapeString(make([]byte, 0, len(b)), b) 1421 return b 1422 } 1423 1424 // consumeSimpleNumber consumes the next JSON number per RFC 7159, section 6 1425 // but is limited to the grammar for a positive integer. 1426 // It returns 0 if it is invalid or more complicated than a simple integer, 1427 // in which case consumeNumber should be called. 1428 func consumeSimpleNumber(b []byte) (n int) { 1429 // NOTE: The arguments and logic are kept simple to keep this inlineable. 1430 if len(b) > 0 { 1431 if b[0] == '0' { 1432 n++ 1433 } else if '1' <= b[0] && b[0] <= '9' { 1434 n++ 1435 for len(b) > n && ('0' <= b[n] && b[n] <= '9') { 1436 n++ 1437 } 1438 } else { 1439 return 0 1440 } 1441 if len(b) == n || !(b[n] == '.' || b[n] == 'e' || b[n] == 'E') { 1442 return n 1443 } 1444 } 1445 return 0 1446 } 1447 1448 type consumeNumberState uint 1449 1450 const ( 1451 consumeNumberInit consumeNumberState = iota 1452 beforeIntegerDigits 1453 withinIntegerDigits 1454 beforeFractionalDigits 1455 withinFractionalDigits 1456 beforeExponentDigits 1457 withinExponentDigits 1458 ) 1459 1460 // consumeNumber consumes the next JSON number per RFC 7159, section 6. 1461 // It reports the number of bytes consumed and whether an error was encountered. 1462 // If the input appears truncated, it returns io.ErrUnexpectedEOF. 1463 // 1464 // Note that JSON numbers are not self-terminating. 1465 // If the entire input is consumed, then the caller needs to consider whether 1466 // there may be subsequent unread data that may still be part of this number. 
func consumeNumber(b []byte) (n int, err error) {
	// Start from the beginning of b in the initial state;
	// the resulting state is of no interest here.
	n, _, err = consumeNumberResumable(b, 0, consumeNumberInit)
	return n, err
}

// consumeNumberResumable is identical to consumeNumber but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// resumeOffset and state are the offset and state returned by the previous
// call (0 and consumeNumberInit for a fresh start). The returned state
// records which part of the number grammar was reached. When the input ends
// right after a '-', '.', 'e', 'E', or exponent sign, the returned offset
// points at the start of that incomplete component, together with the
// matching "before" state and io.ErrUnexpectedEOF.
func consumeNumberResumable(b []byte, resumeOffset int, state consumeNumberState) (n int, _ consumeNumberState, err error) {
	// Jump to the right state when resuming from a partial consumption.
	n = resumeOffset
	if state > consumeNumberInit {
		switch state {
		case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
			// Consume leading digits.
			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
			if len(b) == n {
				return n, state, nil // still within the same state
			}
			state++ // switches "withinX" to "beforeY" where Y is the state after X
		}
		switch state {
		case beforeIntegerDigits:
			goto beforeInteger
		case beforeFractionalDigits:
			goto beforeFractional
		case beforeExponentDigits:
			goto beforeExponent
		default:
			// Reached after the exponent digits have ended:
			// no further component can follow, so the number is complete.
			return n, state, nil
		}
	}

	// Consume required integer component (with optional minus sign).
beforeInteger:
	resumeOffset = n
	if len(b) > 0 && b[0] == '-' {
		n++
	}
	switch {
	case len(b) == n:
		// Nothing, or only the minus sign, is available so far.
		return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
	case b[n] == '0':
		// A leading zero cannot be followed by further integer digits.
		n++
		state = beforeFractionalDigits
	case '1' <= b[n] && b[n] <= '9':
		n++
		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinIntegerDigits
	default:
		return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
	}

	// Consume optional fractional component.
beforeFractional:
	if len(b) > n && b[n] == '.' {
		resumeOffset = n
		n++
		switch {
		case len(b) == n:
			// Only the '.' is available; resume from it later.
			return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
		}
		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinFractionalDigits
	}

	// Consume optional exponent component.
beforeExponent:
	if len(b) > n && (b[n] == 'e' || b[n] == 'E') {
		resumeOffset = n
		n++
		if len(b) > n && (b[n] == '-' || b[n] == '+') {
			n++
		}
		switch {
		case len(b) == n:
			// Only the exponent marker (and perhaps its sign) is available;
			// resume from the marker later.
			return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
		case '0' <= b[n] && b[n] <= '9':
			n++
		default:
			return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
		}
		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
			n++
		}
		state = withinExponentDigits
	}

	return n, state, nil
}

// parseHexUint16 is similar to strconv.ParseUint,
// but operates directly on []byte and is optimized for base-16.
// See https://go.dev/issue/42429.
//
// b must be exactly four hexadecimal digits (either case);
// otherwise it returns (0, false).
func parseHexUint16(b []byte) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for _, c := range b[:4] {
		// Turn the ASCII digit into its numeric value (0-15).
		switch {
		case '0' <= c && c <= '9':
			c = c - '0'
		case 'a' <= c && c <= 'f':
			c = 10 + c - 'a'
		case 'A' <= c && c <= 'F':
			c = 10 + c - 'A'
		default:
			return 0, false
		}
		v = v*16 + uint16(c)
	}
	return v, true
}

// parseDecUint is similar to strconv.ParseUint,
// but operates directly on []byte and is optimized for base-10.
// If the number is syntactically valid but overflows uint64,
// then it returns (math.MaxUint64, false).
// See https://go.dev/issue/42429.
func parseDecUint(b []byte) (v uint64, ok bool) {
	// The overflow detection follows strconv/atoi.go:138-149 from Go1.15, where:
	//   - cutoff equals math.MaxUint64/10+1, and
	//   - the n1 > maxVal check is dropped,
	//     because maxVal is the same as math.MaxUint64.
	const cutoff = math.MaxUint64/10 + 1

	if len(b) == 0 {
		return 0, false
	}
	overflowed := false
	for _, c := range b {
		if c < '0' || '9' < c {
			return 0, false // not purely decimal digits
		}
		if v >= cutoff {
			overflowed = true // the multiplication below wraps around
		}
		v *= 10

		sum := v + uint64(c-'0')
		if sum < v {
			overflowed = true // the addition wrapped around
		}
		v = sum
	}
	if overflowed {
		return math.MaxUint64, false
	}
	return v, true
}

// parseFloat parses b as a floating point number following the Go float
// grammar, of which the JSON number grammar is a strict subset.
// The bits argument selects float32 (32) or float64 (64) precision.
//
// A number too large for the finite range of the float type yields
// MaxFloat with the appropriate sign, since any finite value is always
// infinitely more accurate at representing another finite value
// than an infinite value would be.
func parseFloat(b []byte, bits int) (v float64, ok bool) {
	// Fast path: plain integers that fit exactly within the
	// 24-bit or 53-bit significand of a float32 or float64.
	digits, sign := b, 1.0
	if len(b) > 0 && b[0] == '-' {
		digits, sign = b[1:], -1.0
	}
	if u, isUint := parseDecUint(digits); isUint {
		exact := false
		switch bits {
		case 32:
			exact = u <= 1<<24
		case 64:
			exact = u <= 1<<53
		}
		if exact {
			return math.Copysign(float64(u), sign), true
		}
	}

	// Unfortunately the []byte->string conversion allocates.
	// See https://go.dev/issue/42429 for more information.
	fv, err := strconv.ParseFloat(string(b), bits)
	if bits == 32 || bits == 64 {
		limit := math.MaxFloat64
		if bits == 32 {
			limit = math.MaxFloat32
		}
		// Clamp an overflow to the largest finite value of the same sign.
		switch {
		case math.IsInf(fv, +1):
			return +limit, true
		case math.IsInf(fv, -1):
			return -limit, true
		}
	}
	return fv, err == nil
}