k8s.io/kube-openapi@v0.0.0-20240228011516-70dd3763d340/pkg/internal/third_party/go-json-experiment/json/decode.go

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  	"math"
    12  	"strconv"
    13  	"unicode/utf16"
    14  	"unicode/utf8"
    15  )
    16  
    17  // NOTE: The logic for decoding is complicated by the fact that reading from
    18  // an io.Reader into a temporary buffer means that the buffer may contain a
     19  // truncated portion of some valid input, requiring more data to be fetched.
    20  //
    21  // This file is structured in the following way:
    22  //
    23  //   - consumeXXX functions parse an exact JSON token from a []byte.
    24  //     If the buffer appears truncated, then it returns io.ErrUnexpectedEOF.
    25  //     The consumeSimpleXXX functions are so named because they only handle
    26  //     a subset of the grammar for the JSON token being parsed.
    27  //     They do not handle the full grammar to keep these functions inlineable.
    28  //
    29  //   - Decoder.consumeXXX methods parse the next JSON token from Decoder.buf,
    30  //     automatically fetching more input if necessary. These methods take
    31  //     a position relative to the start of Decoder.buf as an argument and
    32  //     return the end of the consumed JSON token as a position,
    33  //     also relative to the start of Decoder.buf.
    34  //
     35  //   - In the event of an I/O error or a state machine violation,
    36  //     the implementation avoids mutating the state of Decoder
    37  //     (aside from the book-keeping needed to implement Decoder.fetch).
    38  //     For this reason, only Decoder.ReadToken and Decoder.ReadValue are
     39  //     responsible for updating Decoder.prevStart and Decoder.prevEnd.
    40  //
    41  //   - For performance, much of the implementation uses the pattern of calling
    42  //     the inlineable consumeXXX functions first, and if more work is necessary,
    43  //     then it calls the slower Decoder.consumeXXX methods.
    44  //     TODO: Revisit this pattern if the Go compiler provides finer control
    45  //     over exactly which calls are inlined or not.
    46  
    47  // DecodeOptions configures how JSON decoding operates.
    48  // The zero value is equivalent to the default settings,
    49  // which is compliant with both RFC 7493 and RFC 8259.
    50  type DecodeOptions struct {
    51  	requireKeyedLiterals
    52  	nonComparable
    53  
    54  	// AllowDuplicateNames specifies that JSON objects may contain
    55  	// duplicate member names. Disabling the duplicate name check may provide
    56  	// computational and performance benefits, but breaks compliance with
    57  	// RFC 7493, section 2.3. The input will still be compliant with RFC 8259,
    58  	// which leaves the handling of duplicate names as unspecified behavior.
    59  	AllowDuplicateNames bool
    60  
    61  	// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
    62  	// which will be mangled as the Unicode replacement character, U+FFFD.
    63  	// This causes the decoder to break compliance with
    64  	// RFC 7493, section 2.1, and RFC 8259, section 8.1.
    65  	AllowInvalidUTF8 bool
    66  }
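
// Example (illustrative sketch, not part of the original source): constructing
// a Decoder with non-default options. The input literal and variable names
// are hypothetical.
//
//	opts := DecodeOptions{
//		AllowDuplicateNames: true, // skip the RFC 7493, section 2.3 duplicate-name check
//		AllowInvalidUTF8:    true, // mangle invalid UTF-8 as U+FFFD instead of failing
//	}
//	d := opts.NewDecoder(strings.NewReader(`{"k":"v1","k":"v2"}`))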
    67  
    68  // Decoder is a streaming decoder for raw JSON tokens and values.
    69  // It is used to read a stream of top-level JSON values,
    70  // each separated by optional whitespace characters.
    71  //
    72  // ReadToken and ReadValue calls may be interleaved.
    73  // For example, the following JSON value:
    74  //
    75  //	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
    76  //
    77  // can be parsed with the following calls (ignoring errors for brevity):
    78  //
    79  //	d.ReadToken() // {
    80  //	d.ReadToken() // "name"
    81  //	d.ReadToken() // "value"
    82  //	d.ReadValue() // "array"
    83  //	d.ReadToken() // [
    84  //	d.ReadToken() // null
    85  //	d.ReadToken() // false
    86  //	d.ReadValue() // true
    87  //	d.ReadToken() // 3.14159
    88  //	d.ReadToken() // ]
    89  //	d.ReadValue() // "object"
    90  //	d.ReadValue() // {"k":"v"}
    91  //	d.ReadToken() // }
    92  //
     93  // The above is one of many possible sequences of calls and
    94  // may not represent the most sensible method to call for any given token/value.
    95  // For example, it is probably more common to call ReadToken to obtain a
    96  // string token for object names.
    97  type Decoder struct {
    98  	state
    99  	decodeBuffer
   100  	options DecodeOptions
   101  
   102  	stringCache *stringCache // only used when unmarshaling
   103  }
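
// Example (illustrative sketch): draining a stream of top-level values with
// ReadToken. The input literal and variable names are hypothetical.
//
//	d := NewDecoder(strings.NewReader(`{"name":"value"} [1,2,3] null`))
//	for {
//		tok, err := d.ReadToken()
//		if err == io.EOF {
//			break // no more tokens in the stream
//		}
//		if err != nil {
//			// handle the error and stop
//			break
//		}
//		_ = tok.Kind() // e.g. '{', '"', '0', ']', 'n'
//	}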
   104  
   105  // decodeBuffer is a buffer split into 4 segments:
   106  //
   107  //   - buf[0:prevEnd]         // already read portion of the buffer
   108  //   - buf[prevStart:prevEnd] // previously read value
   109  //   - buf[prevEnd:len(buf)]  // unread portion of the buffer
   110  //   - buf[len(buf):cap(buf)] // unused portion of the buffer
   111  //
   112  // Invariants:
   113  //
   114  //	0 ≤ prevStart ≤ prevEnd ≤ len(buf) ≤ cap(buf)
   115  type decodeBuffer struct {
   116  	peekPos int   // non-zero if valid offset into buf for start of next token
   117  	peekErr error // implies peekPos is -1
   118  
   119  	buf       []byte // may alias rd if it is a bytes.Buffer
   120  	prevStart int
   121  	prevEnd   int
   122  
   123  	// baseOffset is added to prevStart and prevEnd to obtain
    124  	// the absolute offset relative to the start of the io.Reader stream.
   125  	baseOffset int64
   126  
   127  	rd io.Reader
   128  }
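
// For example (illustrative): if the first 100 bytes of the stream have already
// been discarded from buf (baseOffset == 100) and the previously read value
// occupies buf[5:12], then previousOffsetStart() reports 105 and
// previousOffsetEnd() reports 112, both absolute offsets into the io.Reader stream.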
   129  
   130  // NewDecoder constructs a new streaming decoder reading from r.
   131  //
   132  // If r is a bytes.Buffer, then the decoder parses directly from the buffer
   133  // without first copying the contents to an intermediate buffer.
   134  // Additional writes to the buffer must not occur while the decoder is in use.
   135  func NewDecoder(r io.Reader) *Decoder {
   136  	return DecodeOptions{}.NewDecoder(r)
   137  }
   138  
   139  // NewDecoder constructs a new streaming decoder reading from r
   140  // configured with the provided options.
   141  func (o DecodeOptions) NewDecoder(r io.Reader) *Decoder {
   142  	d := new(Decoder)
   143  	o.ResetDecoder(d, r)
   144  	return d
   145  }
   146  
   147  // ResetDecoder resets a decoder such that it is reading afresh from r and
   148  // configured with the provided options.
   149  func (o DecodeOptions) ResetDecoder(d *Decoder, r io.Reader) {
   150  	if d == nil {
   151  		panic("json: invalid nil Decoder")
   152  	}
   153  	if r == nil {
   154  		panic("json: invalid nil io.Reader")
   155  	}
   156  	d.reset(nil, r, o)
   157  }
   158  
   159  func (d *Decoder) reset(b []byte, r io.Reader, o DecodeOptions) {
   160  	d.state.reset()
   161  	d.decodeBuffer = decodeBuffer{buf: b, rd: r}
   162  	d.options = o
   163  }
   164  
   165  // Reset resets a decoder such that it is reading afresh from r but
    166  // keeps any pre-existing decoder options.
   167  func (d *Decoder) Reset(r io.Reader) {
   168  	d.options.ResetDecoder(d, r)
   169  }
   170  
   171  var errBufferWriteAfterNext = errors.New("invalid bytes.Buffer.Write call after calling bytes.Buffer.Next")
   172  
   173  // fetch reads at least 1 byte from the underlying io.Reader.
   174  // It returns io.ErrUnexpectedEOF if zero bytes were read and io.EOF was seen.
   175  func (d *Decoder) fetch() error {
   176  	if d.rd == nil {
   177  		return io.ErrUnexpectedEOF
   178  	}
   179  
   180  	// Inform objectNameStack that we are about to fetch new buffer content.
   181  	d.names.copyQuotedBuffer(d.buf)
   182  
   183  	// Specialize bytes.Buffer for better performance.
   184  	if bb, ok := d.rd.(*bytes.Buffer); ok {
   185  		switch {
   186  		case bb.Len() == 0:
   187  			return io.ErrUnexpectedEOF
   188  		case len(d.buf) == 0:
   189  			d.buf = bb.Next(bb.Len()) // "read" all data in the buffer
   190  			return nil
   191  		default:
   192  			// This only occurs if a partially filled bytes.Buffer was provided
   193  			// and more data is written to it while Decoder is reading from it.
   194  			// This practice will lead to data corruption since future writes
   195  			// may overwrite the contents of the current buffer.
   196  			//
   197  			// The user is trying to use a bytes.Buffer as a pipe,
    198  			// but a bytes.Buffer is a poor implementation of a pipe;
   199  			// the purpose-built io.Pipe should be used instead.
   200  			return &ioError{action: "read", err: errBufferWriteAfterNext}
   201  		}
   202  	}
   203  
   204  	// Allocate initial buffer if empty.
   205  	if cap(d.buf) == 0 {
   206  		d.buf = make([]byte, 0, 64)
   207  	}
   208  
   209  	// Check whether to grow the buffer.
   210  	const maxBufferSize = 4 << 10
   211  	const growthSizeFactor = 2 // higher value is faster
   212  	const growthRateFactor = 2 // higher value is slower
   213  	// By default, grow if below the maximum buffer size.
   214  	grow := cap(d.buf) <= maxBufferSize/growthSizeFactor
   215  	// Growing can be expensive, so only grow
   216  	// if a sufficient number of bytes have been processed.
   217  	grow = grow && int64(cap(d.buf)) < d.previousOffsetEnd()/growthRateFactor
   218  	// If prevStart==0, then fetch was called in order to fetch more data
   219  	// to finish consuming a large JSON value contiguously.
   220  	// Grow if less than 25% of the remaining capacity is available.
   221  	// Note that this may cause the input buffer to exceed maxBufferSize.
   222  	grow = grow || (d.prevStart == 0 && len(d.buf) >= 3*cap(d.buf)/4)
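	// For example (illustrative): with maxBufferSize of 4KiB and both factors
	// at 2, a 1KiB buffer is doubled to 2KiB only after more than 2KiB of input
	// has already been processed, or when it is over 75% full while finishing
	// a value that begins at the start of the buffer (prevStart == 0).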
   223  
   224  	if grow {
   225  		// Allocate a new buffer and copy the contents of the old buffer over.
   226  		// TODO: Provide a hard limit on the maximum internal buffer size?
   227  		buf := make([]byte, 0, cap(d.buf)*growthSizeFactor)
   228  		d.buf = append(buf, d.buf[d.prevStart:]...)
   229  	} else {
   230  		// Move unread portion of the data to the front.
   231  		n := copy(d.buf[:cap(d.buf)], d.buf[d.prevStart:])
   232  		d.buf = d.buf[:n]
   233  	}
   234  	d.baseOffset += int64(d.prevStart)
   235  	d.prevEnd -= d.prevStart
   236  	d.prevStart = 0
   237  
   238  	// Read more data into the internal buffer.
   239  	for {
   240  		n, err := d.rd.Read(d.buf[len(d.buf):cap(d.buf)])
   241  		switch {
   242  		case n > 0:
   243  			d.buf = d.buf[:len(d.buf)+n]
   244  			return nil // ignore errors if any bytes are read
   245  		case err == io.EOF:
   246  			return io.ErrUnexpectedEOF
   247  		case err != nil:
   248  			return &ioError{action: "read", err: err}
   249  		default:
   250  			continue // Read returned (0, nil)
   251  		}
   252  	}
   253  }
   254  
   255  const invalidateBufferByte = '#' // invalid starting character for JSON grammar
   256  
   257  // invalidatePreviousRead invalidates buffers returned by Peek and Read calls
   258  // so that the first byte is an invalid character.
   259  // This Hyrum-proofs the API against faulty application code that assumes
   260  // values returned by ReadValue remain valid past subsequent Read calls.
   261  func (d *decodeBuffer) invalidatePreviousRead() {
   262  	// Avoid mutating the buffer if d.rd is nil which implies that d.buf
   263  	// is provided by the user code and may not expect mutations.
   264  	isBytesBuffer := func(r io.Reader) bool {
   265  		_, ok := r.(*bytes.Buffer)
   266  		return ok
   267  	}
   268  	if d.rd != nil && !isBytesBuffer(d.rd) && d.prevStart < d.prevEnd && uint(d.prevStart) < uint(len(d.buf)) {
   269  		d.buf[d.prevStart] = invalidateBufferByte
   270  		d.prevStart = d.prevEnd
   271  	}
   272  }
   273  
   274  // needMore reports whether there are no more unread bytes.
   275  func (d *decodeBuffer) needMore(pos int) bool {
   276  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
   277  	return pos == len(d.buf)
   278  }
   279  
    280  // injectSyntacticErrorWithPosition wraps err with the given position if it is
    281  // a SyntacticError; otherwise it returns the error as is.
    282  // It takes a position relative to the start of d.buf.
   283  func (d *decodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) error {
   284  	if serr, ok := err.(*SyntacticError); ok {
   285  		return serr.withOffset(d.baseOffset + int64(pos))
   286  	}
   287  	return err
   288  }
   289  
   290  func (d *decodeBuffer) previousOffsetStart() int64 { return d.baseOffset + int64(d.prevStart) }
   291  func (d *decodeBuffer) previousOffsetEnd() int64   { return d.baseOffset + int64(d.prevEnd) }
   292  func (d *decodeBuffer) previousBuffer() []byte     { return d.buf[d.prevStart:d.prevEnd] }
   293  func (d *decodeBuffer) unreadBuffer() []byte       { return d.buf[d.prevEnd:len(d.buf)] }
   294  
   295  // PeekKind retrieves the next token kind, but does not advance the read offset.
   296  // It returns 0 if there are no more tokens.
   297  func (d *Decoder) PeekKind() Kind {
   298  	// Check whether we have a cached peek result.
   299  	if d.peekPos > 0 {
   300  		return Kind(d.buf[d.peekPos]).normalize()
   301  	}
   302  
   303  	var err error
   304  	d.invalidatePreviousRead()
   305  	pos := d.prevEnd
   306  
   307  	// Consume leading whitespace.
   308  	pos += consumeWhitespace(d.buf[pos:])
   309  	if d.needMore(pos) {
   310  		if pos, err = d.consumeWhitespace(pos); err != nil {
   311  			if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
   312  				err = io.EOF // EOF possibly if no Tokens present after top-level value
   313  			}
   314  			d.peekPos, d.peekErr = -1, err
   315  			return invalidKind
   316  		}
   317  	}
   318  
   319  	// Consume colon or comma.
   320  	var delim byte
   321  	if c := d.buf[pos]; c == ':' || c == ',' {
   322  		delim = c
   323  		pos += 1
   324  		pos += consumeWhitespace(d.buf[pos:])
   325  		if d.needMore(pos) {
   326  			if pos, err = d.consumeWhitespace(pos); err != nil {
   327  				d.peekPos, d.peekErr = -1, err
   328  				return invalidKind
   329  			}
   330  		}
   331  	}
   332  	next := Kind(d.buf[pos]).normalize()
   333  	if d.tokens.needDelim(next) != delim {
   334  		pos = d.prevEnd // restore position to right after leading whitespace
   335  		pos += consumeWhitespace(d.buf[pos:])
   336  		err = d.tokens.checkDelim(delim, next)
   337  		err = d.injectSyntacticErrorWithPosition(err, pos)
   338  		d.peekPos, d.peekErr = -1, err
   339  		return invalidKind
   340  	}
   341  
   342  	// This may set peekPos to zero, which is indistinguishable from
   343  	// the uninitialized state. While a small hit to performance, it is correct
   344  	// since ReadValue and ReadToken will disregard the cached result and
   345  	// recompute the next kind.
   346  	d.peekPos, d.peekErr = pos, nil
   347  	return next
   348  }
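
// Example (illustrative sketch): using PeekKind to branch on the next token
// without consuming it. The handling shown for each kind is hypothetical.
//
//	switch d.PeekKind() {
//	case '{':
//		// next token starts a JSON object
//	case '[':
//		// next token starts a JSON array
//	case '"', '0', 'n', 't', 'f':
//		v, _ := d.ReadValue() // read strings, numbers, and literals as a raw value
//		_ = v
//	case 0:
//		// no more tokens or a pending error; ReadToken or ReadValue reports it
//	}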
   349  
   350  // SkipValue is semantically equivalent to calling ReadValue and discarding
   351  // the result except that memory is not wasted trying to hold the entire result.
   352  func (d *Decoder) SkipValue() error {
   353  	switch d.PeekKind() {
   354  	case '{', '[':
   355  		// For JSON objects and arrays, keep skipping all tokens
   356  		// until the depth matches the starting depth.
   357  		depth := d.tokens.depth()
   358  		for {
   359  			if _, err := d.ReadToken(); err != nil {
   360  				return err
   361  			}
   362  			if depth >= d.tokens.depth() {
   363  				return nil
   364  			}
   365  		}
   366  	default:
   367  		// Trying to skip a value when the next token is a '}' or ']'
   368  		// will result in an error being returned here.
   369  		if _, err := d.ReadValue(); err != nil {
   370  			return err
   371  		}
   372  		return nil
   373  	}
   374  }
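
// Example (illustrative sketch): scanning an object for a single member and
// skipping the rest without buffering them. The member name and input literal
// are hypothetical, and errors are ignored for brevity.
//
//	d := NewDecoder(strings.NewReader(`{"skip":[1,2,3],"want":true}`))
//	d.ReadToken() // consume '{'
//	for d.PeekKind() != '}' {
//		name, _ := d.ReadToken() // object member name
//		if name.String() == "want" {
//			v, _ := d.ReadValue() // the raw value `true`
//			_ = v
//		} else {
//			d.SkipValue() // discard the member value
//		}
//	}
//	d.ReadToken() // consume '}'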
   375  
   376  // ReadToken reads the next Token, advancing the read offset.
   377  // The returned token is only valid until the next Peek, Read, or Skip call.
   378  // It returns io.EOF if there are no more tokens.
   379  func (d *Decoder) ReadToken() (Token, error) {
   380  	// Determine the next kind.
   381  	var err error
   382  	var next Kind
   383  	pos := d.peekPos
   384  	if pos != 0 {
   385  		// Use cached peek result.
   386  		if d.peekErr != nil {
   387  			err := d.peekErr
   388  			d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
   389  			return Token{}, err
   390  		}
   391  		next = Kind(d.buf[pos]).normalize()
   392  		d.peekPos = 0 // reset cache
   393  	} else {
   394  		d.invalidatePreviousRead()
   395  		pos = d.prevEnd
   396  
   397  		// Consume leading whitespace.
   398  		pos += consumeWhitespace(d.buf[pos:])
   399  		if d.needMore(pos) {
   400  			if pos, err = d.consumeWhitespace(pos); err != nil {
   401  				if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
   402  					err = io.EOF // EOF possibly if no Tokens present after top-level value
   403  				}
   404  				return Token{}, err
   405  			}
   406  		}
   407  
   408  		// Consume colon or comma.
   409  		var delim byte
   410  		if c := d.buf[pos]; c == ':' || c == ',' {
   411  			delim = c
   412  			pos += 1
   413  			pos += consumeWhitespace(d.buf[pos:])
   414  			if d.needMore(pos) {
   415  				if pos, err = d.consumeWhitespace(pos); err != nil {
   416  					return Token{}, err
   417  				}
   418  			}
   419  		}
   420  		next = Kind(d.buf[pos]).normalize()
   421  		if d.tokens.needDelim(next) != delim {
   422  			pos = d.prevEnd // restore position to right after leading whitespace
   423  			pos += consumeWhitespace(d.buf[pos:])
   424  			err = d.tokens.checkDelim(delim, next)
   425  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   426  		}
   427  	}
   428  
   429  	// Handle the next token.
   430  	var n int
   431  	switch next {
   432  	case 'n':
   433  		if consumeNull(d.buf[pos:]) == 0 {
   434  			pos, err = d.consumeLiteral(pos, "null")
   435  			if err != nil {
   436  				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   437  			}
   438  		} else {
   439  			pos += len("null")
   440  		}
   441  		if err = d.tokens.appendLiteral(); err != nil {
   442  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("null")) // report position at start of literal
   443  		}
   444  		d.prevStart, d.prevEnd = pos, pos
   445  		return Null, nil
   446  
   447  	case 'f':
   448  		if consumeFalse(d.buf[pos:]) == 0 {
   449  			pos, err = d.consumeLiteral(pos, "false")
   450  			if err != nil {
   451  				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   452  			}
   453  		} else {
   454  			pos += len("false")
   455  		}
   456  		if err = d.tokens.appendLiteral(); err != nil {
   457  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("false")) // report position at start of literal
   458  		}
   459  		d.prevStart, d.prevEnd = pos, pos
   460  		return False, nil
   461  
   462  	case 't':
   463  		if consumeTrue(d.buf[pos:]) == 0 {
   464  			pos, err = d.consumeLiteral(pos, "true")
   465  			if err != nil {
   466  				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   467  			}
   468  		} else {
   469  			pos += len("true")
   470  		}
   471  		if err = d.tokens.appendLiteral(); err != nil {
   472  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-len("true")) // report position at start of literal
   473  		}
   474  		d.prevStart, d.prevEnd = pos, pos
   475  		return True, nil
   476  
   477  	case '"':
   478  		var flags valueFlags // TODO: Preserve this in Token?
   479  		if n = consumeSimpleString(d.buf[pos:]); n == 0 {
   480  			oldAbsPos := d.baseOffset + int64(pos)
   481  			pos, err = d.consumeString(&flags, pos)
   482  			newAbsPos := d.baseOffset + int64(pos)
   483  			n = int(newAbsPos - oldAbsPos)
   484  			if err != nil {
   485  				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   486  			}
   487  		} else {
   488  			pos += n
   489  		}
   490  		if !d.options.AllowDuplicateNames && d.tokens.last.needObjectName() {
   491  			if !d.tokens.last.isValidNamespace() {
   492  				return Token{}, errInvalidNamespace
   493  			}
   494  			if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) {
   495  				err = &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
   496  				return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string
   497  			}
   498  			d.names.replaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
   499  		}
   500  		if err = d.tokens.appendString(); err != nil {
   501  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of string
   502  		}
   503  		d.prevStart, d.prevEnd = pos-n, pos
   504  		return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil
   505  
   506  	case '0':
   507  		// NOTE: Since JSON numbers are not self-terminating,
   508  		// we need to make sure that the next byte is not part of a number.
   509  		if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
   510  			oldAbsPos := d.baseOffset + int64(pos)
   511  			pos, err = d.consumeNumber(pos)
   512  			newAbsPos := d.baseOffset + int64(pos)
   513  			n = int(newAbsPos - oldAbsPos)
   514  			if err != nil {
   515  				return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   516  			}
   517  		} else {
   518  			pos += n
   519  		}
   520  		if err = d.tokens.appendNumber(); err != nil {
   521  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of number
   522  		}
   523  		d.prevStart, d.prevEnd = pos-n, pos
   524  		return Token{raw: &d.decodeBuffer, num: uint64(d.previousOffsetStart())}, nil
   525  
   526  	case '{':
   527  		if err = d.tokens.pushObject(); err != nil {
   528  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   529  		}
   530  		if !d.options.AllowDuplicateNames {
   531  			d.names.push()
   532  			d.namespaces.push()
   533  		}
   534  		pos += 1
   535  		d.prevStart, d.prevEnd = pos, pos
   536  		return ObjectStart, nil
   537  
   538  	case '}':
   539  		if err = d.tokens.popObject(); err != nil {
   540  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   541  		}
   542  		if !d.options.AllowDuplicateNames {
   543  			d.names.pop()
   544  			d.namespaces.pop()
   545  		}
   546  		pos += 1
   547  		d.prevStart, d.prevEnd = pos, pos
   548  		return ObjectEnd, nil
   549  
   550  	case '[':
   551  		if err = d.tokens.pushArray(); err != nil {
   552  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   553  		}
   554  		pos += 1
   555  		d.prevStart, d.prevEnd = pos, pos
   556  		return ArrayStart, nil
   557  
   558  	case ']':
   559  		if err = d.tokens.popArray(); err != nil {
   560  			return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   561  		}
   562  		pos += 1
   563  		d.prevStart, d.prevEnd = pos, pos
   564  		return ArrayEnd, nil
   565  
   566  	default:
   567  		err = newInvalidCharacterError(d.buf[pos:], "at start of token")
   568  		return Token{}, d.injectSyntacticErrorWithPosition(err, pos)
   569  	}
   570  }
   571  
   572  type valueFlags uint
   573  
   574  const (
   575  	_ valueFlags = (1 << iota) / 2 // powers of two starting with zero
   576  
   577  	stringNonVerbatim  // string cannot be naively treated as valid UTF-8
   578  	stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2.
   579  	// TODO: Track whether a number is a non-integer?
   580  )
   581  
   582  func (f *valueFlags) set(f2 valueFlags) { *f |= f2 }
   583  func (f valueFlags) isVerbatim() bool   { return f&stringNonVerbatim == 0 }
   584  func (f valueFlags) isCanonical() bool  { return f&stringNonCanonical == 0 }
   585  
   586  // ReadValue returns the next raw JSON value, advancing the read offset.
   587  // The value is stripped of any leading or trailing whitespace.
   588  // The returned value is only valid until the next Peek, Read, or Skip call and
   589  // may not be mutated while the Decoder remains in use.
   590  // If the decoder is currently at the end token for an object or array,
   591  // then it reports a SyntacticError and the internal state remains unchanged.
   592  // It returns io.EOF if there are no more values.
   593  func (d *Decoder) ReadValue() (RawValue, error) {
   594  	var flags valueFlags
   595  	return d.readValue(&flags)
   596  }
   597  func (d *Decoder) readValue(flags *valueFlags) (RawValue, error) {
   598  	// Determine the next kind.
   599  	var err error
   600  	var next Kind
   601  	pos := d.peekPos
   602  	if pos != 0 {
   603  		// Use cached peek result.
   604  		if d.peekErr != nil {
   605  			err := d.peekErr
   606  			d.peekPos, d.peekErr = 0, nil // possibly a transient I/O error
   607  			return nil, err
   608  		}
   609  		next = Kind(d.buf[pos]).normalize()
   610  		d.peekPos = 0 // reset cache
   611  	} else {
   612  		d.invalidatePreviousRead()
   613  		pos = d.prevEnd
   614  
   615  		// Consume leading whitespace.
   616  		pos += consumeWhitespace(d.buf[pos:])
   617  		if d.needMore(pos) {
   618  			if pos, err = d.consumeWhitespace(pos); err != nil {
   619  				if err == io.ErrUnexpectedEOF && d.tokens.depth() == 1 {
   620  					err = io.EOF // EOF possibly if no Tokens present after top-level value
   621  				}
   622  				return nil, err
   623  			}
   624  		}
   625  
   626  		// Consume colon or comma.
   627  		var delim byte
   628  		if c := d.buf[pos]; c == ':' || c == ',' {
   629  			delim = c
   630  			pos += 1
   631  			pos += consumeWhitespace(d.buf[pos:])
   632  			if d.needMore(pos) {
   633  				if pos, err = d.consumeWhitespace(pos); err != nil {
   634  					return nil, err
   635  				}
   636  			}
   637  		}
   638  		next = Kind(d.buf[pos]).normalize()
   639  		if d.tokens.needDelim(next) != delim {
   640  			pos = d.prevEnd // restore position to right after leading whitespace
   641  			pos += consumeWhitespace(d.buf[pos:])
   642  			err = d.tokens.checkDelim(delim, next)
   643  			return nil, d.injectSyntacticErrorWithPosition(err, pos)
   644  		}
   645  	}
   646  
   647  	// Handle the next value.
   648  	oldAbsPos := d.baseOffset + int64(pos)
   649  	pos, err = d.consumeValue(flags, pos)
   650  	newAbsPos := d.baseOffset + int64(pos)
   651  	n := int(newAbsPos - oldAbsPos)
   652  	if err != nil {
   653  		return nil, d.injectSyntacticErrorWithPosition(err, pos)
   654  	}
   655  	switch next {
   656  	case 'n', 't', 'f':
   657  		err = d.tokens.appendLiteral()
   658  	case '"':
   659  		if !d.options.AllowDuplicateNames && d.tokens.last.needObjectName() {
   660  			if !d.tokens.last.isValidNamespace() {
   661  				err = errInvalidNamespace
   662  				break
   663  			}
   664  			if d.tokens.last.isActiveNamespace() && !d.namespaces.last().insertQuoted(d.buf[pos-n:pos], flags.isVerbatim()) {
   665  				err = &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
   666  				break
   667  			}
   668  			d.names.replaceLastQuotedOffset(pos - n) // only replace if insertQuoted succeeds
   669  		}
   670  		err = d.tokens.appendString()
   671  	case '0':
   672  		err = d.tokens.appendNumber()
   673  	case '{':
   674  		if err = d.tokens.pushObject(); err != nil {
   675  			break
   676  		}
   677  		if err = d.tokens.popObject(); err != nil {
   678  			panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
   679  		}
   680  	case '[':
   681  		if err = d.tokens.pushArray(); err != nil {
   682  			break
   683  		}
   684  		if err = d.tokens.popArray(); err != nil {
   685  			panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
   686  		}
   687  	}
   688  	if err != nil {
   689  		return nil, d.injectSyntacticErrorWithPosition(err, pos-n) // report position at start of value
   690  	}
   691  	d.prevEnd = pos
   692  	d.prevStart = pos - n
   693  	return d.buf[pos-n : pos : pos], nil
   694  }
   695  
   696  // checkEOF verifies that the input has no more data.
   697  func (d *Decoder) checkEOF() error {
   698  	switch pos, err := d.consumeWhitespace(d.prevEnd); err {
   699  	case nil:
   700  		return newInvalidCharacterError(d.buf[pos:], "after top-level value")
   701  	case io.ErrUnexpectedEOF:
   702  		return nil
   703  	default:
   704  		return err
   705  	}
   706  }
   707  
   708  // consumeWhitespace consumes all whitespace starting at d.buf[pos:].
   709  // It returns the new position in d.buf immediately after the last whitespace.
    710  // If the returned error is nil, there is guaranteed to be at least one unread byte.
   711  //
   712  // The following pattern is common in this implementation:
   713  //
   714  //	pos += consumeWhitespace(d.buf[pos:])
   715  //	if d.needMore(pos) {
   716  //		if pos, err = d.consumeWhitespace(pos); err != nil {
   717  //			return ...
   718  //		}
   719  //	}
   720  //
   721  // It is difficult to simplify this without sacrificing performance since
   722  // consumeWhitespace must be inlined. The body of the if statement is
   723  // executed only in rare situations where we need to fetch more data.
   724  // Since fetching may return an error, we also need to check the error.
   725  func (d *Decoder) consumeWhitespace(pos int) (newPos int, err error) {
   726  	for {
   727  		pos += consumeWhitespace(d.buf[pos:])
   728  		if d.needMore(pos) {
   729  			absPos := d.baseOffset + int64(pos)
   730  			err = d.fetch() // will mutate d.buf and invalidate pos
   731  			pos = int(absPos - d.baseOffset)
   732  			if err != nil {
   733  				return pos, err
   734  			}
   735  			continue
   736  		}
   737  		return pos, nil
   738  	}
   739  }
   740  
   741  // consumeValue consumes a single JSON value starting at d.buf[pos:].
   742  // It returns the new position in d.buf immediately after the value.
   743  func (d *Decoder) consumeValue(flags *valueFlags, pos int) (newPos int, err error) {
   744  	for {
   745  		var n int
   746  		var err error
   747  		switch next := Kind(d.buf[pos]).normalize(); next {
   748  		case 'n':
   749  			if n = consumeNull(d.buf[pos:]); n == 0 {
   750  				n, err = consumeLiteral(d.buf[pos:], "null")
   751  			}
   752  		case 'f':
   753  			if n = consumeFalse(d.buf[pos:]); n == 0 {
   754  				n, err = consumeLiteral(d.buf[pos:], "false")
   755  			}
   756  		case 't':
   757  			if n = consumeTrue(d.buf[pos:]); n == 0 {
   758  				n, err = consumeLiteral(d.buf[pos:], "true")
   759  			}
   760  		case '"':
   761  			if n = consumeSimpleString(d.buf[pos:]); n == 0 {
   762  				return d.consumeString(flags, pos)
   763  			}
   764  		case '0':
   765  			// NOTE: Since JSON numbers are not self-terminating,
   766  			// we need to make sure that the next byte is not part of a number.
   767  			if n = consumeSimpleNumber(d.buf[pos:]); n == 0 || d.needMore(pos+n) {
   768  				return d.consumeNumber(pos)
   769  			}
   770  		case '{':
   771  			return d.consumeObject(flags, pos)
   772  		case '[':
   773  			return d.consumeArray(flags, pos)
   774  		default:
   775  			return pos, newInvalidCharacterError(d.buf[pos:], "at start of value")
   776  		}
   777  		if err == io.ErrUnexpectedEOF {
   778  			absPos := d.baseOffset + int64(pos)
   779  			err = d.fetch() // will mutate d.buf and invalidate pos
   780  			pos = int(absPos - d.baseOffset)
   781  			if err != nil {
   782  				return pos, err
   783  			}
   784  			continue
   785  		}
   786  		return pos + n, err
   787  	}
   788  }
   789  
   790  // consumeLiteral consumes a single JSON literal starting at d.buf[pos:].
   791  // It returns the new position in d.buf immediately after the literal.
   792  func (d *Decoder) consumeLiteral(pos int, lit string) (newPos int, err error) {
   793  	for {
   794  		n, err := consumeLiteral(d.buf[pos:], lit)
   795  		if err == io.ErrUnexpectedEOF {
   796  			absPos := d.baseOffset + int64(pos)
   797  			err = d.fetch() // will mutate d.buf and invalidate pos
   798  			pos = int(absPos - d.baseOffset)
   799  			if err != nil {
   800  				return pos, err
   801  			}
   802  			continue
   803  		}
   804  		return pos + n, err
   805  	}
   806  }
   807  
   808  // consumeString consumes a single JSON string starting at d.buf[pos:].
   809  // It returns the new position in d.buf immediately after the string.
   810  func (d *Decoder) consumeString(flags *valueFlags, pos int) (newPos int, err error) {
   811  	var n int
   812  	for {
   813  		n, err = consumeStringResumable(flags, d.buf[pos:], n, !d.options.AllowInvalidUTF8)
   814  		if err == io.ErrUnexpectedEOF {
   815  			absPos := d.baseOffset + int64(pos)
   816  			err = d.fetch() // will mutate d.buf and invalidate pos
   817  			pos = int(absPos - d.baseOffset)
   818  			if err != nil {
   819  				return pos, err
   820  			}
   821  			continue
   822  		}
   823  		return pos + n, err
   824  	}
   825  }
   826  
   827  // consumeNumber consumes a single JSON number starting at d.buf[pos:].
   828  // It returns the new position in d.buf immediately after the number.
   829  func (d *Decoder) consumeNumber(pos int) (newPos int, err error) {
   830  	var n int
   831  	var state consumeNumberState
   832  	for {
   833  		n, state, err = consumeNumberResumable(d.buf[pos:], n, state)
   834  		// NOTE: Since JSON numbers are not self-terminating,
   835  		// we need to make sure that the next byte is not part of a number.
   836  		if err == io.ErrUnexpectedEOF || d.needMore(pos+n) {
   837  			mayTerminate := err == nil
   838  			absPos := d.baseOffset + int64(pos)
   839  			err = d.fetch() // will mutate d.buf and invalidate pos
   840  			pos = int(absPos - d.baseOffset)
   841  			if err != nil {
   842  				if mayTerminate && err == io.ErrUnexpectedEOF {
   843  					return pos + n, nil
   844  				}
   845  				return pos, err
   846  			}
   847  			continue
   848  		}
   849  		return pos + n, err
   850  	}
   851  }
   852  
   853  // consumeObject consumes a single JSON object starting at d.buf[pos:].
   854  // It returns the new position in d.buf immediately after the object.
   855  func (d *Decoder) consumeObject(flags *valueFlags, pos int) (newPos int, err error) {
   856  	var n int
   857  	var names *objectNamespace
   858  	if !d.options.AllowDuplicateNames {
   859  		d.namespaces.push()
   860  		defer d.namespaces.pop()
   861  		names = d.namespaces.last()
   862  	}
   863  
   864  	// Handle before start.
   865  	if d.buf[pos] != '{' {
   866  		panic("BUG: consumeObject must be called with a buffer that starts with '{'")
   867  	}
   868  	pos++
   869  
   870  	// Handle after start.
   871  	pos += consumeWhitespace(d.buf[pos:])
   872  	if d.needMore(pos) {
   873  		if pos, err = d.consumeWhitespace(pos); err != nil {
   874  			return pos, err
   875  		}
   876  	}
   877  	if d.buf[pos] == '}' {
   878  		pos++
   879  		return pos, nil
   880  	}
   881  
   882  	for {
   883  		// Handle before name.
   884  		pos += consumeWhitespace(d.buf[pos:])
   885  		if d.needMore(pos) {
   886  			if pos, err = d.consumeWhitespace(pos); err != nil {
   887  				return pos, err
   888  			}
   889  		}
   890  		var flags2 valueFlags
   891  		if n = consumeSimpleString(d.buf[pos:]); n == 0 {
   892  			oldAbsPos := d.baseOffset + int64(pos)
   893  			pos, err = d.consumeString(&flags2, pos)
   894  			newAbsPos := d.baseOffset + int64(pos)
   895  			n = int(newAbsPos - oldAbsPos)
   896  			flags.set(flags2)
   897  			if err != nil {
   898  				return pos, err
   899  			}
   900  		} else {
   901  			pos += n
   902  		}
   903  		if !d.options.AllowDuplicateNames && !names.insertQuoted(d.buf[pos-n:pos], flags2.isVerbatim()) {
   904  			return pos - n, &SyntacticError{str: "duplicate name " + string(d.buf[pos-n:pos]) + " in object"}
   905  		}
   906  
   907  		// Handle after name.
   908  		pos += consumeWhitespace(d.buf[pos:])
   909  		if d.needMore(pos) {
   910  			if pos, err = d.consumeWhitespace(pos); err != nil {
   911  				return pos, err
   912  			}
   913  		}
   914  		if d.buf[pos] != ':' {
   915  			return pos, newInvalidCharacterError(d.buf[pos:], "after object name (expecting ':')")
   916  		}
   917  		pos++
   918  
   919  		// Handle before value.
   920  		pos += consumeWhitespace(d.buf[pos:])
   921  		if d.needMore(pos) {
   922  			if pos, err = d.consumeWhitespace(pos); err != nil {
   923  				return pos, err
   924  			}
   925  		}
   926  		pos, err = d.consumeValue(flags, pos)
   927  		if err != nil {
   928  			return pos, err
   929  		}
   930  
   931  		// Handle after value.
   932  		pos += consumeWhitespace(d.buf[pos:])
   933  		if d.needMore(pos) {
   934  			if pos, err = d.consumeWhitespace(pos); err != nil {
   935  				return pos, err
   936  			}
   937  		}
   938  		switch d.buf[pos] {
   939  		case ',':
   940  			pos++
   941  			continue
   942  		case '}':
   943  			pos++
   944  			return pos, nil
   945  		default:
   946  			return pos, newInvalidCharacterError(d.buf[pos:], "after object value (expecting ',' or '}')")
   947  		}
   948  	}
   949  }
   950  
   951  // consumeArray consumes a single JSON array starting at d.buf[pos:].
   952  // It returns the new position in d.buf immediately after the array.
   953  func (d *Decoder) consumeArray(flags *valueFlags, pos int) (newPos int, err error) {
   954  	// Handle before start.
   955  	if d.buf[pos] != '[' {
   956  		panic("BUG: consumeArray must be called with a buffer that starts with '['")
   957  	}
   958  	pos++
   959  
   960  	// Handle after start.
   961  	pos += consumeWhitespace(d.buf[pos:])
   962  	if d.needMore(pos) {
   963  		if pos, err = d.consumeWhitespace(pos); err != nil {
   964  			return pos, err
   965  		}
   966  	}
   967  	if d.buf[pos] == ']' {
   968  		pos++
   969  		return pos, nil
   970  	}
   971  
   972  	for {
   973  		// Handle before value.
   974  		pos += consumeWhitespace(d.buf[pos:])
   975  		if d.needMore(pos) {
   976  			if pos, err = d.consumeWhitespace(pos); err != nil {
   977  				return pos, err
   978  			}
   979  		}
   980  		pos, err = d.consumeValue(flags, pos)
   981  		if err != nil {
   982  			return pos, err
   983  		}
   984  
   985  		// Handle after value.
   986  		pos += consumeWhitespace(d.buf[pos:])
   987  		if d.needMore(pos) {
   988  			if pos, err = d.consumeWhitespace(pos); err != nil {
   989  				return pos, err
   990  			}
   991  		}
   992  		switch d.buf[pos] {
   993  		case ',':
   994  			pos++
   995  			continue
   996  		case ']':
   997  			pos++
   998  			return pos, nil
   999  		default:
  1000  			return pos, newInvalidCharacterError(d.buf[pos:], "after array value (expecting ',' or ']')")
  1001  		}
  1002  	}
  1003  }
  1004  
  1005  // InputOffset returns the current input byte offset. It gives the location
  1006  // of the next byte immediately after the most recently returned token or value.
  1007  // The number of bytes actually read from the underlying io.Reader may be more
  1008  // than this offset due to internal buffering effects.
  1009  func (d *Decoder) InputOffset() int64 {
  1010  	return d.previousOffsetEnd()
  1011  }
  1012  
  1013  // UnreadBuffer returns the data remaining in the unread buffer,
  1014  // which may contain zero or more bytes.
  1015  // The returned buffer must not be mutated while Decoder continues to be used.
  1016  // The buffer contents are valid until the next Peek, Read, or Skip call.
  1017  func (d *Decoder) UnreadBuffer() []byte {
  1018  	return d.unreadBuffer()
  1019  }
  1020  
  1021  // StackDepth returns the depth of the state machine for read JSON data.
  1022  // Each level on the stack represents a nested JSON object or array.
  1023  // It is incremented whenever an ObjectStart or ArrayStart token is encountered
  1024  // and decremented whenever an ObjectEnd or ArrayEnd token is encountered.
  1025  // The depth is zero-indexed, where zero represents the top-level JSON value.
  1026  func (d *Decoder) StackDepth() int {
  1027  	// NOTE: Keep in sync with Encoder.StackDepth.
  1028  	return d.tokens.depth() - 1
  1029  }
  1030  
  1031  // StackIndex returns information about the specified stack level.
  1032  // It must be a number between 0 and StackDepth, inclusive.
  1033  // For each level, it reports the kind:
  1034  //
  1035  //   - 0 for a level of zero,
  1036  //   - '{' for a level representing a JSON object, and
  1037  //   - '[' for a level representing a JSON array.
  1038  //
  1039  // It also reports the length of that JSON object or array.
  1040  // Each name and value in a JSON object is counted separately,
  1041  // so the effective number of members would be half the length.
  1042  // A complete JSON object must have an even length.
  1043  func (d *Decoder) StackIndex(i int) (Kind, int) {
  1044  	// NOTE: Keep in sync with Encoder.StackIndex.
  1045  	switch s := d.tokens.index(i); {
  1046  	case i > 0 && s.isObject():
  1047  		return '{', s.length()
  1048  	case i > 0 && s.isArray():
  1049  		return '[', s.length()
  1050  	default:
  1051  		return 0, s.length()
  1052  	}
  1053  }
  1054  
  1055  // StackPointer returns a JSON Pointer (RFC 6901) to the most recently read value.
   1056  // Object names are only present if AllowDuplicateNames is false; otherwise,
  1057  // object members are represented using their index within the object.
  1058  func (d *Decoder) StackPointer() string {
  1059  	d.names.copyQuotedBuffer(d.buf)
  1060  	return string(d.appendStackPointer(nil))
  1061  }
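
// Example (illustrative sketch): inspecting decoder state after descending into
// a nested value. The input is hypothetical and the reported pointer assumes
// the default AllowDuplicateNames=false so that object names are tracked.
//
//	d := NewDecoder(strings.NewReader(`{"outer":{"inner":[true]}}`))
//	// ... ReadToken calls down to and including the true literal ...
//	_ = d.StackDepth()   // 3: two nested objects plus the array
//	k, n := d.StackIndex(3)
//	_, _ = k, n          // '[' and the number of array elements read so far
//	_ = d.StackPointer() // a JSON Pointer such as "/outer/inner/0"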
  1062  
  1063  // consumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.
  1064  func consumeWhitespace(b []byte) (n int) {
  1065  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1066  	for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') {
  1067  		n++
  1068  	}
  1069  	return n
  1070  }
  1071  
  1072  // consumeNull consumes the next JSON null literal per RFC 7159, section 3.
  1073  // It returns 0 if it is invalid, in which case consumeLiteral should be used.
  1074  func consumeNull(b []byte) int {
  1075  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1076  	const literal = "null"
  1077  	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
  1078  		return len(literal)
  1079  	}
  1080  	return 0
  1081  }
  1082  
  1083  // consumeFalse consumes the next JSON false literal per RFC 7159, section 3.
  1084  // It returns 0 if it is invalid, in which case consumeLiteral should be used.
  1085  func consumeFalse(b []byte) int {
  1086  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1087  	const literal = "false"
  1088  	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
  1089  		return len(literal)
  1090  	}
  1091  	return 0
  1092  }
  1093  
  1094  // consumeTrue consumes the next JSON true literal per RFC 7159, section 3.
  1095  // It returns 0 if it is invalid, in which case consumeLiteral should be used.
  1096  func consumeTrue(b []byte) int {
  1097  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1098  	const literal = "true"
  1099  	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
  1100  		return len(literal)
  1101  	}
  1102  	return 0
  1103  }
  1104  
  1105  // consumeLiteral consumes the next JSON literal per RFC 7159, section 3.
  1106  // If the input appears truncated, it returns io.ErrUnexpectedEOF.
  1107  func consumeLiteral(b []byte, lit string) (n int, err error) {
  1108  	for i := 0; i < len(b) && i < len(lit); i++ {
  1109  		if b[i] != lit[i] {
  1110  			return i, newInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
  1111  		}
  1112  	}
  1113  	if len(b) < len(lit) {
  1114  		return len(b), io.ErrUnexpectedEOF
  1115  	}
  1116  	return len(lit), nil
  1117  }
  1118  
  1119  // consumeSimpleString consumes the next JSON string per RFC 7159, section 7
  1120  // but is limited to the grammar for an ASCII string without escape sequences.
  1121  // It returns 0 if it is invalid or more complicated than a simple string,
  1122  // in which case consumeString should be called.
  1123  func consumeSimpleString(b []byte) (n int) {
  1124  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1125  	if len(b) > 0 && b[0] == '"' {
  1126  		n++
  1127  		for len(b) > n && (' ' <= b[n] && b[n] != '\\' && b[n] != '"' && b[n] < utf8.RuneSelf) {
  1128  			n++
  1129  		}
  1130  		if len(b) > n && b[n] == '"' {
  1131  			n++
  1132  			return n
  1133  		}
  1134  	}
  1135  	return 0
  1136  }
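
// For example (illustrative): consumeSimpleString([]byte(`"hello"`)) returns 7,
// whereas inputs containing escape sequences or non-ASCII bytes, such as
// `"a\nb"` or `"héllo"`, return 0 and must be handled by consumeString instead.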
  1137  
  1138  // consumeString consumes the next JSON string per RFC 7159, section 7.
  1139  // If validateUTF8 is false, then this allows the presence of invalid UTF-8
  1140  // characters within the string itself.
  1141  // It reports the number of bytes consumed and whether an error was encountered.
  1142  // If the input appears truncated, it returns io.ErrUnexpectedEOF.
  1143  func consumeString(flags *valueFlags, b []byte, validateUTF8 bool) (n int, err error) {
  1144  	return consumeStringResumable(flags, b, 0, validateUTF8)
  1145  }
  1146  
  1147  // consumeStringResumable is identical to consumeString but supports resuming
  1148  // from a previous call that returned io.ErrUnexpectedEOF.
  1149  func consumeStringResumable(flags *valueFlags, b []byte, resumeOffset int, validateUTF8 bool) (n int, err error) {
  1150  	// Consume the leading double quote.
  1151  	switch {
  1152  	case resumeOffset > 0:
  1153  		n = resumeOffset // already handled the leading quote
  1154  	case uint(len(b)) == 0:
  1155  		return n, io.ErrUnexpectedEOF
  1156  	case b[0] == '"':
  1157  		n++
  1158  	default:
  1159  		return n, newInvalidCharacterError(b[n:], `at start of string (expecting '"')`)
  1160  	}
  1161  
  1162  	// Consume every character in the string.
  1163  	for uint(len(b)) > uint(n) {
  1164  		// Optimize for long sequences of unescaped characters.
  1165  		noEscape := func(c byte) bool {
  1166  			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
  1167  		}
  1168  		for uint(len(b)) > uint(n) && noEscape(b[n]) {
  1169  			n++
  1170  		}
  1171  		if uint(len(b)) <= uint(n) {
  1172  			return n, io.ErrUnexpectedEOF
  1173  		}
  1174  
  1175  		// Check for terminating double quote.
  1176  		if b[n] == '"' {
  1177  			n++
  1178  			return n, nil
  1179  		}
  1180  
  1181  		switch r, rn := utf8.DecodeRune(b[n:]); {
  1182  		// Handle UTF-8 encoded byte sequence.
  1183  		// Due to specialized handling of ASCII above, we know that
  1184  		// all normal sequences at this point must be 2 bytes or larger.
  1185  		case rn > 1:
  1186  			n += rn
  1187  		// Handle escape sequence.
  1188  		case r == '\\':
  1189  			flags.set(stringNonVerbatim)
  1190  			resumeOffset = n
  1191  			if uint(len(b)) < uint(n+2) {
  1192  				return resumeOffset, io.ErrUnexpectedEOF
  1193  			}
  1194  			switch r := b[n+1]; r {
  1195  			case '/':
  1196  				// Forward slash is the only character with 3 representations.
  1197  				// Per RFC 8785, section 3.2.2.2., this must not be escaped.
  1198  				flags.set(stringNonCanonical)
  1199  				n += 2
  1200  			case '"', '\\', 'b', 'f', 'n', 'r', 't':
  1201  				n += 2
  1202  			case 'u':
  1203  				if uint(len(b)) < uint(n+6) {
  1204  					if !hasEscapeSequencePrefix(b[n:]) {
  1205  						flags.set(stringNonCanonical)
  1206  						return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:])) + " within string"}
  1207  					}
  1208  					return resumeOffset, io.ErrUnexpectedEOF
  1209  				}
  1210  				v1, ok := parseHexUint16(b[n+2 : n+6])
  1211  				if !ok {
  1212  					flags.set(stringNonCanonical)
  1213  					return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+6])) + " within string"}
  1214  				}
  1215  				// Only certain control characters can use the \uFFFF notation
  1216  				// for canonical formatting (per RFC 8785, section 3.2.2.2.).
  1217  				switch v1 {
  1218  				// \uFFFF notation not permitted for these characters.
  1219  				case '\b', '\f', '\n', '\r', '\t':
  1220  					flags.set(stringNonCanonical)
  1221  				default:
  1222  					// \uFFFF notation only permitted for control characters.
  1223  					if v1 >= ' ' {
  1224  						flags.set(stringNonCanonical)
  1225  					} else {
  1226  						// \uFFFF notation must be lower case.
  1227  						for _, c := range b[n+2 : n+6] {
  1228  							if 'A' <= c && c <= 'F' {
  1229  								flags.set(stringNonCanonical)
  1230  							}
  1231  						}
  1232  					}
  1233  				}
  1234  				n += 6
  1235  
  1236  				if validateUTF8 && utf16.IsSurrogate(rune(v1)) {
  1237  					if uint(len(b)) >= uint(n+2) && (b[n] != '\\' || b[n+1] != 'u') {
  1238  						return n, &SyntacticError{str: "invalid unpaired surrogate half within string"}
  1239  					}
  1240  					if uint(len(b)) < uint(n+6) {
  1241  						if !hasEscapeSequencePrefix(b[n:]) {
  1242  							flags.set(stringNonCanonical)
  1243  							return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:])) + " within string"}
  1244  						}
  1245  						return resumeOffset, io.ErrUnexpectedEOF
  1246  					}
  1247  					v2, ok := parseHexUint16(b[n+2 : n+6])
  1248  					if !ok {
  1249  						return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+6])) + " within string"}
  1250  					}
  1251  					if utf16.DecodeRune(rune(v1), rune(v2)) == utf8.RuneError {
  1252  						return n, &SyntacticError{str: "invalid surrogate pair in string"}
  1253  					}
  1254  					n += 6
  1255  				}
  1256  			default:
  1257  				flags.set(stringNonCanonical)
  1258  				return n, &SyntacticError{str: "invalid escape sequence " + strconv.Quote(string(b[n:n+2])) + " within string"}
  1259  			}
  1260  		// Handle invalid UTF-8.
  1261  		case r == utf8.RuneError:
  1262  			if !utf8.FullRune(b[n:]) {
  1263  				return n, io.ErrUnexpectedEOF
  1264  			}
  1265  			flags.set(stringNonVerbatim | stringNonCanonical)
  1266  			if validateUTF8 {
  1267  				return n, &SyntacticError{str: "invalid UTF-8 within string"}
  1268  			}
  1269  			n++
  1270  		// Handle invalid control characters.
  1271  		case r < ' ':
  1272  			flags.set(stringNonVerbatim | stringNonCanonical)
  1273  			return n, newInvalidCharacterError(b[n:], "within string (expecting non-control character)")
  1274  		default:
  1275  			panic("BUG: unhandled character " + quoteRune(b[n:]))
  1276  		}
  1277  	}
  1278  	return n, io.ErrUnexpectedEOF
  1279  }
  1280  
  1281  // hasEscapeSequencePrefix reports whether b is possibly
  1282  // the truncated prefix of a \uFFFF escape sequence.
  1283  func hasEscapeSequencePrefix(b []byte) bool {
  1284  	for i, c := range b {
  1285  		switch {
  1286  		case i == 0 && c != '\\':
  1287  			return false
  1288  		case i == 1 && c != 'u':
  1289  			return false
  1290  		case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'):
  1291  			return false
  1292  		}
  1293  	}
  1294  	return true
  1295  }
  1296  
  1297  // unescapeString appends the unescaped form of a JSON string in src to dst.
  1298  // Any invalid UTF-8 within the string will be replaced with utf8.RuneError.
  1299  // The input must be an entire JSON string with no surrounding whitespace.
  1300  func unescapeString(dst, src []byte) (v []byte, ok bool) {
  1301  	// Consume leading double quote.
  1302  	if uint(len(src)) == 0 || src[0] != '"' {
  1303  		return dst, false
  1304  	}
  1305  	i, n := 1, 1
  1306  
  1307  	// Consume every character until completion.
  1308  	for uint(len(src)) > uint(n) {
  1309  		// Optimize for long sequences of unescaped characters.
  1310  		noEscape := func(c byte) bool {
  1311  			return c < utf8.RuneSelf && ' ' <= c && c != '\\' && c != '"'
  1312  		}
  1313  		for uint(len(src)) > uint(n) && noEscape(src[n]) {
  1314  			n++
  1315  		}
  1316  		if uint(len(src)) <= uint(n) {
  1317  			break
  1318  		}
  1319  
  1320  		// Check for terminating double quote.
  1321  		if src[n] == '"' {
  1322  			dst = append(dst, src[i:n]...)
  1323  			n++
  1324  			return dst, len(src) == n
  1325  		}
  1326  
  1327  		switch r, rn := utf8.DecodeRune(src[n:]); {
  1328  		// Handle UTF-8 encoded byte sequence.
  1329  		// Due to specialized handling of ASCII above, we know that
  1330  		// all normal sequences at this point must be 2 bytes or larger.
  1331  		case rn > 1:
  1332  			n += rn
  1333  		// Handle escape sequence.
  1334  		case r == '\\':
  1335  			dst = append(dst, src[i:n]...)
  1336  			if r < ' ' {
  1337  				return dst, false // invalid control character or unescaped quote
  1338  			}
  1339  
  1340  			// Handle escape sequence.
  1341  			if uint(len(src)) < uint(n+2) {
  1342  				return dst, false // truncated escape sequence
  1343  			}
  1344  			switch r := src[n+1]; r {
  1345  			case '"', '\\', '/':
  1346  				dst = append(dst, r)
  1347  				n += 2
  1348  			case 'b':
  1349  				dst = append(dst, '\b')
  1350  				n += 2
  1351  			case 'f':
  1352  				dst = append(dst, '\f')
  1353  				n += 2
  1354  			case 'n':
  1355  				dst = append(dst, '\n')
  1356  				n += 2
  1357  			case 'r':
  1358  				dst = append(dst, '\r')
  1359  				n += 2
  1360  			case 't':
  1361  				dst = append(dst, '\t')
  1362  				n += 2
  1363  			case 'u':
  1364  				if uint(len(src)) < uint(n+6) {
  1365  					return dst, false // truncated escape sequence
  1366  				}
  1367  				v1, ok := parseHexUint16(src[n+2 : n+6])
  1368  				if !ok {
  1369  					return dst, false // invalid escape sequence
  1370  				}
  1371  				n += 6
  1372  
  1373  				// Check whether this is a surrogate half.
  1374  				r := rune(v1)
  1375  				if utf16.IsSurrogate(r) {
  1376  					r = utf8.RuneError // assume failure unless the following succeeds
  1377  					if uint(len(src)) >= uint(n+6) && src[n+0] == '\\' && src[n+1] == 'u' {
  1378  						if v2, ok := parseHexUint16(src[n+2 : n+6]); ok {
  1379  							if r = utf16.DecodeRune(rune(v1), rune(v2)); r != utf8.RuneError {
  1380  								n += 6
  1381  							}
  1382  						}
  1383  					}
  1384  				}
  1385  
  1386  				dst = utf8.AppendRune(dst, r)
  1387  			default:
  1388  				return dst, false // invalid escape sequence
  1389  			}
  1390  			i = n
  1391  		// Handle invalid UTF-8.
  1392  		case r == utf8.RuneError:
  1393  			// NOTE: An unescaped string may be longer than the escaped string
  1394  			// because invalid UTF-8 bytes are being replaced.
  1395  			dst = append(dst, src[i:n]...)
  1396  			dst = append(dst, "\uFFFD"...)
  1397  			n += rn
  1398  			i = n
  1399  		// Handle invalid control characters.
  1400  		case r < ' ':
  1401  			dst = append(dst, src[i:n]...)
  1402  			return dst, false // invalid control character or unescaped quote
  1403  		default:
  1404  			panic("BUG: unhandled character " + quoteRune(src[n:]))
  1405  		}
  1406  	}
  1407  	dst = append(dst, src[i:n]...)
  1408  	return dst, false // truncated input
  1409  }
  1410  
  1411  // unescapeStringMayCopy returns the unescaped form of b.
  1412  // If there are no escaped characters, the output is simply a subslice of
  1413  // the input with the surrounding quotes removed.
  1414  // Otherwise, a new buffer is allocated for the output.
  1415  func unescapeStringMayCopy(b []byte, isVerbatim bool) []byte {
  1416  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1417  	if isVerbatim {
  1418  		return b[len(`"`) : len(b)-len(`"`)]
  1419  	}
  1420  	b, _ = unescapeString(make([]byte, 0, len(b)), b)
  1421  	return b
  1422  }
  1423  
  1424  // consumeSimpleNumber consumes the next JSON number per RFC 7159, section 6
  1425  // but is limited to the grammar for a positive integer.
  1426  // It returns 0 if it is invalid or more complicated than a simple integer,
  1427  // in which case consumeNumber should be called.
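        //
        // For illustration (not part of the original source):
        //
        //	consumeSimpleNumber([]byte("123,")) // 3: stops before the ','
        //	consumeSimpleNumber([]byte("1.5"))  // 0: fractions need consumeNumber
        //	consumeSimpleNumber([]byte("-12"))  // 0: signs need consumeNumber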
  1428  func consumeSimpleNumber(b []byte) (n int) {
  1429  	// NOTE: The arguments and logic are kept simple to keep this inlineable.
  1430  	if len(b) > 0 {
  1431  		if b[0] == '0' {
  1432  			n++
  1433  		} else if '1' <= b[0] && b[0] <= '9' {
  1434  			n++
  1435  			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
  1436  				n++
  1437  			}
  1438  		} else {
  1439  			return 0
  1440  		}
  1441  		if len(b) == n || !(b[n] == '.' || b[n] == 'e' || b[n] == 'E') {
  1442  			return n
  1443  		}
  1444  	}
  1445  	return 0
  1446  }
  1447  
  1448  type consumeNumberState uint
  1449  
  1450  const (
  1451  	consumeNumberInit consumeNumberState = iota
  1452  	beforeIntegerDigits
  1453  	withinIntegerDigits
  1454  	beforeFractionalDigits
  1455  	withinFractionalDigits
  1456  	beforeExponentDigits
  1457  	withinExponentDigits
  1458  )
  1459  
  1460  // consumeNumber consumes the next JSON number per RFC 7159, section 6.
  1461  // It reports the number of bytes consumed and whether an error was encountered.
  1462  // If the input appears truncated, it returns io.ErrUnexpectedEOF.
  1463  //
  1464  // Note that JSON numbers are not self-terminating.
  1465  // If the entire input is consumed, then the caller needs to consider whether
  1466  // there may be subsequent unread data that may still be part of this number.
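        //
        // For illustration (not part of the original source):
        //
        //	consumeNumber([]byte("-1.5x")) // (4, nil): consumes "-1.5", stopping before 'x'
        //	consumeNumber([]byte("12"))    // (2, nil): but "12" may be the prefix of a longer number
        //	consumeNumber([]byte("1e"))    // (1, io.ErrUnexpectedEOF): exponent digits are missing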
  1467  func consumeNumber(b []byte) (n int, err error) {
  1468  	n, _, err = consumeNumberResumable(b, 0, consumeNumberInit)
  1469  	return n, err
  1470  }
  1471  
  1472  // consumeNumberResumable is identical to consumeNumber but supports resuming
  1473  // from a previous call that returned io.ErrUnexpectedEOF.
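        //
        // For illustration (not part of the original source), resuming after more
        // input has been fetched:
        //
        //	consumeNumberResumable([]byte("1.2e"), 0, consumeNumberInit)     // (3, beforeExponentDigits, io.ErrUnexpectedEOF)
        //	consumeNumberResumable([]byte("1.2e5"), 3, beforeExponentDigits) // (5, withinExponentDigits, nil)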
  1474  func consumeNumberResumable(b []byte, resumeOffset int, state consumeNumberState) (n int, _ consumeNumberState, err error) {
  1475  	// Jump to the right state when resuming from a partial consumption.
  1476  	n = resumeOffset
  1477  	if state > consumeNumberInit {
  1478  		switch state {
  1479  		case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
  1480  			// Consume leading digits.
  1481  			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
  1482  				n++
  1483  			}
  1484  			if len(b) == n {
  1485  				return n, state, nil // still within the same state
  1486  			}
  1487  			state++ // switches "withinX" to "beforeY" where Y is the state after X
  1488  		}
  1489  		switch state {
  1490  		case beforeIntegerDigits:
  1491  			goto beforeInteger
  1492  		case beforeFractionalDigits:
  1493  			goto beforeFractional
  1494  		case beforeExponentDigits:
  1495  			goto beforeExponent
  1496  		default:
  1497  			return n, state, nil
  1498  		}
  1499  	}
  1500  
  1501  	// Consume required integer component (with optional minus sign).
  1502  beforeInteger:
  1503  	resumeOffset = n
  1504  	if len(b) > 0 && b[0] == '-' {
  1505  		n++
  1506  	}
  1507  	switch {
  1508  	case len(b) == n:
  1509  		return resumeOffset, beforeIntegerDigits, io.ErrUnexpectedEOF
  1510  	case b[n] == '0':
  1511  		n++
  1512  		state = beforeFractionalDigits
  1513  	case '1' <= b[n] && b[n] <= '9':
  1514  		n++
  1515  		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
  1516  			n++
  1517  		}
  1518  		state = withinIntegerDigits
  1519  	default:
  1520  		return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
  1521  	}
  1522  
  1523  	// Consume optional fractional component.
  1524  beforeFractional:
  1525  	if len(b) > n && b[n] == '.' {
  1526  		resumeOffset = n
  1527  		n++
  1528  		switch {
  1529  		case len(b) == n:
  1530  			return resumeOffset, beforeFractionalDigits, io.ErrUnexpectedEOF
  1531  		case '0' <= b[n] && b[n] <= '9':
  1532  			n++
  1533  		default:
  1534  			return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
  1535  		}
  1536  		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
  1537  			n++
  1538  		}
  1539  		state = withinFractionalDigits
  1540  	}
  1541  
  1542  	// Consume optional exponent component.
  1543  beforeExponent:
  1544  	if len(b) > n && (b[n] == 'e' || b[n] == 'E') {
  1545  		resumeOffset = n
  1546  		n++
  1547  		if len(b) > n && (b[n] == '-' || b[n] == '+') {
  1548  			n++
  1549  		}
  1550  		switch {
  1551  		case len(b) == n:
  1552  			return resumeOffset, beforeExponentDigits, io.ErrUnexpectedEOF
  1553  		case '0' <= b[n] && b[n] <= '9':
  1554  			n++
  1555  		default:
  1556  			return n, state, newInvalidCharacterError(b[n:], "within number (expecting digit)")
  1557  		}
  1558  		for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
  1559  			n++
  1560  		}
  1561  		state = withinExponentDigits
  1562  	}
  1563  
  1564  	return n, state, nil
  1565  }
  1566  
  1567  // parseHexUint16 is similar to strconv.ParseUint,
  1568  // but operates directly on []byte and is optimized for base-16.
  1569  // See https://go.dev/issue/42429.
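        //
        // For illustration (not part of the original source):
        //
        //	parseHexUint16([]byte("00e9")) // (0x00e9, true)
        //	parseHexUint16([]byte("00E9")) // (0x00e9, true); upper and lower case are accepted
        //	parseHexUint16([]byte("e9"))   // (0, false); exactly 4 hex digits are required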
  1570  func parseHexUint16(b []byte) (v uint16, ok bool) {
  1571  	if len(b) != 4 {
  1572  		return 0, false
  1573  	}
  1574  	for _, c := range b[:4] {
  1575  		switch {
  1576  		case '0' <= c && c <= '9':
  1577  			c = c - '0'
  1578  		case 'a' <= c && c <= 'f':
  1579  			c = 10 + c - 'a'
  1580  		case 'A' <= c && c <= 'F':
  1581  			c = 10 + c - 'A'
  1582  		default:
  1583  			return 0, false
  1584  		}
  1585  		v = v*16 + uint16(c)
  1586  	}
  1587  	return v, true
  1588  }
  1589  
  1590  // parseDecUint is similar to strconv.ParseUint,
  1591  // but operates directly on []byte and is optimized for base-10.
  1592  // If the number is syntactically valid but overflows uint64,
  1593  // then it returns (math.MaxUint64, false).
  1594  // See https://go.dev/issue/42429.
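        //
        // For illustration (not part of the original source):
        //
        //	parseDecUint([]byte("123"))                  // (123, true)
        //	parseDecUint([]byte("12a"))                  // (0, false): trailing non-digit
        //	parseDecUint([]byte("18446744073709551616")) // (math.MaxUint64, false): 1<<64 overflows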
  1595  func parseDecUint(b []byte) (v uint64, ok bool) {
  1596  	// Overflow logic is based on strconv/atoi.go:138-149 from Go1.15, where:
  1597  	//   - cutoff is equal to math.MaxUint64/10+1, and
  1598  	//   - the n1 > maxVal check is unnecessary
  1599  	//     since maxVal is equivalent to math.MaxUint64.
  1600  	var n int
  1601  	var overflow bool
  1602  	for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
  1603  		overflow = overflow || v >= math.MaxUint64/10+1
  1604  		v *= 10
  1605  
  1606  		v1 := v + uint64(b[n]-'0')
  1607  		overflow = overflow || v1 < v
  1608  		v = v1
  1609  
  1610  		n++
  1611  	}
  1612  	if n == 0 || len(b) != n {
  1613  		return 0, false
  1614  	}
  1615  	if overflow {
  1616  		return math.MaxUint64, false
  1617  	}
  1618  	return v, true
  1619  }
  1620  
  1621  // parseFloat parses a floating point number according to the Go float grammar.
  1622  // Note that the JSON number grammar is a strict subset.
  1623  //
  1624  // If the number overflows the finite representation of a float,
  1625  // then we return MaxFloat since any finite value will always be infinitely
  1626  // more accurate at representing another finite value than an infinite value.
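        //
        // For illustration (not part of the original source):
        //
        //	parseFloat([]byte("1.5"), 64)     // (1.5, true)
        //	parseFloat([]byte("1e1000"), 64)  // (+math.MaxFloat64, true): overflow is clamped
        //	parseFloat([]byte("-1e1000"), 32) // (-math.MaxFloat32, true)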
  1627  func parseFloat(b []byte, bits int) (v float64, ok bool) {
  1628  	// Fast path for exact integer numbers which fit in the
  1629  	// 24-bit or 53-bit significand of a float32 or float64.
  1630  	var negLen int // either 0 or 1
  1631  	if len(b) > 0 && b[0] == '-' {
  1632  		negLen = 1
  1633  	}
  1634  	u, ok := parseDecUint(b[negLen:])
  1635  	if ok && ((bits == 32 && u <= 1<<24) || (bits == 64 && u <= 1<<53)) {
  1636  		return math.Copysign(float64(u), float64(-1*negLen)), true
  1637  	}
  1638  
  1639  	// Note that the []byte->string conversion unfortunately allocates.
  1640  	// See https://go.dev/issue/42429 for more information.
  1641  	fv, err := strconv.ParseFloat(string(b), bits)
  1642  	if math.IsInf(fv, 0) {
  1643  		switch {
  1644  		case bits == 32 && math.IsInf(fv, +1):
  1645  			return +math.MaxFloat32, true
  1646  		case bits == 64 && math.IsInf(fv, +1):
  1647  			return +math.MaxFloat64, true
  1648  		case bits == 32 && math.IsInf(fv, -1):
  1649  			return -math.MaxFloat32, true
  1650  		case bits == 64 && math.IsInf(fv, -1):
  1651  			return -math.MaxFloat64, true
  1652  		}
  1653  	}
  1654  	return fv, err == nil
  1655  }