k8s.io/kube-openapi@v0.0.0-20240228011516-70dd3763d340/pkg/internal/third_party/go-json-experiment/json/value.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  	"sort"
    12  	"sync"
    13  	"unicode/utf16"
    14  	"unicode/utf8"
    15  )
    16  
    17  // NOTE: RawValue is analogous to v1 json.RawMessage.
    18  
// RawValue represents a single raw JSON value, which may be one of the following:
//   - a JSON literal (i.e., null, true, or false)
//   - a JSON string (e.g., "hello, world!")
//   - a JSON number (e.g., 123.456)
//   - an entire JSON object (e.g., {"fizz":"buzz"} )
//   - an entire JSON array (e.g., [1,2,3] )
//
// RawValue can represent entire array or object values, while Token cannot.
// RawValue may contain leading and/or trailing whitespace.
//
// A nil RawValue is rendered as a JSON null by both String and MarshalJSON.
type RawValue []byte
    29  
    30  // Clone returns a copy of v.
    31  func (v RawValue) Clone() RawValue {
    32  	if v == nil {
    33  		return nil
    34  	}
    35  	return append(RawValue{}, v...)
    36  }
    37  
    38  // String returns the string formatting of v.
    39  func (v RawValue) String() string {
    40  	if v == nil {
    41  		return "null"
    42  	}
    43  	return string(v)
    44  }
    45  
    46  // IsValid reports whether the raw JSON value is syntactically valid
    47  // according to RFC 7493.
    48  //
    49  // It verifies whether the input is properly encoded as UTF-8,
    50  // that escape sequences within strings decode to valid Unicode codepoints, and
    51  // that all names in each object are unique.
    52  // It does not verify whether numbers are representable within the limits
    53  // of any common numeric type (e.g., float64, int64, or uint64).
    54  func (v RawValue) IsValid() bool {
    55  	d := getBufferedDecoder(v, DecodeOptions{})
    56  	defer putBufferedDecoder(d)
    57  	_, errVal := d.ReadValue()
    58  	_, errEOF := d.ReadToken()
    59  	return errVal == nil && errEOF == io.EOF
    60  }
    61  
    62  // Compact removes all whitespace from the raw JSON value.
    63  //
    64  // It does not reformat JSON strings to use any other representation.
    65  // It is guaranteed to succeed if the input is valid.
    66  // If the value is already compacted, then the buffer is not mutated.
    67  func (v *RawValue) Compact() error {
    68  	return v.reformat(false, false, "", "")
    69  }
    70  
    71  // Indent reformats the whitespace in the raw JSON value so that each element
    72  // in a JSON object or array begins on a new, indented line beginning with
    73  // prefix followed by one or more copies of indent according to the nesting.
    74  // The value does not begin with the prefix nor any indention,
    75  // to make it easier to embed inside other formatted JSON data.
    76  //
    77  // It does not reformat JSON strings to use any other representation.
    78  // It is guaranteed to succeed if the input is valid.
    79  // If the value is already indented properly, then the buffer is not mutated.
    80  func (v *RawValue) Indent(prefix, indent string) error {
    81  	return v.reformat(false, true, prefix, indent)
    82  }
    83  
    84  // Canonicalize canonicalizes the raw JSON value according to the
    85  // JSON Canonicalization Scheme (JCS) as defined by RFC 8785
    86  // where it produces a stable representation of a JSON value.
    87  //
    88  // The output stability is dependent on the stability of the application data
    89  // (see RFC 8785, Appendix E). It cannot produce stable output from
    90  // fundamentally unstable input. For example, if the JSON value
    91  // contains ephemeral data (e.g., a frequently changing timestamp),
    92  // then the value is still unstable regardless of whether this is called.
    93  //
    94  // Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
    95  // Any numbers with precision beyond what is representable by that form
    96  // will lose their precision when canonicalized. For example, integer values
    97  // beyond ±2⁵³ will lose their precision. It is recommended that
    98  // int64 and uint64 data types be represented as a JSON string.
    99  //
   100  // It is guaranteed to succeed if the input is valid.
   101  // If the value is already canonicalized, then the buffer is not mutated.
   102  func (v *RawValue) Canonicalize() error {
   103  	return v.reformat(true, false, "", "")
   104  }
   105  
   106  // TODO: Instead of implementing the v1 Marshaler/Unmarshaler,
   107  // consider implementing the v2 versions instead.
   108  
   109  // MarshalJSON returns v as the JSON encoding of v.
   110  // It returns the stored value as the raw JSON output without any validation.
   111  // If v is nil, then this returns a JSON null.
   112  func (v RawValue) MarshalJSON() ([]byte, error) {
   113  	// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
   114  	if v == nil {
   115  		return []byte("null"), nil
   116  	}
   117  	return v, nil
   118  }
   119  
   120  // UnmarshalJSON sets v as the JSON encoding of b.
   121  // It stores a copy of the provided raw JSON input without any validation.
   122  func (v *RawValue) UnmarshalJSON(b []byte) error {
   123  	// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
   124  	if v == nil {
   125  		return errors.New("json.RawValue: UnmarshalJSON on nil pointer")
   126  	}
   127  	*v = append((*v)[:0], b...)
   128  	return nil
   129  }
   130  
   131  // Kind returns the starting token kind.
   132  // For a valid value, this will never include '}' or ']'.
   133  func (v RawValue) Kind() Kind {
   134  	if v := v[consumeWhitespace(v):]; len(v) > 0 {
   135  		return Kind(v[0]).normalize()
   136  	}
   137  	return invalidKind
   138  }
   139  
   140  func (v *RawValue) reformat(canonical, multiline bool, prefix, indent string) error {
   141  	var eo EncodeOptions
   142  	if canonical {
   143  		eo.AllowInvalidUTF8 = false    // per RFC 8785, section 3.2.4
   144  		eo.AllowDuplicateNames = false // per RFC 8785, section 3.1
   145  		eo.canonicalizeNumbers = true  // per RFC 8785, section 3.2.2.3
   146  		eo.EscapeRune = nil            // per RFC 8785, section 3.2.2.2
   147  		eo.multiline = false           // per RFC 8785, section 3.2.1
   148  	} else {
   149  		if s := trimLeftSpaceTab(prefix); len(s) > 0 {
   150  			panic("json: invalid character " + quoteRune([]byte(s)) + " in indent prefix")
   151  		}
   152  		if s := trimLeftSpaceTab(indent); len(s) > 0 {
   153  			panic("json: invalid character " + quoteRune([]byte(s)) + " in indent")
   154  		}
   155  		eo.AllowInvalidUTF8 = true
   156  		eo.AllowDuplicateNames = true
   157  		eo.preserveRawStrings = true
   158  		eo.multiline = multiline // in case indent is empty
   159  		eo.IndentPrefix = prefix
   160  		eo.Indent = indent
   161  	}
   162  	eo.omitTopLevelNewline = true
   163  
   164  	// Write the entire value to reformat all tokens and whitespace.
   165  	e := getBufferedEncoder(eo)
   166  	defer putBufferedEncoder(e)
   167  	if err := e.WriteValue(*v); err != nil {
   168  		return err
   169  	}
   170  
   171  	// For canonical output, we may need to reorder object members.
   172  	if canonical {
   173  		// Obtain a buffered encoder just to use its internal buffer as
   174  		// a scratch buffer in reorderObjects for reordering object members.
   175  		e2 := getBufferedEncoder(EncodeOptions{})
   176  		defer putBufferedEncoder(e2)
   177  
   178  		// Disable redundant checks performed earlier during encoding.
   179  		d := getBufferedDecoder(e.buf, DecodeOptions{AllowInvalidUTF8: true, AllowDuplicateNames: true})
   180  		defer putBufferedDecoder(d)
   181  		reorderObjects(d, &e2.buf) // per RFC 8785, section 3.2.3
   182  	}
   183  
   184  	// Store the result back into the value if different.
   185  	if !bytes.Equal(*v, e.buf) {
   186  		*v = append((*v)[:0], e.buf...)
   187  	}
   188  	return nil
   189  }
   190  
   191  func trimLeftSpaceTab(s string) string {
   192  	for i, r := range s {
   193  		switch r {
   194  		case ' ', '\t':
   195  		default:
   196  			return s[i:]
   197  		}
   198  	}
   199  	return ""
   200  }
   201  
// memberName records one object member (a name/value pair) seen while
// reordering an object: its unescaped name, used as the sort key, and
// where the member's bytes are located in the decoder's buffer.
type memberName struct {
	// name is the unescaped name.
	name []byte
	// before and after are byte offsets into Decoder.buf that delimit
	// the entire name/value pair. The span may start with a leading comma.
	before, after int64
}
   209  
// memberNamePool holds *memberNames values for reuse;
// it hands out a new empty memberNames when it has none available.
var memberNamePool = sync.Pool{New: func() any { return new(memberNames) }}
   211  
   212  func getMemberNames() *memberNames {
   213  	ns := memberNamePool.Get().(*memberNames)
   214  	*ns = (*ns)[:0]
   215  	return ns
   216  }
   217  func putMemberNames(ns *memberNames) {
   218  	if cap(*ns) < 1<<10 {
   219  		for i := range *ns {
   220  			(*ns)[i] = memberName{} // avoid pinning name
   221  		}
   222  		memberNamePool.Put(ns)
   223  	}
   224  }
   225  
   226  type memberNames []memberName
   227  
   228  func (m *memberNames) Len() int           { return len(*m) }
   229  func (m *memberNames) Less(i, j int) bool { return lessUTF16((*m)[i].name, (*m)[j].name) }
   230  func (m *memberNames) Swap(i, j int)      { (*m)[i], (*m)[j] = (*m)[j], (*m)[i] }
   231  
// reorderObjects recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// It consumes one JSON value from d. For an object, the members are
// collected, sorted by name using lessUTF16, assembled in sorted order
// in the buffer that scratch points to, and then copied back over
// the original bytes in d.buf. For an array, each element is processed
// recursively. Any other token is simply consumed.
//
// Pre-conditions:
//   - The value is valid (i.e., no decoder errors should ever occur).
//   - The value is compact (i.e., no whitespace is present).
//   - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
//   - Exactly one JSON value is read from the Decoder.
//   - All fully-parsed JSON objects are reordered by directly moving
//     the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func reorderObjects(d *Decoder, scratch *[]byte) {
	// Decoder errors are deliberately ignored here and below;
	// the pre-conditions state that the value is valid.
	switch tok, _ := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		members := getMemberNames()
		defer putMemberNames(members)
		var prevName []byte
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags valueFlags
			name, _ := d.readValue(&flags)
			name = unescapeStringMayCopy(name, flags.isVerbatim())
			// Consume the member value, reordering any objects nested in it.
			reorderObjects(d, scratch)
			afterValue := d.InputOffset()

			// Track whether the names seen so far are already in order;
			// once an out-of-order pair is found, stop comparing.
			if isSorted && len(*members) > 0 {
				isSorted = lessUTF16(prevName, []byte(name))
			}
			*members = append(*members, memberName{name, beforeName, afterValue})
			prevName = name
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()                // consume the closing '}'

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		// TODO(https://go.dev/issue/47619): Use slices.Sort.
		sort.Sort(members)

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		sorted := (*scratch)[:0]
		for i, member := range *members {
			// A recorded span may start with the comma that preceded the
			// member in its original position; drop it and re-insert commas
			// between members according to the new order instead.
			if d.buf[member.before] == ',' {
				member.before++ // trim leading comma
			}
			sorted = append(sorted, d.buf[member.before:member.after]...)
			if i < len(*members)-1 {
				sorted = append(sorted, ',') // append trailing comma
			}
		}
		// The sorted body must occupy exactly the same number of bytes
		// as the original body, otherwise the copy below would be wrong.
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		copy(d.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		// Process each element in turn so that nested objects are reordered.
		for d.PeekKind() != ']' {
			reorderObjects(d, scratch)
		}
		d.ReadToken() // consume the closing ']'
	}
}
   315  
   316  // lessUTF16 reports whether x is lexicographically less than y according
   317  // to the UTF-16 codepoints of the UTF-8 encoded input strings.
   318  // This implements the ordering specified in RFC 8785, section 3.2.3.
   319  // The inputs must be valid UTF-8, otherwise this may panic.
   320  func lessUTF16[Bytes []byte | string](x, y Bytes) bool {
   321  	// NOTE: This is an optimized, allocation-free implementation
   322  	// of lessUTF16Simple in fuzz_test.go. FuzzLessUTF16 verifies that the
   323  	// two implementations agree on the result of comparing any two strings.
   324  
   325  	isUTF16Self := func(r rune) bool {
   326  		return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
   327  	}
   328  
   329  	var invalidUTF8 bool
   330  	x0, y0 := x, y
   331  	for {
   332  		if len(x) == 0 || len(y) == 0 {
   333  			if len(x) == len(y) && invalidUTF8 {
   334  				return string(x0) < string(y0)
   335  			}
   336  			return len(x) < len(y)
   337  		}
   338  
   339  		// ASCII fast-path.
   340  		if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
   341  			if x[0] != y[0] {
   342  				return x[0] < y[0]
   343  			}
   344  			x, y = x[1:], y[1:]
   345  			continue
   346  		}
   347  
   348  		// Decode next pair of runes as UTF-8.
   349  		// TODO(https://go.dev/issue/56948): Use a generic implementation
   350  		// of utf8.DecodeRune, or rely on a compiler optimization to statically
   351  		// hide the cost of a type switch (https://go.dev/issue/57072).
   352  		var rx, ry rune
   353  		var nx, ny int
   354  		switch any(x).(type) {
   355  		case string:
   356  			rx, nx = utf8.DecodeRuneInString(string(x))
   357  			ry, ny = utf8.DecodeRuneInString(string(y))
   358  		case []byte:
   359  			rx, nx = utf8.DecodeRune([]byte(x))
   360  			ry, ny = utf8.DecodeRune([]byte(y))
   361  		}
   362  
   363  		selfx := isUTF16Self(rx)
   364  		selfy := isUTF16Self(ry)
   365  		switch {
   366  		// The x rune is a single UTF-16 codepoint, while
   367  		// the y rune is a surrogate pair of UTF-16 codepoints.
   368  		case selfx && !selfy:
   369  			ry, _ = utf16.EncodeRune(ry)
   370  		// The y rune is a single UTF-16 codepoint, while
   371  		// the x rune is a surrogate pair of UTF-16 codepoints.
   372  		case selfy && !selfx:
   373  			rx, _ = utf16.EncodeRune(rx)
   374  		}
   375  		if rx != ry {
   376  			return rx < ry
   377  		}
   378  		invalidUTF8 = invalidUTF8 || (rx == utf8.RuneError && nx == 1) || (ry == utf8.RuneError && ny == 1)
   379  		x, y = x[nx:], y[ny:]
   380  	}
   381  }