k8s.io/kube-openapi@v0.0.0-20240228011516-70dd3763d340/pkg/internal/third_party/go-json-experiment/json/value.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package json 6 7 import ( 8 "bytes" 9 "errors" 10 "io" 11 "sort" 12 "sync" 13 "unicode/utf16" 14 "unicode/utf8" 15 ) 16 17 // NOTE: RawValue is analogous to v1 json.RawMessage. 18 19 // RawValue represents a single raw JSON value, which may be one of the following: 20 // - a JSON literal (i.e., null, true, or false) 21 // - a JSON string (e.g., "hello, world!") 22 // - a JSON number (e.g., 123.456) 23 // - an entire JSON object (e.g., {"fizz":"buzz"} ) 24 // - an entire JSON array (e.g., [1,2,3] ) 25 // 26 // RawValue can represent entire array or object values, while Token cannot. 27 // RawValue may contain leading and/or trailing whitespace. 28 type RawValue []byte 29 30 // Clone returns a copy of v. 31 func (v RawValue) Clone() RawValue { 32 if v == nil { 33 return nil 34 } 35 return append(RawValue{}, v...) 36 } 37 38 // String returns the string formatting of v. 39 func (v RawValue) String() string { 40 if v == nil { 41 return "null" 42 } 43 return string(v) 44 } 45 46 // IsValid reports whether the raw JSON value is syntactically valid 47 // according to RFC 7493. 48 // 49 // It verifies whether the input is properly encoded as UTF-8, 50 // that escape sequences within strings decode to valid Unicode codepoints, and 51 // that all names in each object are unique. 52 // It does not verify whether numbers are representable within the limits 53 // of any common numeric type (e.g., float64, int64, or uint64). 
54 func (v RawValue) IsValid() bool { 55 d := getBufferedDecoder(v, DecodeOptions{}) 56 defer putBufferedDecoder(d) 57 _, errVal := d.ReadValue() 58 _, errEOF := d.ReadToken() 59 return errVal == nil && errEOF == io.EOF 60 } 61 62 // Compact removes all whitespace from the raw JSON value. 63 // 64 // It does not reformat JSON strings to use any other representation. 65 // It is guaranteed to succeed if the input is valid. 66 // If the value is already compacted, then the buffer is not mutated. 67 func (v *RawValue) Compact() error { 68 return v.reformat(false, false, "", "") 69 } 70 71 // Indent reformats the whitespace in the raw JSON value so that each element 72 // in a JSON object or array begins on a new, indented line beginning with 73 // prefix followed by one or more copies of indent according to the nesting. 74 // The value does not begin with the prefix nor any indention, 75 // to make it easier to embed inside other formatted JSON data. 76 // 77 // It does not reformat JSON strings to use any other representation. 78 // It is guaranteed to succeed if the input is valid. 79 // If the value is already indented properly, then the buffer is not mutated. 80 func (v *RawValue) Indent(prefix, indent string) error { 81 return v.reformat(false, true, prefix, indent) 82 } 83 84 // Canonicalize canonicalizes the raw JSON value according to the 85 // JSON Canonicalization Scheme (JCS) as defined by RFC 8785 86 // where it produces a stable representation of a JSON value. 87 // 88 // The output stability is dependent on the stability of the application data 89 // (see RFC 8785, Appendix E). It cannot produce stable output from 90 // fundamentally unstable input. For example, if the JSON value 91 // contains ephemeral data (e.g., a frequently changing timestamp), 92 // then the value is still unstable regardless of whether this is called. 93 // 94 // Note that JCS treats all JSON numbers as IEEE 754 double precision numbers. 
95 // Any numbers with precision beyond what is representable by that form 96 // will lose their precision when canonicalized. For example, integer values 97 // beyond ±2⁵³ will lose their precision. It is recommended that 98 // int64 and uint64 data types be represented as a JSON string. 99 // 100 // It is guaranteed to succeed if the input is valid. 101 // If the value is already canonicalized, then the buffer is not mutated. 102 func (v *RawValue) Canonicalize() error { 103 return v.reformat(true, false, "", "") 104 } 105 106 // TODO: Instead of implementing the v1 Marshaler/Unmarshaler, 107 // consider implementing the v2 versions instead. 108 109 // MarshalJSON returns v as the JSON encoding of v. 110 // It returns the stored value as the raw JSON output without any validation. 111 // If v is nil, then this returns a JSON null. 112 func (v RawValue) MarshalJSON() ([]byte, error) { 113 // NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON. 114 if v == nil { 115 return []byte("null"), nil 116 } 117 return v, nil 118 } 119 120 // UnmarshalJSON sets v as the JSON encoding of b. 121 // It stores a copy of the provided raw JSON input without any validation. 122 func (v *RawValue) UnmarshalJSON(b []byte) error { 123 // NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON. 124 if v == nil { 125 return errors.New("json.RawValue: UnmarshalJSON on nil pointer") 126 } 127 *v = append((*v)[:0], b...) 128 return nil 129 } 130 131 // Kind returns the starting token kind. 132 // For a valid value, this will never include '}' or ']'. 
133 func (v RawValue) Kind() Kind { 134 if v := v[consumeWhitespace(v):]; len(v) > 0 { 135 return Kind(v[0]).normalize() 136 } 137 return invalidKind 138 } 139 140 func (v *RawValue) reformat(canonical, multiline bool, prefix, indent string) error { 141 var eo EncodeOptions 142 if canonical { 143 eo.AllowInvalidUTF8 = false // per RFC 8785, section 3.2.4 144 eo.AllowDuplicateNames = false // per RFC 8785, section 3.1 145 eo.canonicalizeNumbers = true // per RFC 8785, section 3.2.2.3 146 eo.EscapeRune = nil // per RFC 8785, section 3.2.2.2 147 eo.multiline = false // per RFC 8785, section 3.2.1 148 } else { 149 if s := trimLeftSpaceTab(prefix); len(s) > 0 { 150 panic("json: invalid character " + quoteRune([]byte(s)) + " in indent prefix") 151 } 152 if s := trimLeftSpaceTab(indent); len(s) > 0 { 153 panic("json: invalid character " + quoteRune([]byte(s)) + " in indent") 154 } 155 eo.AllowInvalidUTF8 = true 156 eo.AllowDuplicateNames = true 157 eo.preserveRawStrings = true 158 eo.multiline = multiline // in case indent is empty 159 eo.IndentPrefix = prefix 160 eo.Indent = indent 161 } 162 eo.omitTopLevelNewline = true 163 164 // Write the entire value to reformat all tokens and whitespace. 165 e := getBufferedEncoder(eo) 166 defer putBufferedEncoder(e) 167 if err := e.WriteValue(*v); err != nil { 168 return err 169 } 170 171 // For canonical output, we may need to reorder object members. 172 if canonical { 173 // Obtain a buffered encoder just to use its internal buffer as 174 // a scratch buffer in reorderObjects for reordering object members. 175 e2 := getBufferedEncoder(EncodeOptions{}) 176 defer putBufferedEncoder(e2) 177 178 // Disable redundant checks performed earlier during encoding. 179 d := getBufferedDecoder(e.buf, DecodeOptions{AllowInvalidUTF8: true, AllowDuplicateNames: true}) 180 defer putBufferedDecoder(d) 181 reorderObjects(d, &e2.buf) // per RFC 8785, section 3.2.3 182 } 183 184 // Store the result back into the value if different. 
185 if !bytes.Equal(*v, e.buf) { 186 *v = append((*v)[:0], e.buf...) 187 } 188 return nil 189 } 190 191 func trimLeftSpaceTab(s string) string { 192 for i, r := range s { 193 switch r { 194 case ' ', '\t': 195 default: 196 return s[i:] 197 } 198 } 199 return "" 200 } 201 202 type memberName struct { 203 // name is the unescaped name. 204 name []byte 205 // before and after are byte offsets into Decoder.buf that represents 206 // the entire name/value pair. It may contain leading commas. 207 before, after int64 208 } 209 210 var memberNamePool = sync.Pool{New: func() any { return new(memberNames) }} 211 212 func getMemberNames() *memberNames { 213 ns := memberNamePool.Get().(*memberNames) 214 *ns = (*ns)[:0] 215 return ns 216 } 217 func putMemberNames(ns *memberNames) { 218 if cap(*ns) < 1<<10 { 219 for i := range *ns { 220 (*ns)[i] = memberName{} // avoid pinning name 221 } 222 memberNamePool.Put(ns) 223 } 224 } 225 226 type memberNames []memberName 227 228 func (m *memberNames) Len() int { return len(*m) } 229 func (m *memberNames) Less(i, j int) bool { return lessUTF16((*m)[i].name, (*m)[j].name) } 230 func (m *memberNames) Swap(i, j int) { (*m)[i], (*m)[j] = (*m)[j], (*m)[i] } 231 232 // reorderObjects recursively reorders all object members in place 233 // according to the ordering specified in RFC 8785, section 3.2.3. 234 // 235 // Pre-conditions: 236 // - The value is valid (i.e., no decoder errors should ever occur). 237 // - The value is compact (i.e., no whitespace is present). 238 // - Initial call is provided a Decoder reading from the start of v. 239 // 240 // Post-conditions: 241 // - Exactly one JSON value is read from the Decoder. 242 // - All fully-parsed JSON objects are reordered by directly moving 243 // the members in the value buffer. 244 // 245 // The runtime is approximately O(n·log(n)) + O(m·log(m)), 246 // where n is len(v) and m is the total number of object members. 
func reorderObjects(d *Decoder, scratch *[]byte) {
	// Decoder errors are deliberately ignored throughout:
	// the caller is required to pass a value that is already known to be valid.
	// Any token other than '{' or '[' is a complete value on its own,
	// so reading it is all that needs to happen.
	switch tok, _ := d.ReadToken(); tok.Kind() {
	case '{':
		// Iterate and collect the name and offsets for every object member.
		members := getMemberNames()
		defer putMemberNames(members)
		var prevName []byte
		// isSorted remains true only while every name seen so far
		// is strictly less than the name that follows it.
		isSorted := true

		beforeBody := d.InputOffset() // offset after '{'
		for d.PeekKind() != '}' {
			beforeName := d.InputOffset()
			var flags valueFlags
			name, _ := d.readValue(&flags)
			name = unescapeStringMayCopy(name, flags.isVerbatim())
			// Consume the member value, reordering any objects nested in it.
			reorderObjects(d, scratch)
			afterValue := d.InputOffset()

			if isSorted && len(*members) > 0 {
				isSorted = lessUTF16(prevName, []byte(name))
			}
			*members = append(*members, memberName{name, beforeName, afterValue})
			prevName = name
		}
		afterBody := d.InputOffset() // offset before '}'
		d.ReadToken()

		// Sort the members; return early if it's already sorted.
		if isSorted {
			return
		}
		// TODO(https://go.dev/issue/47619): Use slices.Sort.
		sort.Sort(members)

		// Append the reordered members to a new buffer,
		// then copy the reordered members back over the original members.
		// Avoid swapping in place since each member may be a different size
		// where moving a member over a smaller member may corrupt the data
		// for subsequent members before they have been moved.
		//
		// The following invariant must hold:
		//	sum([m.after-m.before for m in members]) == afterBody-beforeBody
		sorted := (*scratch)[:0]
		for i, member := range *members {
			// member is a copy of the slice element,
			// so adjusting before here does not modify *members.
			if d.buf[member.before] == ',' {
				member.before++ // trim leading comma
			}
			sorted = append(sorted, d.buf[member.before:member.after]...)
			if i < len(*members)-1 {
				sorted = append(sorted, ',') // append trailing comma
			}
		}
		if int(afterBody-beforeBody) != len(sorted) {
			panic("BUG: length invariant violated")
		}
		// Overwrite the object body in the decoder's buffer with the sorted members.
		copy(d.buf[beforeBody:afterBody], sorted)

		// Update scratch buffer to the largest amount ever used.
		if len(sorted) > len(*scratch) {
			*scratch = sorted
		}
	case '[':
		// Arrays keep their element order; only objects nested inside are reordered.
		for d.PeekKind() != ']' {
			reorderObjects(d, scratch)
		}
		d.ReadToken()
	}
}

// lessUTF16 reports whether x is lexicographically less than y according
// to the UTF-16 codepoints of the UTF-8 encoded input strings.
// This implements the ordering specified in RFC 8785, section 3.2.3.
// The inputs must be valid UTF-8, otherwise this may panic.
//
// NOTE(review): the body tracks invalid UTF-8 and falls back to a bytewise
// comparison for it; confirm whether the panic warning above is still accurate.
func lessUTF16[Bytes []byte | string](x, y Bytes) bool {
	// NOTE: This is an optimized, allocation-free implementation
	// of lessUTF16Simple in fuzz_test.go. FuzzLessUTF16 verifies that the
	// two implementations agree on the result of comparing any two strings.

	// isUTF16Self reports whether r lies in U+0000..U+D7FF or U+E000..U+FFFF,
	// i.e., whether it is a single UTF-16 code unit that is not a surrogate.
	isUTF16Self := func(r rune) bool {
		return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
	}

	var invalidUTF8 bool
	x0, y0 := x, y // the unconsumed inputs, kept for the bytewise fallback below
	for {
		if len(x) == 0 || len(y) == 0 {
			// Both inputs ended together, but invalid UTF-8 was seen along the way:
			// the rune-wise comparison could not tell them apart,
			// so compare the original inputs bytewise instead.
			if len(x) == len(y) && invalidUTF8 {
				return string(x0) < string(y0)
			}
			// Otherwise the input that ran out first sorts first.
			return len(x) < len(y)
		}

		// ASCII fast-path.
		// Taken when at least one of the leading bytes is ASCII;
		// the leading bytes are then compared directly.
		if x[0] < utf8.RuneSelf || y[0] < utf8.RuneSelf {
			if x[0] != y[0] {
				return x[0] < y[0]
			}
			x, y = x[1:], y[1:]
			continue
		}

		// Decode next pair of runes as UTF-8.
		// TODO(https://go.dev/issue/56948): Use a generic implementation
		// of utf8.DecodeRune, or rely on a compiler optimization to statically
		// hide the cost of a type switch (https://go.dev/issue/57072).
		var rx, ry rune
		var nx, ny int
		switch any(x).(type) {
		case string:
			rx, nx = utf8.DecodeRuneInString(string(x))
			ry, ny = utf8.DecodeRuneInString(string(y))
		case []byte:
			rx, nx = utf8.DecodeRune([]byte(x))
			ry, ny = utf8.DecodeRune([]byte(y))
		}

		// When exactly one rune needs a surrogate pair, replace it with the
		// first value returned by utf16.EncodeRune (the leading surrogate)
		// so that it is compared against the other rune as a UTF-16 code unit.
		// When both or neither need a pair, the runes are compared as-is.
		selfx := isUTF16Self(rx)
		selfy := isUTF16Self(ry)
		switch {
		// The x rune is a single UTF-16 codepoint, while
		// the y rune is a surrogate pair of UTF-16 codepoints.
		case selfx && !selfy:
			ry, _ = utf16.EncodeRune(ry)
		// The y rune is a single UTF-16 codepoint, while
		// the x rune is a surrogate pair of UTF-16 codepoints.
		case selfy && !selfx:
			rx, _ = utf16.EncodeRune(rx)
		}
		if rx != ry {
			return rx < ry
		}
		// utf8.RuneError with a width of 1 is how the decoder reports an invalid encoding.
		invalidUTF8 = invalidUTF8 || (rx == utf8.RuneError && nx == 1) || (ry == utf8.RuneError && ny == 1)
		x, y = x[nx:], y[ny:]
	}
}