// Source: k8s.io/kube-openapi@v0.0.0-20240228011516-70dd3763d340/pkg/internal/third_party/go-json-experiment/json/encode.go

// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package json

import (
	"bytes"
	"io"
	"math"
	"math/bits"
	"strconv"
	"unicode/utf16"
	"unicode/utf8"
)

// EncodeOptions configures how JSON encoding operates.
// The zero value is equivalent to the default settings,
// which is compliant with both RFC 7493 and RFC 8259.
type EncodeOptions struct {
	requireKeyedLiterals
	nonComparable

	// multiline specifies whether the encoder should emit multiline output.
	// It is set automatically by Encoder.reset whenever Indent is non-empty.
	multiline bool

	// omitTopLevelNewline specifies whether to omit the newline
	// that is appended after every top-level JSON value when streaming.
	omitTopLevelNewline bool

	// AllowDuplicateNames specifies that JSON objects may contain
	// duplicate member names. Disabling the duplicate name check may provide
	// performance benefits, but breaks compliance with RFC 7493, section 2.3.
	// The output will still be compliant with RFC 8259,
	// which leaves the handling of duplicate names as unspecified behavior.
	AllowDuplicateNames bool

	// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
	// which will be mangled as the Unicode replacement character, U+FFFD.
	// This causes the encoder to break compliance with
	// RFC 7493, section 2.1, and RFC 8259, section 8.1.
	AllowInvalidUTF8 bool

	// preserveRawStrings specifies that WriteToken and WriteValue should not
	// reformat any JSON string, but keep the formatting verbatim.
	preserveRawStrings bool

	// canonicalizeNumbers specifies that WriteToken and WriteValue should
	// reformat any JSON numbers according to RFC 8785, section 3.2.2.3.
	canonicalizeNumbers bool

	// EscapeRune reports whether the provided character should be escaped
	// as a hexadecimal Unicode codepoint (e.g., \ufffd).
	// If nil, the shortest and simplest encoding will be used,
	// which is also the formatting specified by RFC 8785, section 3.2.2.2.
	EscapeRune func(rune) bool

	// Indent (if non-empty) specifies that the encoder should emit multiline
	// output where each element in a JSON object or array begins on a new,
	// indented line beginning with the indent prefix followed by one or more
	// copies of indent according to the indentation nesting.
	// It may only be composed of space or tab characters.
	Indent string

	// IndentPrefix is prepended to each line within a JSON object or array.
	// The purpose of the indent prefix is to encode data that can more easily
	// be embedded inside other formatted JSON data.
	// It may only be composed of space or tab characters.
	// It is ignored if Indent is empty.
	IndentPrefix string
}

// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// WriteToken and WriteValue calls may be interleaved.
// For example, the following JSON value:
//
//	{"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
//	e.WriteToken(ObjectStart)           // {
//	e.WriteToken(String("name"))        // "name"
//	e.WriteToken(String("value"))       // "value"
//	e.WriteValue(RawValue(`"array"`))   // "array"
//	e.WriteToken(ArrayStart)            // [
//	e.WriteToken(Null)                  // null
//	e.WriteToken(False)                 // false
//	e.WriteValue(RawValue("true"))      // true
//	e.WriteToken(Float(3.14159))        // 3.14159
//	e.WriteToken(ArrayEnd)              // ]
//	e.WriteValue(RawValue(`"object"`))  // "object"
//	e.WriteValue(RawValue(`{"k":"v"}`)) // {"k":"v"}
//	e.WriteToken(ObjectEnd)             // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call WriteToken with a string
// for object names.
type Encoder struct {
	state
	encodeBuffer
	options EncodeOptions

	seenPointers seenPointers // only used when marshaling
}

// encodeBuffer is a buffer split into 2 segments:
//
//   - buf[0:len(buf)]        // written (but unflushed) portion of the buffer
//   - buf[len(buf):cap(buf)] // unused portion of the buffer
type encodeBuffer struct {
	buf []byte // may alias wr if it is a bytes.Buffer

	// baseOffset is added to len(buf) to obtain the absolute offset
	// relative to the start of io.Writer stream.
	baseOffset int64

	wr io.Writer

	// maxValue is the approximate maximum RawValue size passed to WriteValue.
	maxValue int
	// unusedCache is the buffer returned by the UnusedBuffer method.
	unusedCache []byte
	// bufStats is statistics about buffer utilization.
	// It is only used with pooled encoders in pools.go.
	bufStats bufferStatistics
}

// NewEncoder constructs a new streaming encoder writing to w.
// It uses the zero value of EncodeOptions, i.e., the default settings.
func NewEncoder(w io.Writer) *Encoder {
	return EncodeOptions{}.NewEncoder(w)
}

// NewEncoder constructs a new streaming encoder writing to w
// configured with the provided options.
// It flushes the internal buffer when the buffer is sufficiently full or
// when a top-level value has been written.
//
// If w is a bytes.Buffer, then the encoder appends directly into the buffer
// without copying the contents from an intermediate buffer.
func (o EncodeOptions) NewEncoder(w io.Writer) *Encoder {
	e := new(Encoder)
	o.ResetEncoder(e, w)
	return e
}

// ResetEncoder resets an encoder such that it is writing afresh to w and
// configured with the provided options.
// It panics if either e or w is nil.
func (o EncodeOptions) ResetEncoder(e *Encoder, w io.Writer) {
	if e == nil {
		panic("json: invalid nil Encoder")
	}
	if w == nil {
		panic("json: invalid nil io.Writer")
	}
	e.reset(nil, w, o)
}

// reset reinitializes e to write to w using options o,
// with b as the initial internal buffer.
//
// If o.Indent is non-empty, multiline output is enabled and both
// o.IndentPrefix and o.Indent are validated; reset panics if anything is left
// over after trimLeftSpaceTab (presumably any character other than a space or
// tab; the helper is defined elsewhere). The prefix is not validated
// when o.Indent is empty.
//
// The state machine is reset and the whole encodeBuffer is replaced, keeping
// only the existing bufStats. If w is a non-nil *bytes.Buffer, b is discarded
// and the buffer aliases the unused capacity of w instead.
func (e *Encoder) reset(b []byte, w io.Writer, o EncodeOptions) {
	if len(o.Indent) > 0 {
		o.multiline = true
		if s := trimLeftSpaceTab(o.IndentPrefix); len(s) > 0 {
			panic("json: invalid character " + quoteRune([]byte(s)) + " in indent prefix")
		}
		if s := trimLeftSpaceTab(o.Indent); len(s) > 0 {
			panic("json: invalid character " + quoteRune([]byte(s)) + " in indent")
		}
	}
	e.state.reset()
	e.encodeBuffer = encodeBuffer{buf: b, wr: w, bufStats: e.bufStats}
	e.options = o
	if bb, ok := w.(*bytes.Buffer); ok && bb != nil {
		e.buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb
	}
}

// Reset resets an encoder such that it is writing afresh to w but
// keeps any pre-existing encoder options.
func (e *Encoder) Reset(w io.Writer) {
	e.options.ResetEncoder(e, w)
}

// needFlush determines whether to flush at this point.
func (e *Encoder) needFlush() bool {
	// NOTE: This function is carefully written to be inlineable.

	// Avoid flushing if e.wr is nil since there is no underlying writer.
	// Flush if less than 25% of the capacity remains.
	// Flushing at some constant fraction ensures that the buffer stops growing
	// so long as the largest Token or Value fits within that unused capacity.
	// A depth of 1 also triggers a flush; flush treats that depth as the point
	// where a top-level value has just been written.
	return e.wr != nil && (e.tokens.depth() == 1 || len(e.buf) > 3*cap(e.buf)/4)
}

// flush flushes the buffer to the underlying io.Writer.
// It may append a trailing newline after the top-level value.
//
// It is a no-op if there is no writer or if avoidFlush reports that the
// buffered bytes must be retained. A failed write is reported as an *ioError
// and the unwritten bytes are kept at the front of the buffer.
func (e *Encoder) flush() error {
	if e.wr == nil || e.avoidFlush() {
		return nil
	}

	// In streaming mode, always emit a newline after the top-level value.
	if e.tokens.depth() == 1 && !e.options.omitTopLevelNewline {
		e.buf = append(e.buf, '\n')
	}

	// Inform objectNameStack that we are about to flush the buffer content.
	e.names.copyQuotedBuffer(e.buf)

	// Specialize bytes.Buffer for better performance.
	if bb, ok := e.wr.(*bytes.Buffer); ok {
		// If e.buf already aliases the internal buffer of bb,
		// then the Write call simply increments the internal offset,
		// otherwise Write operates as expected.
		// See https://go.dev/issue/42986.
		n, _ := bb.Write(e.buf) // never fails unless bb is nil
		e.baseOffset += int64(n)

		// If the internal buffer of bytes.Buffer is too small,
		// append operations elsewhere in the Encoder may grow the buffer.
		// This would be semantically correct, but hurts performance.
		// As such, ensure 25% of the current length is always available
		// to reduce the probability that other appends must allocate.
		if avail := bb.Cap() - bb.Len(); avail < bb.Len()/4 {
			bb.Grow(avail + 1)
		}

		e.buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb
		return nil
	}

	// Flush the internal buffer to the underlying io.Writer.
	n, err := e.wr.Write(e.buf)
	e.baseOffset += int64(n)
	if err != nil {
		// In the event of an error, preserve the unflushed portion.
		// Thus, write errors aren't fatal so long as the io.Writer
		// maintains consistent state after errors.
		if n > 0 {
			e.buf = e.buf[:copy(e.buf, e.buf[n:])]
		}
		return &ioError{action: "write", err: err}
	}
	e.buf = e.buf[:0]

	// Check whether to grow the buffer.
	// Note that cap(e.buf) may already exceed maxBufferSize since
	// an append elsewhere already grew it to store a large token.
	const maxBufferSize = 4 << 10
	const growthSizeFactor = 2 // higher value is faster
	const growthRateFactor = 2 // higher value is slower
	// By default, grow if below the maximum buffer size.
	grow := cap(e.buf) <= maxBufferSize/growthSizeFactor
	// Growing can be expensive, so only grow
	// if a sufficient number of bytes have been processed.
	grow = grow && int64(cap(e.buf)) < e.previousOffsetEnd()/growthRateFactor
	if grow {
		e.buf = make([]byte, 0, cap(e.buf)*growthSizeFactor)
	}

	return nil
}

// previousOffsetEnd returns the absolute stream offset of the end of the
// buffered output, i.e., baseOffset plus the number of unflushed bytes.
func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.buf)) }

// unflushedBuffer returns the written (but unflushed) portion of the buffer.
func (e *encodeBuffer) unflushedBuffer() []byte { return e.buf }

// avoidFlush indicates whether to avoid flushing to ensure there is always
// enough in the buffer to unwrite the last object member if it were empty.
func (e *Encoder) avoidFlush() bool {
	switch {
	case e.tokens.last.length() == 0:
		// Never flush after ObjectStart or ArrayStart since we don't know yet
		// if the object or array will end up being empty.
		return true
	case e.tokens.last.needObjectValue():
		// Never flush before the object value since we don't know yet
		// if the object value will end up being empty.
		return true
	case e.tokens.last.needObjectName() && len(e.buf) >= 2:
		// Never flush after the object value if it does turn out to be empty.
		switch string(e.buf[len(e.buf)-2:]) {
		case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value
			return true
		}
	}
	return false
}

// unwriteEmptyObjectMember unwrites the last object member if it is empty
// and reports whether it performed an unwrite operation.
//
// A value counts as empty if it is `null`, `""`, `{}`, or `[]`.
// If an unwrite happens, duplicate-name tracking is enabled, and prevName is
// non-nil, then *prevName is installed as the last unquoted name.
// It panics if the encoder is not inside an object right after a value.
func (e *Encoder) unwriteEmptyObjectMember(prevName *string) bool {
	if last := e.tokens.last; !last.isObject() || !last.needObjectName() || last.length() == 0 {
		panic("BUG: must be called on an object after writing a value")
	}

	// The flushing logic is modified to never flush a trailing empty value.
	// The encoder never writes trailing whitespace eagerly.
	b := e.unflushedBuffer()

	// Detect whether the last value was empty.
	var n int
	if len(b) >= 3 {
		switch string(b[len(b)-2:]) {
		case "ll": // last two bytes of `null`
			n = len(`null`)
		case `""`:
			// It is possible for a non-empty string to have `""` as a suffix
			// if the second to the last quote was escaped.
			if b[len(b)-3] == '\\' {
				return false // e.g., `"\""` is not empty
			}
			n = len(`""`)
		case `{}`:
			n = len(`{}`)
		case `[]`:
			n = len(`[]`)
		}
	}
	if n == 0 {
		return false
	}

	// Unwrite the value, whitespace, colon, name, whitespace, and comma.
	b = b[:len(b)-n]
	b = trimSuffixWhitespace(b)
	b = trimSuffixByte(b, ':')
	b = trimSuffixString(b)
	b = trimSuffixWhitespace(b)
	b = trimSuffixByte(b, ',')
	e.buf = b // store back truncated unflushed buffer

	// Undo state changes.
	e.tokens.last.decrement() // for object member value
	e.tokens.last.decrement() // for object member name
	if !e.options.AllowDuplicateNames {
		if e.tokens.last.isActiveNamespace() {
			e.namespaces.last().removeLast()
		}
		e.names.clearLast()
		if prevName != nil {
			e.names.copyQuotedBuffer(e.buf) // required by objectNameStack.replaceLastUnquotedName
			e.names.replaceLastUnquotedName(*prevName)
		}
	}
	return true
}

// unwriteOnlyObjectMemberName unwrites the only object member name
// and returns the unquoted name.
// It panics unless the encoder is inside an object of length exactly 1,
// i.e., only the first name has been written.
func (e *Encoder) unwriteOnlyObjectMemberName() string {
	if last := e.tokens.last; !last.isObject() || last.length() != 1 {
		panic("BUG: must be called on an object after writing first name")
	}

	// Unwrite the name and whitespace.
	b := trimSuffixString(e.buf)
	// The quoted name is e.buf[len(b):]; it is verbatim if it has no escapes.
	isVerbatim := bytes.IndexByte(e.buf[len(b):], '\\') < 0
	name := string(unescapeStringMayCopy(e.buf[len(b):], isVerbatim))
	e.buf = trimSuffixWhitespace(b)

	// Undo state changes.
	e.tokens.last.decrement()
	if !e.options.AllowDuplicateNames {
		if e.tokens.last.isActiveNamespace() {
			e.namespaces.last().removeLast()
		}
		e.names.clearLast()
	}
	return name
}

// trimSuffixWhitespace returns b with any trailing run of
// space, tab, carriage return, or newline bytes removed.
func trimSuffixWhitespace(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlineable.
	n := len(b) - 1
	for n >= 0 && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') {
		n--
	}
	return b[:n+1]
}

// trimSuffixString returns b with a trailing quoted JSON string removed,
// including both of its double quotes.
// It drops the closing quote (if present), scans backwards to the nearest
// double quote that is not immediately preceded by a backslash, and drops
// that quote as well. It performs no validation of the string itself.
func trimSuffixString(b []byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlineable.
	if len(b) > 0 && b[len(b)-1] == '"' {
		b = b[:len(b)-1]
	}
	for len(b) >= 2 && !(b[len(b)-1] == '"' && b[len(b)-2] != '\\') {
		b = b[:len(b)-1] // trim all characters except an unescaped quote
	}
	if len(b) > 0 && b[len(b)-1] == '"' {
		b = b[:len(b)-1]
	}
	return b
}

// hasSuffixByte reports whether b is non-empty and its last byte is c.
func hasSuffixByte(b []byte, c byte) bool {
	// NOTE: The arguments and logic are kept simple to keep this inlineable.
	return len(b) > 0 && b[len(b)-1] == c
}

// trimSuffixByte returns b without its last byte if that byte is c;
// otherwise it returns b unchanged.
func trimSuffixByte(b []byte, c byte) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlineable.
	if len(b) > 0 && b[len(b)-1] == c {
		return b[:len(b)-1]
	}
	return b
}

// WriteToken writes the next token and advances the internal write offset.
//
// The provided token kind must be consistent with the JSON grammar.
// For example, it is an error to provide a number when the encoder
// is expecting an object name (which is always a string), or
// to provide an end object delimiter when the encoder is finishing an array.
// If the provided token is invalid, then it reports a SyntacticError and
// the internal state remains unchanged.
func (e *Encoder) WriteToken(t Token) error {
	k := t.Kind()
	b := e.buf // use local variable to avoid mutating e in case of error

	// Append any delimiters or optional whitespace.
	b = e.tokens.mayAppendDelim(b, k)
	if e.options.multiline {
		b = e.appendWhitespace(b, k)
	}

	// Append the token to the output and to the state machine.
	var err error
	switch k {
	case 'n':
		b = append(b, "null"...)
		err = e.tokens.appendLiteral()
	case 'f':
		b = append(b, "false"...)
		err = e.tokens.appendLiteral()
	case 't':
		b = append(b, "true"...)
		err = e.tokens.appendLiteral()
	case '"':
		n0 := len(b) // offset before calling t.appendString
		if b, err = t.appendString(b, !e.options.AllowInvalidUTF8, e.options.preserveRawStrings, e.options.EscapeRune); err != nil {
			break
		}
		// A string in object-name position must pass the duplicate-name check.
		if !e.options.AllowDuplicateNames && e.tokens.last.needObjectName() {
			if !e.tokens.last.isValidNamespace() {
				err = errInvalidNamespace
				break
			}
			if e.tokens.last.isActiveNamespace() && !e.namespaces.last().insertQuoted(b[n0:], false) {
				err = &SyntacticError{str: "duplicate name " + string(b[n0:]) + " in object"}
				break
			}
			e.names.replaceLastQuotedOffset(n0) // only replace if insertQuoted succeeds
		}
		err = e.tokens.appendString()
	case '0':
		if b, err = t.appendNumber(b, e.options.canonicalizeNumbers); err != nil {
			break
		}
		err = e.tokens.appendNumber()
	case '{':
		b = append(b, '{')
		if err = e.tokens.pushObject(); err != nil {
			break
		}
		if !e.options.AllowDuplicateNames {
			e.names.push()
			e.namespaces.push()
		}
	case '}':
		b = append(b, '}')
		if err = e.tokens.popObject(); err != nil {
			break
		}
		if !e.options.AllowDuplicateNames {
			e.names.pop()
			e.namespaces.pop()
		}
	case '[':
		b = append(b, '[')
		err = e.tokens.pushArray()
	case ']':
		b = append(b, ']')
		err = e.tokens.popArray()
	default:
		return &SyntacticError{str: "invalid json.Token"}
	}
	if err != nil {
		return err
	}

	// Finish off the buffer and store it back into e.
	e.buf = b
	if e.needFlush() {
		return e.flush()
	}
	return nil
}

// Sentinel values for the bits argument of writeNumber, selecting
// raw int64 or raw uint64 interpretation of the float64 bit pattern.
const (
	rawIntNumber  = -1
	rawUintNumber = -2
)

// writeNumber is a specialized version of WriteToken, but optimized for numbers.
// As a special-case, if bits is -1 or -2, it will treat v as
// the raw-encoded bits of an int64 or uint64, respectively.
// It is only called from arshal_default.go.
510 func (e *Encoder) writeNumber(v float64, bits int, quote bool) error { 511 b := e.buf // use local variable to avoid mutating e in case of error 512 513 // Append any delimiters or optional whitespace. 514 b = e.tokens.mayAppendDelim(b, '0') 515 if e.options.multiline { 516 b = e.appendWhitespace(b, '0') 517 } 518 519 if quote { 520 // Append the value to the output. 521 n0 := len(b) // offset before appending the number 522 b = append(b, '"') 523 switch bits { 524 case rawIntNumber: 525 b = strconv.AppendInt(b, int64(math.Float64bits(v)), 10) 526 case rawUintNumber: 527 b = strconv.AppendUint(b, uint64(math.Float64bits(v)), 10) 528 default: 529 b = appendNumber(b, v, bits) 530 } 531 b = append(b, '"') 532 533 // Escape the string if necessary. 534 if e.options.EscapeRune != nil { 535 b2 := append(e.unusedCache, b[n0+len(`"`):len(b)-len(`"`)]...) 536 b, _ = appendString(b[:n0], string(b2), false, e.options.EscapeRune) 537 e.unusedCache = b2[:0] 538 } 539 540 // Update the state machine. 541 if !e.options.AllowDuplicateNames && e.tokens.last.needObjectName() { 542 if !e.tokens.last.isValidNamespace() { 543 return errInvalidNamespace 544 } 545 if e.tokens.last.isActiveNamespace() && !e.namespaces.last().insertQuoted(b[n0:], false) { 546 return &SyntacticError{str: "duplicate name " + string(b[n0:]) + " in object"} 547 } 548 e.names.replaceLastQuotedOffset(n0) // only replace if insertQuoted succeeds 549 } 550 if err := e.tokens.appendString(); err != nil { 551 return err 552 } 553 } else { 554 switch bits { 555 case rawIntNumber: 556 b = strconv.AppendInt(b, int64(math.Float64bits(v)), 10) 557 case rawUintNumber: 558 b = strconv.AppendUint(b, uint64(math.Float64bits(v)), 10) 559 default: 560 b = appendNumber(b, v, bits) 561 } 562 if err := e.tokens.appendNumber(); err != nil { 563 return err 564 } 565 } 566 567 // Finish off the buffer and store it back into e. 
568 e.buf = b 569 if e.needFlush() { 570 return e.flush() 571 } 572 return nil 573 } 574 575 // WriteValue writes the next raw value and advances the internal write offset. 576 // The Encoder does not simply copy the provided value verbatim, but 577 // parses it to ensure that it is syntactically valid and reformats it 578 // according to how the Encoder is configured to format whitespace and strings. 579 // 580 // The provided value kind must be consistent with the JSON grammar 581 // (see examples on Encoder.WriteToken). If the provided value is invalid, 582 // then it reports a SyntacticError and the internal state remains unchanged. 583 func (e *Encoder) WriteValue(v RawValue) error { 584 e.maxValue |= len(v) // bitwise OR is a fast approximation of max 585 586 k := v.Kind() 587 b := e.buf // use local variable to avoid mutating e in case of error 588 589 // Append any delimiters or optional whitespace. 590 b = e.tokens.mayAppendDelim(b, k) 591 if e.options.multiline { 592 b = e.appendWhitespace(b, k) 593 } 594 595 // Append the value the output. 596 var err error 597 v = v[consumeWhitespace(v):] 598 n0 := len(b) // offset before calling e.reformatValue 599 b, v, err = e.reformatValue(b, v, e.tokens.depth()) 600 if err != nil { 601 return err 602 } 603 v = v[consumeWhitespace(v):] 604 if len(v) > 0 { 605 return newInvalidCharacterError(v[0:], "after top-level value") 606 } 607 608 // Append the kind to the state machine. 
609 switch k { 610 case 'n', 'f', 't': 611 err = e.tokens.appendLiteral() 612 case '"': 613 if !e.options.AllowDuplicateNames && e.tokens.last.needObjectName() { 614 if !e.tokens.last.isValidNamespace() { 615 err = errInvalidNamespace 616 break 617 } 618 if e.tokens.last.isActiveNamespace() && !e.namespaces.last().insertQuoted(b[n0:], false) { 619 err = &SyntacticError{str: "duplicate name " + string(b[n0:]) + " in object"} 620 break 621 } 622 e.names.replaceLastQuotedOffset(n0) // only replace if insertQuoted succeeds 623 } 624 err = e.tokens.appendString() 625 case '0': 626 err = e.tokens.appendNumber() 627 case '{': 628 if err = e.tokens.pushObject(); err != nil { 629 break 630 } 631 if err = e.tokens.popObject(); err != nil { 632 panic("BUG: popObject should never fail immediately after pushObject: " + err.Error()) 633 } 634 case '[': 635 if err = e.tokens.pushArray(); err != nil { 636 break 637 } 638 if err = e.tokens.popArray(); err != nil { 639 panic("BUG: popArray should never fail immediately after pushArray: " + err.Error()) 640 } 641 } 642 if err != nil { 643 return err 644 } 645 646 // Finish off the buffer and store it back into e. 647 e.buf = b 648 if e.needFlush() { 649 return e.flush() 650 } 651 return nil 652 } 653 654 // appendWhitespace appends whitespace that immediately precedes the next token. 655 func (e *Encoder) appendWhitespace(b []byte, next Kind) []byte { 656 if e.tokens.needDelim(next) == ':' { 657 return append(b, ' ') 658 } else { 659 return e.appendIndent(b, e.tokens.needIndent(next)) 660 } 661 } 662 663 // appendIndent appends the appropriate number of indentation characters 664 // for the current nested level, n. 665 func (e *Encoder) appendIndent(b []byte, n int) []byte { 666 if n == 0 { 667 return b 668 } 669 b = append(b, '\n') 670 b = append(b, e.options.IndentPrefix...) 671 for ; n > 1; n-- { 672 b = append(b, e.options.Indent...) 
673 } 674 return b 675 } 676 677 // reformatValue parses a JSON value from the start of src and 678 // appends it to the end of dst, reformatting whitespace and strings as needed. 679 // It returns the updated versions of dst and src. 680 func (e *Encoder) reformatValue(dst []byte, src RawValue, depth int) ([]byte, RawValue, error) { 681 // TODO: Should this update valueFlags as input? 682 if len(src) == 0 { 683 return dst, src, io.ErrUnexpectedEOF 684 } 685 var n int 686 var err error 687 switch k := Kind(src[0]).normalize(); k { 688 case 'n': 689 if n = consumeNull(src); n == 0 { 690 n, err = consumeLiteral(src, "null") 691 } 692 case 'f': 693 if n = consumeFalse(src); n == 0 { 694 n, err = consumeLiteral(src, "false") 695 } 696 case 't': 697 if n = consumeTrue(src); n == 0 { 698 n, err = consumeLiteral(src, "true") 699 } 700 case '"': 701 if n := consumeSimpleString(src); n > 0 && e.options.EscapeRune == nil { 702 dst, src = append(dst, src[:n]...), src[n:] // copy simple strings verbatim 703 return dst, src, nil 704 } 705 return reformatString(dst, src, !e.options.AllowInvalidUTF8, e.options.preserveRawStrings, e.options.EscapeRune) 706 case '0': 707 if n := consumeSimpleNumber(src); n > 0 && !e.options.canonicalizeNumbers { 708 dst, src = append(dst, src[:n]...), src[n:] // copy simple numbers verbatim 709 return dst, src, nil 710 } 711 return reformatNumber(dst, src, e.options.canonicalizeNumbers) 712 case '{': 713 return e.reformatObject(dst, src, depth) 714 case '[': 715 return e.reformatArray(dst, src, depth) 716 default: 717 return dst, src, newInvalidCharacterError(src, "at start of value") 718 } 719 if err != nil { 720 return dst, src, err 721 } 722 dst, src = append(dst, src[:n]...), src[n:] 723 return dst, src, nil 724 } 725 726 // reformatObject parses a JSON object from the start of src and 727 // appends it to the end of src, reformatting whitespace and strings as needed. 728 // It returns the updated versions of dst and src. 
func (e *Encoder) reformatObject(dst []byte, src RawValue, depth int) ([]byte, RawValue, error) {
	// Append object start.
	if src[0] != '{' {
		panic("BUG: reformatObject must be called with a buffer that starts with '{'")
	}
	dst, src = append(dst, '{'), src[1:]

	// Append (possible) object end.
	src = src[consumeWhitespace(src):]
	if len(src) == 0 {
		return dst, src, io.ErrUnexpectedEOF
	}
	if src[0] == '}' {
		dst, src = append(dst, '}'), src[1:]
		return dst, src, nil
	}

	var err error
	var names *objectNamespace
	if !e.options.AllowDuplicateNames {
		// Track the names of this object in a fresh namespace
		// that is popped again when this function returns.
		e.namespaces.push()
		defer e.namespaces.pop()
		names = e.namespaces.last()
	}
	depth++
	for {
		// Append optional newline and indentation.
		if e.options.multiline {
			dst = e.appendIndent(dst, depth)
		}

		// Append object name.
		src = src[consumeWhitespace(src):]
		if len(src) == 0 {
			return dst, src, io.ErrUnexpectedEOF
		}
		n0 := len(dst) // offset before calling reformatString
		n := consumeSimpleString(src)
		if n > 0 && e.options.EscapeRune == nil {
			dst, src = append(dst, src[:n]...), src[n:] // copy simple strings verbatim
		} else {
			dst, src, err = reformatString(dst, src, !e.options.AllowInvalidUTF8, e.options.preserveRawStrings, e.options.EscapeRune)
		}
		if err != nil {
			return dst, src, err
		}
		if !e.options.AllowDuplicateNames && !names.insertQuoted(dst[n0:], false) {
			return dst, src, &SyntacticError{str: "duplicate name " + string(dst[n0:]) + " in object"}
		}

		// Append colon.
		src = src[consumeWhitespace(src):]
		if len(src) == 0 {
			return dst, src, io.ErrUnexpectedEOF
		}
		if src[0] != ':' {
			return dst, src, newInvalidCharacterError(src, "after object name (expecting ':')")
		}
		dst, src = append(dst, ':'), src[1:]
		if e.options.multiline {
			dst = append(dst, ' ')
		}

		// Append object value.
		src = src[consumeWhitespace(src):]
		if len(src) == 0 {
			return dst, src, io.ErrUnexpectedEOF
		}
		dst, src, err = e.reformatValue(dst, src, depth)
		if err != nil {
			return dst, src, err
		}

		// Append comma or object end.
		src = src[consumeWhitespace(src):]
		if len(src) == 0 {
			return dst, src, io.ErrUnexpectedEOF
		}
		switch src[0] {
		case ',':
			dst, src = append(dst, ','), src[1:]
			continue
		case '}':
			if e.options.multiline {
				// The closing brace is indented at the enclosing level.
				dst = e.appendIndent(dst, depth-1)
			}
			dst, src = append(dst, '}'), src[1:]
			return dst, src, nil
		default:
			return dst, src, newInvalidCharacterError(src, "after object value (expecting ',' or '}')")
		}
	}
}

// reformatArray parses a JSON array from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the updated versions of dst and src.
func (e *Encoder) reformatArray(dst []byte, src RawValue, depth int) ([]byte, RawValue, error) {
	// Append array start.
	if src[0] != '[' {
		panic("BUG: reformatArray must be called with a buffer that starts with '['")
	}
	dst, src = append(dst, '['), src[1:]

	// Append (possible) array end.
	src = src[consumeWhitespace(src):]
	if len(src) == 0 {
		return dst, src, io.ErrUnexpectedEOF
	}
	if src[0] == ']' {
		dst, src = append(dst, ']'), src[1:]
		return dst, src, nil
	}

	var err error
	depth++
	for {
		// Append optional newline and indentation.
		if e.options.multiline {
			dst = e.appendIndent(dst, depth)
		}

		// Append array value.
		src = src[consumeWhitespace(src):]
		if len(src) == 0 {
			return dst, src, io.ErrUnexpectedEOF
		}
		dst, src, err = e.reformatValue(dst, src, depth)
		if err != nil {
			return dst, src, err
		}

		// Append comma or array end.
		src = src[consumeWhitespace(src):]
		if len(src) == 0 {
			return dst, src, io.ErrUnexpectedEOF
		}
		switch src[0] {
		case ',':
			dst, src = append(dst, ','), src[1:]
			continue
		case ']':
			if e.options.multiline {
				// The closing bracket is indented at the enclosing level.
				dst = e.appendIndent(dst, depth-1)
			}
			dst, src = append(dst, ']'), src[1:]
			return dst, src, nil
		default:
			return dst, src, newInvalidCharacterError(src, "after array value (expecting ',' or ']')")
		}
	}
}

// OutputOffset returns the current output byte offset. It gives the location
// of the next byte immediately after the most recently written token or value.
// The number of bytes actually written to the underlying io.Writer may be less
// than this offset due to internal buffering effects.
func (e *Encoder) OutputOffset() int64 {
	return e.previousOffsetEnd()
}

// UnusedBuffer returns a zero-length buffer with a possible non-zero capacity.
// This buffer is intended to be used to populate a RawValue
// being passed to an immediately succeeding WriteValue call.
//
// Example usage:
//
//	b := e.UnusedBuffer()
//	b = append(b, '"')
//	b = appendString(b, v) // append the string formatting of v
//	b = append(b, '"')
//	... := e.WriteValue(b)
//
// It is the user's responsibility to ensure that the value is valid JSON.
func (e *Encoder) UnusedBuffer() []byte {
	// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
	// need to take special care to avoid mangling the data while reformatting.
	// WriteValue can't easily identify whether the input RawValue aliases e.buf
	// without using unsafe.Pointer. Thus, we just return a different buffer.
	// Should this ever alias e.buf, we need to consider how it operates with
	// the specialized performance optimization for bytes.Buffer.
	//
	// OR-ing with 63 keeps the requested capacity at 64 bytes or more.
	n := 1 << bits.Len(uint(e.maxValue|63)) // fast approximation for max length
	if cap(e.unusedCache) < n {
		e.unusedCache = make([]byte, 0, n)
	}
	return e.unusedCache
}

// StackDepth returns the depth of the state machine for written JSON data.
// Each level on the stack represents a nested JSON object or array.
// It is incremented whenever an ObjectStart or ArrayStart token is encountered
// and decremented whenever an ObjectEnd or ArrayEnd token is encountered.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (e *Encoder) StackDepth() int {
	// NOTE: Keep in sync with Decoder.StackDepth.
	return e.tokens.depth() - 1
}

// StackIndex returns information about the specified stack level.
// It must be a number between 0 and StackDepth, inclusive.
// For each level, it reports the kind:
//
//   - 0 for a level of zero,
//   - '{' for a level representing a JSON object, and
//   - '[' for a level representing a JSON array.
//
// It also reports the length of that JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (e *Encoder) StackIndex(i int) (Kind, int) {
	// NOTE: Keep in sync with Decoder.StackIndex.
	switch s := e.tokens.index(i); {
	case i > 0 && s.isObject():
		return '{', s.length()
	case i > 0 && s.isArray():
		return '[', s.length()
	default:
		return 0, s.length()
	}
}

// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
// Object names are only present if AllowDuplicateNames is false, otherwise
// object members are represented using their index within the object.
954 func (e *Encoder) StackPointer() string { 955 e.names.copyQuotedBuffer(e.buf) 956 return string(e.appendStackPointer(nil)) 957 } 958 959 // appendString appends src to dst as a JSON string per RFC 7159, section 7. 960 // 961 // If validateUTF8 is specified, this rejects input that contains invalid UTF-8 962 // otherwise invalid bytes are replaced with the Unicode replacement character. 963 // If escapeRune is provided, it specifies which runes to escape using 964 // hexadecimal sequences. If nil, the shortest representable form is used, 965 // which is also the canonical form for strings (RFC 8785, section 3.2.2.2). 966 // 967 // Note that this API allows full control over the formatting of strings 968 // except for whether a forward solidus '/' may be formatted as '\/' and 969 // the casing of hexadecimal Unicode escape sequences. 970 func appendString(dst []byte, src string, validateUTF8 bool, escapeRune func(rune) bool) ([]byte, error) { 971 appendEscapedASCII := func(dst []byte, c byte) []byte { 972 switch c { 973 case '"', '\\': 974 dst = append(dst, '\\', c) 975 case '\b': 976 dst = append(dst, "\\b"...) 977 case '\f': 978 dst = append(dst, "\\f"...) 979 case '\n': 980 dst = append(dst, "\\n"...) 981 case '\r': 982 dst = append(dst, "\\r"...) 983 case '\t': 984 dst = append(dst, "\\t"...) 985 default: 986 dst = append(dst, "\\u"...) 987 dst = appendHexUint16(dst, uint16(c)) 988 } 989 return dst 990 } 991 appendEscapedUnicode := func(dst []byte, r rune) []byte { 992 if r1, r2 := utf16.EncodeRune(r); r1 != '\ufffd' && r2 != '\ufffd' { 993 dst = append(dst, "\\u"...) 994 dst = appendHexUint16(dst, uint16(r1)) 995 dst = append(dst, "\\u"...) 996 dst = appendHexUint16(dst, uint16(r2)) 997 } else { 998 dst = append(dst, "\\u"...) 999 dst = appendHexUint16(dst, uint16(r)) 1000 } 1001 return dst 1002 } 1003 1004 // Optimize for when escapeRune is nil. 
1005 if escapeRune == nil { 1006 var i, n int 1007 dst = append(dst, '"') 1008 for uint(len(src)) > uint(n) { 1009 // Handle single-byte ASCII. 1010 if c := src[n]; c < utf8.RuneSelf { 1011 n++ 1012 if c < ' ' || c == '"' || c == '\\' { 1013 dst = append(dst, src[i:n-1]...) 1014 dst = appendEscapedASCII(dst, c) 1015 i = n 1016 } 1017 continue 1018 } 1019 1020 // Handle multi-byte Unicode. 1021 _, rn := utf8.DecodeRuneInString(src[n:]) 1022 n += rn 1023 if rn == 1 { // must be utf8.RuneError since we already checked for single-byte ASCII 1024 dst = append(dst, src[i:n-rn]...) 1025 if validateUTF8 { 1026 return dst, &SyntacticError{str: "invalid UTF-8 within string"} 1027 } 1028 dst = append(dst, "\ufffd"...) 1029 i = n 1030 } 1031 } 1032 dst = append(dst, src[i:n]...) 1033 dst = append(dst, '"') 1034 return dst, nil 1035 } 1036 1037 // Slower implementation for when escapeRune is non-nil. 1038 var i, n int 1039 dst = append(dst, '"') 1040 for uint(len(src)) > uint(n) { 1041 switch r, rn := utf8.DecodeRuneInString(src[n:]); { 1042 case r == utf8.RuneError && rn == 1: 1043 dst = append(dst, src[i:n]...) 1044 if validateUTF8 { 1045 return dst, &SyntacticError{str: "invalid UTF-8 within string"} 1046 } 1047 if escapeRune('\ufffd') { 1048 dst = append(dst, `\ufffd`...) 1049 } else { 1050 dst = append(dst, "\ufffd"...) 1051 } 1052 n += rn 1053 i = n 1054 case escapeRune(r): 1055 dst = append(dst, src[i:n]...) 1056 dst = appendEscapedUnicode(dst, r) 1057 n += rn 1058 i = n 1059 case r < ' ' || r == '"' || r == '\\': 1060 dst = append(dst, src[i:n]...) 1061 dst = appendEscapedASCII(dst, byte(r)) 1062 n += rn 1063 i = n 1064 default: 1065 n += rn 1066 } 1067 } 1068 dst = append(dst, src[i:n]...) 1069 dst = append(dst, '"') 1070 return dst, nil 1071 } 1072 1073 // reformatString consumes a JSON string from src and appends it to dst, 1074 // reformatting it if necessary for the given escapeRune parameter. 1075 // It returns the appended output and the remainder of the input. 
1076 func reformatString(dst, src []byte, validateUTF8, preserveRaw bool, escapeRune func(rune) bool) ([]byte, []byte, error) { 1077 // TODO: Should this update valueFlags as input? 1078 var flags valueFlags 1079 n, err := consumeString(&flags, src, validateUTF8) 1080 if err != nil { 1081 return dst, src[n:], err 1082 } 1083 if preserveRaw || (escapeRune == nil && flags.isCanonical()) { 1084 dst = append(dst, src[:n]...) // copy the string verbatim 1085 return dst, src[n:], nil 1086 } 1087 1088 // TODO: Implement a direct, raw-to-raw reformat for strings. 1089 // If the escapeRune option would have resulted in no changes to the output, 1090 // it would be faster to simply append src to dst without going through 1091 // an intermediary representation in a separate buffer. 1092 b, _ := unescapeString(make([]byte, 0, n), src[:n]) 1093 dst, _ = appendString(dst, string(b), validateUTF8, escapeRune) 1094 return dst, src[n:], nil 1095 } 1096 1097 // appendNumber appends src to dst as a JSON number per RFC 7159, section 6. 1098 // It formats numbers similar to the ES6 number-to-string conversion. 1099 // See https://go.dev/issue/14135. 1100 // 1101 // The output is identical to ECMA-262, 6th edition, section 7.1.12.1 and with 1102 // RFC 8785, section 3.2.2.3 for 64-bit floating-point numbers except for -0, 1103 // which is formatted as -0 instead of just 0. 1104 // 1105 // For 32-bit floating-point numbers, 1106 // the output is a 32-bit equivalent of the algorithm. 1107 // Note that ECMA-262 specifies no algorithm for 32-bit numbers. 
1108 func appendNumber(dst []byte, src float64, bits int) []byte { 1109 if bits == 32 { 1110 src = float64(float32(src)) 1111 } 1112 1113 abs := math.Abs(src) 1114 fmt := byte('f') 1115 if abs != 0 { 1116 if bits == 64 && (float64(abs) < 1e-6 || float64(abs) >= 1e21) || 1117 bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { 1118 fmt = 'e' 1119 } 1120 } 1121 dst = strconv.AppendFloat(dst, src, fmt, -1, bits) 1122 if fmt == 'e' { 1123 // Clean up e-09 to e-9. 1124 n := len(dst) 1125 if n >= 4 && dst[n-4] == 'e' && dst[n-3] == '-' && dst[n-2] == '0' { 1126 dst[n-2] = dst[n-1] 1127 dst = dst[:n-1] 1128 } 1129 } 1130 return dst 1131 } 1132 1133 // reformatNumber consumes a JSON string from src and appends it to dst, 1134 // canonicalizing it if specified. 1135 // It returns the appended output and the remainder of the input. 1136 func reformatNumber(dst, src []byte, canonicalize bool) ([]byte, []byte, error) { 1137 n, err := consumeNumber(src) 1138 if err != nil { 1139 return dst, src[n:], err 1140 } 1141 if !canonicalize { 1142 dst = append(dst, src[:n]...) // copy the number verbatim 1143 return dst, src[n:], nil 1144 } 1145 1146 // Canonicalize the number per RFC 8785, section 3.2.2.3. 1147 // As an optimization, we can copy integer numbers below 2⁵³ verbatim. 1148 const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10)) 1149 if n < maxExactIntegerDigits && consumeSimpleNumber(src[:n]) == n { 1150 dst = append(dst, src[:n]...) // copy the number verbatim 1151 return dst, src[n:], nil 1152 } 1153 fv, _ := strconv.ParseFloat(string(src[:n]), 64) 1154 switch { 1155 case fv == 0: 1156 fv = 0 // normalize negative zero as just zero 1157 case math.IsInf(fv, +1): 1158 fv = +math.MaxFloat64 1159 case math.IsInf(fv, -1): 1160 fv = -math.MaxFloat64 1161 } 1162 return appendNumber(dst, fv, 64), src[n:], nil 1163 } 1164 1165 // appendHexUint16 appends src to dst as a 4-byte hexadecimal number. 
func appendHexUint16(dst []byte, src uint16) []byte {
	// Count the hexadecimal digits strconv will emit for src. Zero has no
	// set bits, yet is still printed as a single "0".
	digits := (bits.Len16(src) + 3) / 4
	if digits == 0 {
		digits = 1
	}
	// Left-pad with zeros so the result is always four characters wide.
	for pad := 4 - digits; pad > 0; pad-- {
		dst = append(dst, '0')
	}
	return strconv.AppendUint(dst, uint64(src), 16)
}