github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/internal/encoder/encoder.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package encoder
    18  
    19  import (
    20      `bytes`
    21      `encoding/json`
    22      `reflect`
    23      `runtime`
    24      `unsafe`
    25  
    26      `github.com/bytedance/sonic/internal/native`
    27      `github.com/bytedance/sonic/internal/native/types`
    28      `github.com/bytedance/sonic/internal/rt`
    29      `github.com/bytedance/sonic/utf8`
    30      `github.com/bytedance/sonic/option`
    31  )
    32  
// Options is a set of encoding options.
//
// Each option defined in this file occupies one bit of the value, so
// several options can be combined with the bitwise OR operator.
type Options uint64
    35  
    36  const (
    37      bitSortMapKeys          = iota
    38      bitEscapeHTML          
    39      bitCompactMarshaler
    40      bitNoQuoteTextMarshaler
    41      bitNoNullSliceOrMap
    42      bitValidateString
    43      bitNoValidateJSONMarshaler
    44      bitNoEncoderNewline 
    45  
    46      // used for recursive compile
    47      bitPointerValue = 63
    48  )
    49  
    50  const (
    51      // SortMapKeys indicates that the keys of a map needs to be sorted 
    52      // before serializing into JSON.
    53      // WARNING: This hurts performance A LOT, USE WITH CARE.
    54      SortMapKeys          Options = 1 << bitSortMapKeys
    55  
    56      // EscapeHTML indicates encoder to escape all HTML characters 
    57      // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
    58      // WARNING: This hurts performance A LOT, USE WITH CARE.
    59      EscapeHTML           Options = 1 << bitEscapeHTML
    60  
    61      // CompactMarshaler indicates that the output JSON from json.Marshaler 
    62      // is always compact and needs no validation 
    63      CompactMarshaler     Options = 1 << bitCompactMarshaler
    64  
    65      // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler 
    66      // is always escaped string and needs no quoting
    67      NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler
    68  
    69      // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}',
    70      // instead of 'null'
    71      NoNullSliceOrMap     Options = 1 << bitNoNullSliceOrMap
    72  
    73      // ValidateString indicates that encoder should validate the input string
    74      // before encoding it into JSON.
    75      ValidateString       Options = 1 << bitValidateString
    76  
    77      // NoValidateJSONMarshaler indicates that the encoder should not validate the output string
    78      // after encoding the JSONMarshaler to JSON.
    79      NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler
    80  
    81      // NoEncoderNewline indicates that the encoder should not add a newline after every message
    82      NoEncoderNewline Options = 1 << bitNoEncoderNewline
    83    
    84      // CompatibleWithStd is used to be compatible with std encoder.
    85      CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
    86  )
    87  
// Encoder represents a specific set of encoder configurations.
type Encoder struct {
    // Opts is the option set forwarded to Encode / EncodeIndented by the
    // Encode method; the Set* methods and SortKeys toggle bits in it.
    Opts Options
    // prefix and indent are assigned by SetIndent. When either one is
    // non-empty, the Encode method produces indented output.
    prefix string
    indent string
}
    94  
    95  // Encode returns the JSON encoding of v.
    96  func (self *Encoder) Encode(v interface{}) ([]byte, error) {
    97      if self.indent != "" || self.prefix != "" { 
    98          return EncodeIndented(v, self.prefix, self.indent, self.Opts)
    99      }
   100      return Encode(v, self.Opts)
   101  }
   102  
   103  // SortKeys enables the SortMapKeys option.
   104  func (self *Encoder) SortKeys() *Encoder {
   105      self.Opts |= SortMapKeys
   106      return self
   107  }
   108  
   109  // SetEscapeHTML specifies if option EscapeHTML opens
   110  func (self *Encoder) SetEscapeHTML(f bool) {
   111      if f {
   112          self.Opts |= EscapeHTML
   113      } else {
   114          self.Opts &= ^EscapeHTML
   115      }
   116  }
   117  
   118  // SetValidateString specifies if option ValidateString opens
   119  func (self *Encoder) SetValidateString(f bool) {
   120      if f {
   121          self.Opts |= ValidateString
   122      } else {
   123          self.Opts &= ^ValidateString
   124      }
   125  }
   126  
   127  // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
   128  func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
   129      if f {
   130          self.Opts |= NoValidateJSONMarshaler
   131      } else {
   132          self.Opts &= ^NoValidateJSONMarshaler
   133      }
   134  }
   135  
   136  // SetNoEncoderNewline specifies if option NoEncoderNewline opens
   137  func (self *Encoder) SetNoEncoderNewline(f bool) {
   138      if f {
   139          self.Opts |= NoEncoderNewline
   140      } else {
   141          self.Opts &= ^NoEncoderNewline
   142      }
   143  }
   144  
   145  
   146  // SetCompactMarshaler specifies if option CompactMarshaler opens
   147  func (self *Encoder) SetCompactMarshaler(f bool) {
   148      if f {
   149          self.Opts |= CompactMarshaler
   150      } else {
   151          self.Opts &= ^CompactMarshaler
   152      }
   153  }
   154  
   155  // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
   156  func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
   157      if f {
   158          self.Opts |= NoQuoteTextMarshaler
   159      } else {
   160          self.Opts &= ^NoQuoteTextMarshaler
   161      }
   162  }
   163  
   164  // SetIndent instructs the encoder to format each subsequent encoded
   165  // value as if indented by the package-level function EncodeIndent().
   166  // Calling SetIndent("", "") disables indentation.
   167  func (enc *Encoder) SetIndent(prefix, indent string) {
   168      enc.prefix = prefix
   169      enc.indent = indent
   170  }
   171  
   172  // Quote returns the JSON-quoted version of s.
   173  func Quote(s string) string {
   174      var n int
   175      var p []byte
   176  
   177      /* check for empty string */
   178      if s == "" {
   179          return `""`
   180      }
   181  
   182      /* allocate space for result */
   183      n = len(s) + 2
   184      p = make([]byte, 0, n)
   185  
   186      /* call the encoder */
   187      _ = encodeString(&p, s)
   188      return rt.Mem2Str(p)
   189  }
   190  
   191  // Encode returns the JSON encoding of val, encoded with opts.
   192  func Encode(val interface{}, opts Options) ([]byte, error) {
   193      var ret []byte
   194  
   195      buf := newBytes()
   196      err := encodeInto(&buf, val, opts)
   197  
   198      /* check for errors */
   199      if err != nil {
   200          freeBytes(buf)
   201          return nil, err
   202      }
   203  
   204      /* htmlescape or correct UTF-8 if opts enable */
   205      old := buf
   206      buf = encodeFinish(old, opts)
   207      pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr
   208      pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr
   209  
   210      /* return when allocated a new buffer */
   211      if pbuf != pold {
   212          freeBytes(old)
   213          return buf, nil
   214      }
   215  
   216      /* make a copy of the result */
   217      ret = make([]byte, len(buf))
   218      copy(ret, buf)
   219  
   220      freeBytes(buf)
   221      /* return the buffer into pool */
   222      return ret, nil
   223  }
   224  
   225  // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
   226  // a new one.
   227  func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
   228      err := encodeInto(buf, val, opts)
   229      if err != nil {
   230          return err
   231      }
   232      *buf = encodeFinish(*buf, opts)
   233      return err
   234  }
   235  
   236  func encodeInto(buf *[]byte, val interface{}, opts Options) error {
   237      stk := newStack()
   238      efv := rt.UnpackEface(val)
   239      err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
   240  
   241      /* return the stack into pool */
   242      if err != nil {
   243          resetStack(stk)
   244      }
   245      freeStack(stk)
   246  
   247      /* avoid GC ahead */
   248      runtime.KeepAlive(buf)
   249      runtime.KeepAlive(efv)
   250      return err
   251  }
   252  
   253  func encodeFinish(buf []byte, opts Options) []byte {
   254      if opts & EscapeHTML != 0 {
   255          buf = HTMLEscape(nil, buf)
   256      }
   257      if opts & ValidateString != 0 && !utf8.Validate(buf) {
   258          buf = utf8.CorrectWith(nil, buf, `\ufffd`)
   259      }
   260      return buf
   261  }
   262  
// typeByte holds the result of rt.UnpackType applied to the reflect type
// of byte.
var typeByte = rt.UnpackType(reflect.TypeOf(byte(0)))
   264  
// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
// so that the JSON will be safe to embed inside HTML <script> tags.
// For historical reasons, web browsers don't honor standard HTML
// escaping within <script> tags, so an alternative JSON encoding must
// be used.
//
// It is the exported entry point for htmlEscape and returns that
// function's result unchanged.
func HTMLEscape(dst []byte, src []byte) []byte {
    return htmlEscape(dst, src)
}
   274  
   275  // EncodeIndented is like Encode but applies Indent to format the output.
   276  // Each JSON element in the output will begin on a new line beginning with prefix
   277  // followed by one or more copies of indent according to the indentation nesting.
   278  func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
   279      var err error
   280      var out []byte
   281      var buf *bytes.Buffer
   282  
   283      /* encode into the buffer */
   284      out = newBytes()
   285      err = EncodeInto(&out, val, opts)
   286  
   287      /* check for errors */
   288      if err != nil {
   289          freeBytes(out)
   290          return nil, err
   291      }
   292  
   293      /* indent the JSON */
   294      buf = newBuffer()
   295      err = json.Indent(buf, out, prefix, indent)
   296  
   297      /* check for errors */
   298      if err != nil {
   299          freeBytes(out)
   300          freeBuffer(buf)
   301          return nil, err
   302      }
   303  
   304      /* copy to the result buffer */
   305      ret := make([]byte, buf.Len())
   306      copy(ret, buf.Bytes())
   307  
   308      /* return the buffers into pool */
   309      freeBytes(out)
   310      freeBuffer(buf)
   311      return ret, nil
   312  }
   313  
   314  // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
   315  // order to reduce the first-hit latency.
   316  //
   317  // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
   318  // a compile option to set the depth of recursive compile for the nested struct type.
   319  func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
   320      cfg := option.DefaultCompileOptions()
   321      for _, opt := range opts {
   322          opt(&cfg)
   323      }
   324      return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
   325  }
   326  
   327  // Valid validates json and returns first non-blank character position,
   328  // if it is only one valid json value.
   329  // Otherwise returns invalid character position using start.
   330  //
   331  // Note: it does not check for the invalid UTF-8 characters.
   332  func Valid(data []byte) (ok bool, start int) {
   333      n := len(data)
   334      if n == 0 {
   335          return false, -1
   336      }
   337      s := rt.Mem2Str(data)
   338      p := 0
   339      m := types.NewStateMachine()
   340      ret := native.ValidateOne(&s, &p, m, types.F_VALIDATE_STRING)
   341      types.FreeStateMachine(m)
   342  
   343      if ret < 0 {
   344          return false, p-1
   345      }
   346  
   347      /* check for trailing spaces */
   348      for ;p < n; p++ {
   349          if (types.SPACE_MASK & (1 << data[p])) == 0 {
   350              return false, p
   351          }
   352      }
   353  
   354      return true, ret
   355  }