github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/internal/encoder/encoder.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package encoder
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"reflect"
    23  	"runtime"
    24  	"unsafe"
    25  
    26  	"github.com/goshafaq/sonic/internal/native"
    27  	"github.com/goshafaq/sonic/internal/native/types"
    28  	"github.com/goshafaq/sonic/internal/rt"
    29  	"github.com/goshafaq/sonic/option"
    30  	"github.com/goshafaq/sonic/utf8"
    31  )
    32  
    33  // Options is a set of encoding options, stored as bit flags in a uint64.
    34  type Options uint64
    35  
    36  const (
    37  	bitSortMapKeys = iota // bit 0: shift amount used by SortMapKeys
    38  	bitEscapeHTML // bit 1: shift amount used by EscapeHTML
    39  	bitCompactMarshaler // bit 2: shift amount used by CompactMarshaler
    40  	bitNoQuoteTextMarshaler // bit 3: shift amount used by NoQuoteTextMarshaler
    41  	bitNoNullSliceOrMap // bit 4: shift amount used by NoNullSliceOrMap
    42  	bitValidateString // bit 5: shift amount used by ValidateString
    43  	bitNoValidateJSONMarshaler // bit 6: shift amount used by NoValidateJSONMarshaler
    44  
    45  	// Fixed at bit 63; used for recursive compile (its consumer is not visible in this part of the file).
    46  	bitPointerValue = 63
    47  )
    48  
    49  const (
    50  	// SortMapKeys indicates that the keys of a map need to be sorted
    51  	// before serializing into JSON.
    52  	// WARNING: This hurts performance A LOT, USE WITH CARE.
    53  	SortMapKeys Options = 1 << bitSortMapKeys
    54  
    55  	// EscapeHTML instructs the encoder to escape all HTML characters
    56  	// after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape).
    57  	// WARNING: This hurts performance A LOT, USE WITH CARE.
    58  	EscapeHTML Options = 1 << bitEscapeHTML
    59  
    60  	// CompactMarshaler indicates that the output JSON from json.Marshaler
    61  	// is always compact and needs no validation.
    62  	CompactMarshaler Options = 1 << bitCompactMarshaler
    63  
    64  	// NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler
    65  	// is always an escaped string and needs no quoting.
    66  	NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler
    67  
    68  	// NoNullSliceOrMap indicates that all empty Arrays or Objects are encoded as '[]' or '{}',
    69  	// instead of 'null'.
    70  	NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap
    71  
    72  	// ValidateString indicates that the encoder should validate the input string
    73  	// before encoding it into JSON.
    74  	ValidateString Options = 1 << bitValidateString
    75  
    76  	// NoValidateJSONMarshaler indicates that the encoder should not validate the output string
    77  	// after encoding the JSONMarshaler to JSON.
    78  	NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler
    79  
    80  	// CompatibleWithStd is the flag combination (SortMapKeys, EscapeHTML, CompactMarshaler) used to be compatible with the std encoder.
    81  	CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler
    82  )
    83  
    84  // Encoder represents a specific set of encoder configurations.
    85  type Encoder struct {
    86  	Opts   Options // option flags handed to Encode / EncodeIndented by the Encode method
    87  	prefix string // line prefix forwarded to EncodeIndented; assigned by SetIndent
    88  	indent string // indent string forwarded to EncodeIndented; assigned by SetIndent
    89  }
    90  
    91  // Encode returns the JSON encoding of v.
    92  func (self *Encoder) Encode(v interface{}) ([]byte, error) {
    93  	if self.indent != "" || self.prefix != "" {
    94  		return EncodeIndented(v, self.prefix, self.indent, self.Opts)
    95  	}
    96  	return Encode(v, self.Opts)
    97  }
    98  
    99  // SortKeys enables the SortMapKeys option.
   100  func (self *Encoder) SortKeys() *Encoder {
   101  	self.Opts |= SortMapKeys
   102  	return self
   103  }
   104  
   105  // SetEscapeHTML specifies if option EscapeHTML opens
   106  func (self *Encoder) SetEscapeHTML(f bool) {
   107  	if f {
   108  		self.Opts |= EscapeHTML
   109  	} else {
   110  		self.Opts &= ^EscapeHTML
   111  	}
   112  }
   113  
   114  // SetValidateString specifies if option ValidateString opens
   115  func (self *Encoder) SetValidateString(f bool) {
   116  	if f {
   117  		self.Opts |= ValidateString
   118  	} else {
   119  		self.Opts &= ^ValidateString
   120  	}
   121  }
   122  
   123  // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens
   124  func (self *Encoder) SetNoValidateJSONMarshaler(f bool) {
   125  	if f {
   126  		self.Opts |= NoValidateJSONMarshaler
   127  	} else {
   128  		self.Opts &= ^NoValidateJSONMarshaler
   129  	}
   130  }
   131  
   132  // SetCompactMarshaler specifies if option CompactMarshaler opens
   133  func (self *Encoder) SetCompactMarshaler(f bool) {
   134  	if f {
   135  		self.Opts |= CompactMarshaler
   136  	} else {
   137  		self.Opts &= ^CompactMarshaler
   138  	}
   139  }
   140  
   141  // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens
   142  func (self *Encoder) SetNoQuoteTextMarshaler(f bool) {
   143  	if f {
   144  		self.Opts |= NoQuoteTextMarshaler
   145  	} else {
   146  		self.Opts &= ^NoQuoteTextMarshaler
   147  	}
   148  }
   149  
   150  // SetIndent instructs the encoder to format each subsequent encoded
   151  // value as if indented by the package-level function EncodeIndent().
   152  // Calling SetIndent("", "") disables indentation.
   153  func (enc *Encoder) SetIndent(prefix, indent string) {
   154  	enc.prefix = prefix
   155  	enc.indent = indent
   156  }
   157  
   158  // Quote returns the JSON-quoted version of s.
   159  func Quote(s string) string {
   160  	var n int
   161  	var p []byte
   162  
   163  	/* check for empty string */
   164  	if s == "" {
   165  		return `""`
   166  	}
   167  
   168  	/* allocate space for result */
   169  	n = len(s) + 2
   170  	p = make([]byte, 0, n)
   171  
   172  	/* call the encoder */
   173  	_ = encodeString(&p, s)
   174  	return rt.Mem2Str(p)
   175  }
   176  
   177  // Encode returns the JSON encoding of val, encoded with opts.
   178  func Encode(val interface{}, opts Options) ([]byte, error) {
   179  	var ret []byte
   180  
   181  	buf := newBytes()
   182  	err := encodeInto(&buf, val, opts)
   183  
   184  	/* check for errors */
   185  	if err != nil {
   186  		freeBytes(buf)
   187  		return nil, err
   188  	}
   189  
   190  	/* htmlescape or correct UTF-8 if opts enable */
   191  	old := buf
   192  	buf = encodeFinish(old, opts)
   193  	pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr
   194  	pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr
   195  
   196  	/* return when allocated a new buffer */
   197  	if pbuf != pold {
   198  		freeBytes(old)
   199  		return buf, nil
   200  	}
   201  
   202  	/* make a copy of the result */
   203  	ret = make([]byte, len(buf))
   204  	copy(ret, buf)
   205  
   206  	freeBytes(buf)
   207  	/* return the buffer into pool */
   208  	return ret, nil
   209  }
   210  
   211  // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating
   212  // a new one.
   213  func EncodeInto(buf *[]byte, val interface{}, opts Options) error {
   214  	err := encodeInto(buf, val, opts)
   215  	if err != nil {
   216  		return err
   217  	}
   218  	*buf = encodeFinish(*buf, opts)
   219  	return err
   220  }
   221  
   222  func encodeInto(buf *[]byte, val interface{}, opts Options) error {
   223  	stk := newStack()
   224  	efv := rt.UnpackEface(val)
   225  	err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts))
   226  
   227  	/* return the stack into pool */
   228  	if err != nil {
   229  		resetStack(stk)
   230  	}
   231  	freeStack(stk)
   232  
   233  	/* avoid GC ahead */
   234  	runtime.KeepAlive(buf)
   235  	runtime.KeepAlive(efv)
   236  	return err
   237  }
   238  
   239  func encodeFinish(buf []byte, opts Options) []byte {
   240  	if opts&EscapeHTML != 0 {
   241  		buf = HTMLEscape(nil, buf)
   242  	}
   243  	if opts&ValidateString != 0 && !utf8.Validate(buf) {
   244  		buf = utf8.CorrectWith(nil, buf, `\ufffd`)
   245  	}
   246  	return buf
   247  }
   248  
   249  var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) // result of rt.UnpackType for the reflect.Type of byte; not referenced in this part of the file
   250  
   251  // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
   252  // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
   253  // so that the JSON will be safe to embed inside HTML <script> tags.
   254  // For historical reasons, web browsers don't honor standard HTML
   255  // escaping within <script> tags, so an alternative JSON encoding must
   256  // be used.
   257  func HTMLEscape(dst []byte, src []byte) []byte {
   258  	return htmlEscape(dst, src)
   259  }
   260  
   261  // EncodeIndented is like Encode but applies Indent to format the output.
   262  // Each JSON element in the output will begin on a new line beginning with prefix
   263  // followed by one or more copies of indent according to the indentation nesting.
   264  func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) {
   265  	var err error
   266  	var out []byte
   267  	var buf *bytes.Buffer
   268  
   269  	/* encode into the buffer */
   270  	out = newBytes()
   271  	err = EncodeInto(&out, val, opts)
   272  
   273  	/* check for errors */
   274  	if err != nil {
   275  		freeBytes(out)
   276  		return nil, err
   277  	}
   278  
   279  	/* indent the JSON */
   280  	buf = newBuffer()
   281  	err = json.Indent(buf, out, prefix, indent)
   282  
   283  	/* check for errors */
   284  	if err != nil {
   285  		freeBytes(out)
   286  		freeBuffer(buf)
   287  		return nil, err
   288  	}
   289  
   290  	/* copy to the result buffer */
   291  	ret := make([]byte, buf.Len())
   292  	copy(ret, buf.Bytes())
   293  
   294  	/* return the buffers into pool */
   295  	freeBytes(out)
   296  	freeBuffer(buf)
   297  	return ret, nil
   298  }
   299  
   300  // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in
   301  // order to reduce the first-hit latency.
   302  //
   303  // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is
   304  // a compile option to set the depth of recursive compile for the nested struct type.
   305  func Pretouch(vt reflect.Type, opts ...option.CompileOption) error {
   306  	cfg := option.DefaultCompileOptions()
   307  	for _, opt := range opts {
   308  		opt(&cfg)
   309  	}
   310  	return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg)
   311  }
   312  
   313  // Valid validates json and returns first non-blank character position,
   314  // if it is only one valid json value.
   315  // Otherwise returns invalid character position using start.
   316  //
   317  // Note: it does not check for the invalid UTF-8 characters.
   318  func Valid(data []byte) (ok bool, start int) {
   319  	n := len(data)
   320  	if n == 0 {
   321  		return false, -1
   322  	}
   323  	s := rt.Mem2Str(data)
   324  	p := 0
   325  	m := types.NewStateMachine()
   326  	ret := native.ValidateOne(&s, &p, m)
   327  	types.FreeStateMachine(m)
   328  
   329  	if ret < 0 {
   330  		return false, p - 1
   331  	}
   332  
   333  	/* check for trailing spaces */
   334  	for ; p < n; p++ {
   335  		if (types.SPACE_MASK & (1 << data[p])) == 0 {
   336  			return false, p
   337  		}
   338  	}
   339  
   340  	return true, ret
   341  }