github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/internal/encoder/encoder.go (about) 1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package encoder 18 19 import ( 20 "bytes" 21 "encoding/json" 22 "reflect" 23 "runtime" 24 "unsafe" 25 26 "github.com/goshafaq/sonic/internal/native" 27 "github.com/goshafaq/sonic/internal/native/types" 28 "github.com/goshafaq/sonic/internal/rt" 29 "github.com/goshafaq/sonic/option" 30 "github.com/goshafaq/sonic/utf8" 31 ) 32 33 // Options is a set of encoding options. 34 type Options uint64 35 36 const ( 37 bitSortMapKeys = iota 38 bitEscapeHTML 39 bitCompactMarshaler 40 bitNoQuoteTextMarshaler 41 bitNoNullSliceOrMap 42 bitValidateString 43 bitNoValidateJSONMarshaler 44 45 // used for recursive compile 46 bitPointerValue = 63 47 ) 48 49 const ( 50 // SortMapKeys indicates that the keys of a map needs to be sorted 51 // before serializing into JSON. 52 // WARNING: This hurts performance A LOT, USE WITH CARE. 53 SortMapKeys Options = 1 << bitSortMapKeys 54 55 // EscapeHTML indicates encoder to escape all HTML characters 56 // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). 57 // WARNING: This hurts performance A LOT, USE WITH CARE. 58 EscapeHTML Options = 1 << bitEscapeHTML 59 60 // CompactMarshaler indicates that the output JSON from json.Marshaler 61 // is always compact and needs no validation 62 CompactMarshaler Options = 1 << bitCompactMarshaler 63 64 // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler 65 // is always escaped string and needs no quoting 66 NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler 67 68 // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}', 69 // instead of 'null' 70 NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap 71 72 // ValidateString indicates that encoder should validate the input string 73 // before encoding it into JSON. 74 ValidateString Options = 1 << bitValidateString 75 76 // NoValidateJSONMarshaler indicates that the encoder should not validate the output string 77 // after encoding the JSONMarshaler to JSON. 78 NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler 79 80 // CompatibleWithStd is used to be compatible with std encoder. 81 CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler 82 ) 83 84 // Encoder represents a specific set of encoder configurations. 85 type Encoder struct { 86 Opts Options 87 prefix string 88 indent string 89 } 90 91 // Encode returns the JSON encoding of v. 92 func (self *Encoder) Encode(v interface{}) ([]byte, error) { 93 if self.indent != "" || self.prefix != "" { 94 return EncodeIndented(v, self.prefix, self.indent, self.Opts) 95 } 96 return Encode(v, self.Opts) 97 } 98 99 // SortKeys enables the SortMapKeys option. 100 func (self *Encoder) SortKeys() *Encoder { 101 self.Opts |= SortMapKeys 102 return self 103 } 104 105 // SetEscapeHTML specifies if option EscapeHTML opens 106 func (self *Encoder) SetEscapeHTML(f bool) { 107 if f { 108 self.Opts |= EscapeHTML 109 } else { 110 self.Opts &= ^EscapeHTML 111 } 112 } 113 114 // SetValidateString specifies if option ValidateString opens 115 func (self *Encoder) SetValidateString(f bool) { 116 if f { 117 self.Opts |= ValidateString 118 } else { 119 self.Opts &= ^ValidateString 120 } 121 } 122 123 // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens 124 func (self *Encoder) SetNoValidateJSONMarshaler(f bool) { 125 if f { 126 self.Opts |= NoValidateJSONMarshaler 127 } else { 128 self.Opts &= ^NoValidateJSONMarshaler 129 } 130 } 131 132 // SetCompactMarshaler specifies if option CompactMarshaler opens 133 func (self *Encoder) SetCompactMarshaler(f bool) { 134 if f { 135 self.Opts |= CompactMarshaler 136 } else { 137 self.Opts &= ^CompactMarshaler 138 } 139 } 140 141 // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens 142 func (self *Encoder) SetNoQuoteTextMarshaler(f bool) { 143 if f { 144 self.Opts |= NoQuoteTextMarshaler 145 } else { 146 self.Opts &= ^NoQuoteTextMarshaler 147 } 148 } 149 150 // SetIndent instructs the encoder to format each subsequent encoded 151 // value as if indented by the package-level function EncodeIndent(). 152 // Calling SetIndent("", "") disables indentation. 153 func (enc *Encoder) SetIndent(prefix, indent string) { 154 enc.prefix = prefix 155 enc.indent = indent 156 } 157 158 // Quote returns the JSON-quoted version of s. 159 func Quote(s string) string { 160 var n int 161 var p []byte 162 163 /* check for empty string */ 164 if s == "" { 165 return `""` 166 } 167 168 /* allocate space for result */ 169 n = len(s) + 2 170 p = make([]byte, 0, n) 171 172 /* call the encoder */ 173 _ = encodeString(&p, s) 174 return rt.Mem2Str(p) 175 } 176 177 // Encode returns the JSON encoding of val, encoded with opts. 178 func Encode(val interface{}, opts Options) ([]byte, error) { 179 var ret []byte 180 181 buf := newBytes() 182 err := encodeInto(&buf, val, opts) 183 184 /* check for errors */ 185 if err != nil { 186 freeBytes(buf) 187 return nil, err 188 } 189 190 /* htmlescape or correct UTF-8 if opts enable */ 191 old := buf 192 buf = encodeFinish(old, opts) 193 pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr 194 pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr 195 196 /* return when allocated a new buffer */ 197 if pbuf != pold { 198 freeBytes(old) 199 return buf, nil 200 } 201 202 /* make a copy of the result */ 203 ret = make([]byte, len(buf)) 204 copy(ret, buf) 205 206 freeBytes(buf) 207 /* return the buffer into pool */ 208 return ret, nil 209 } 210 211 // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating 212 // a new one. 213 func EncodeInto(buf *[]byte, val interface{}, opts Options) error { 214 err := encodeInto(buf, val, opts) 215 if err != nil { 216 return err 217 } 218 *buf = encodeFinish(*buf, opts) 219 return err 220 } 221 222 func encodeInto(buf *[]byte, val interface{}, opts Options) error { 223 stk := newStack() 224 efv := rt.UnpackEface(val) 225 err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts)) 226 227 /* return the stack into pool */ 228 if err != nil { 229 resetStack(stk) 230 } 231 freeStack(stk) 232 233 /* avoid GC ahead */ 234 runtime.KeepAlive(buf) 235 runtime.KeepAlive(efv) 236 return err 237 } 238 239 func encodeFinish(buf []byte, opts Options) []byte { 240 if opts&EscapeHTML != 0 { 241 buf = HTMLEscape(nil, buf) 242 } 243 if opts&ValidateString != 0 && !utf8.Validate(buf) { 244 buf = utf8.CorrectWith(nil, buf, `\ufffd`) 245 } 246 return buf 247 } 248 249 var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) 250 251 // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 252 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 253 // so that the JSON will be safe to embed inside HTML <script> tags. 254 // For historical reasons, web browsers don't honor standard HTML 255 // escaping within <script> tags, so an alternative JSON encoding must 256 // be used. 257 func HTMLEscape(dst []byte, src []byte) []byte { 258 return htmlEscape(dst, src) 259 } 260 261 // EncodeIndented is like Encode but applies Indent to format the output. 262 // Each JSON element in the output will begin on a new line beginning with prefix 263 // followed by one or more copies of indent according to the indentation nesting. 264 func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) { 265 var err error 266 var out []byte 267 var buf *bytes.Buffer 268 269 /* encode into the buffer */ 270 out = newBytes() 271 err = EncodeInto(&out, val, opts) 272 273 /* check for errors */ 274 if err != nil { 275 freeBytes(out) 276 return nil, err 277 } 278 279 /* indent the JSON */ 280 buf = newBuffer() 281 err = json.Indent(buf, out, prefix, indent) 282 283 /* check for errors */ 284 if err != nil { 285 freeBytes(out) 286 freeBuffer(buf) 287 return nil, err 288 } 289 290 /* copy to the result buffer */ 291 ret := make([]byte, buf.Len()) 292 copy(ret, buf.Bytes()) 293 294 /* return the buffers into pool */ 295 freeBytes(out) 296 freeBuffer(buf) 297 return ret, nil 298 } 299 300 // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in 301 // order to reduce the first-hit latency. 302 // 303 // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is 304 // a compile option to set the depth of recursive compile for the nested struct type. 305 func Pretouch(vt reflect.Type, opts ...option.CompileOption) error { 306 cfg := option.DefaultCompileOptions() 307 for _, opt := range opts { 308 opt(&cfg) 309 } 310 return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg) 311 } 312 313 // Valid validates json and returns first non-blank character position, 314 // if it is only one valid json value. 315 // Otherwise returns invalid character position using start. 316 // 317 // Note: it does not check for the invalid UTF-8 characters. 318 func Valid(data []byte) (ok bool, start int) { 319 n := len(data) 320 if n == 0 { 321 return false, -1 322 } 323 s := rt.Mem2Str(data) 324 p := 0 325 m := types.NewStateMachine() 326 ret := native.ValidateOne(&s, &p, m) 327 types.FreeStateMachine(m) 328 329 if ret < 0 { 330 return false, p - 1 331 } 332 333 /* check for trailing spaces */ 334 for ; p < n; p++ { 335 if (types.SPACE_MASK & (1 << data[p])) == 0 { 336 return false, p 337 } 338 } 339 340 return true, ret 341 }