github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/internal/encoder/encoder.go (about) 1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package encoder 18 19 import ( 20 `bytes` 21 `encoding/json` 22 `reflect` 23 `runtime` 24 `unsafe` 25 26 `github.com/bytedance/sonic/internal/native` 27 `github.com/bytedance/sonic/internal/native/types` 28 `github.com/bytedance/sonic/internal/rt` 29 `github.com/bytedance/sonic/utf8` 30 `github.com/bytedance/sonic/option` 31 ) 32 33 // Options is a set of encoding options. 34 type Options uint64 35 36 const ( 37 bitSortMapKeys = iota 38 bitEscapeHTML 39 bitCompactMarshaler 40 bitNoQuoteTextMarshaler 41 bitNoNullSliceOrMap 42 bitValidateString 43 bitNoValidateJSONMarshaler 44 bitNoEncoderNewline 45 46 // used for recursive compile 47 bitPointerValue = 63 48 ) 49 50 const ( 51 // SortMapKeys indicates that the keys of a map needs to be sorted 52 // before serializing into JSON. 53 // WARNING: This hurts performance A LOT, USE WITH CARE. 54 SortMapKeys Options = 1 << bitSortMapKeys 55 56 // EscapeHTML indicates encoder to escape all HTML characters 57 // after serializing into JSON (see https://pkg.go.dev/encoding/json#HTMLEscape). 58 // WARNING: This hurts performance A LOT, USE WITH CARE. 59 EscapeHTML Options = 1 << bitEscapeHTML 60 61 // CompactMarshaler indicates that the output JSON from json.Marshaler 62 // is always compact and needs no validation 63 CompactMarshaler Options = 1 << bitCompactMarshaler 64 65 // NoQuoteTextMarshaler indicates that the output text from encoding.TextMarshaler 66 // is always escaped string and needs no quoting 67 NoQuoteTextMarshaler Options = 1 << bitNoQuoteTextMarshaler 68 69 // NoNullSliceOrMap indicates all empty Array or Object are encoded as '[]' or '{}', 70 // instead of 'null' 71 NoNullSliceOrMap Options = 1 << bitNoNullSliceOrMap 72 73 // ValidateString indicates that encoder should validate the input string 74 // before encoding it into JSON. 75 ValidateString Options = 1 << bitValidateString 76 77 // NoValidateJSONMarshaler indicates that the encoder should not validate the output string 78 // after encoding the JSONMarshaler to JSON. 79 NoValidateJSONMarshaler Options = 1 << bitNoValidateJSONMarshaler 80 81 // NoEncoderNewline indicates that the encoder should not add a newline after every message 82 NoEncoderNewline Options = 1 << bitNoEncoderNewline 83 84 // CompatibleWithStd is used to be compatible with std encoder. 85 CompatibleWithStd Options = SortMapKeys | EscapeHTML | CompactMarshaler 86 ) 87 88 // Encoder represents a specific set of encoder configurations. 89 type Encoder struct { 90 Opts Options 91 prefix string 92 indent string 93 } 94 95 // Encode returns the JSON encoding of v. 96 func (self *Encoder) Encode(v interface{}) ([]byte, error) { 97 if self.indent != "" || self.prefix != "" { 98 return EncodeIndented(v, self.prefix, self.indent, self.Opts) 99 } 100 return Encode(v, self.Opts) 101 } 102 103 // SortKeys enables the SortMapKeys option. 104 func (self *Encoder) SortKeys() *Encoder { 105 self.Opts |= SortMapKeys 106 return self 107 } 108 109 // SetEscapeHTML specifies if option EscapeHTML opens 110 func (self *Encoder) SetEscapeHTML(f bool) { 111 if f { 112 self.Opts |= EscapeHTML 113 } else { 114 self.Opts &= ^EscapeHTML 115 } 116 } 117 118 // SetValidateString specifies if option ValidateString opens 119 func (self *Encoder) SetValidateString(f bool) { 120 if f { 121 self.Opts |= ValidateString 122 } else { 123 self.Opts &= ^ValidateString 124 } 125 } 126 127 // SetNoValidateJSONMarshaler specifies if option NoValidateJSONMarshaler opens 128 func (self *Encoder) SetNoValidateJSONMarshaler(f bool) { 129 if f { 130 self.Opts |= NoValidateJSONMarshaler 131 } else { 132 self.Opts &= ^NoValidateJSONMarshaler 133 } 134 } 135 136 // SetNoEncoderNewline specifies if option NoEncoderNewline opens 137 func (self *Encoder) SetNoEncoderNewline(f bool) { 138 if f { 139 self.Opts |= NoEncoderNewline 140 } else { 141 self.Opts &= ^NoEncoderNewline 142 } 143 } 144 145 146 // SetCompactMarshaler specifies if option CompactMarshaler opens 147 func (self *Encoder) SetCompactMarshaler(f bool) { 148 if f { 149 self.Opts |= CompactMarshaler 150 } else { 151 self.Opts &= ^CompactMarshaler 152 } 153 } 154 155 // SetNoQuoteTextMarshaler specifies if option NoQuoteTextMarshaler opens 156 func (self *Encoder) SetNoQuoteTextMarshaler(f bool) { 157 if f { 158 self.Opts |= NoQuoteTextMarshaler 159 } else { 160 self.Opts &= ^NoQuoteTextMarshaler 161 } 162 } 163 164 // SetIndent instructs the encoder to format each subsequent encoded 165 // value as if indented by the package-level function EncodeIndent(). 166 // Calling SetIndent("", "") disables indentation. 167 func (enc *Encoder) SetIndent(prefix, indent string) { 168 enc.prefix = prefix 169 enc.indent = indent 170 } 171 172 // Quote returns the JSON-quoted version of s. 173 func Quote(s string) string { 174 var n int 175 var p []byte 176 177 /* check for empty string */ 178 if s == "" { 179 return `""` 180 } 181 182 /* allocate space for result */ 183 n = len(s) + 2 184 p = make([]byte, 0, n) 185 186 /* call the encoder */ 187 _ = encodeString(&p, s) 188 return rt.Mem2Str(p) 189 } 190 191 // Encode returns the JSON encoding of val, encoded with opts. 192 func Encode(val interface{}, opts Options) ([]byte, error) { 193 var ret []byte 194 195 buf := newBytes() 196 err := encodeInto(&buf, val, opts) 197 198 /* check for errors */ 199 if err != nil { 200 freeBytes(buf) 201 return nil, err 202 } 203 204 /* htmlescape or correct UTF-8 if opts enable */ 205 old := buf 206 buf = encodeFinish(old, opts) 207 pbuf := ((*rt.GoSlice)(unsafe.Pointer(&buf))).Ptr 208 pold := ((*rt.GoSlice)(unsafe.Pointer(&old))).Ptr 209 210 /* return when allocated a new buffer */ 211 if pbuf != pold { 212 freeBytes(old) 213 return buf, nil 214 } 215 216 /* make a copy of the result */ 217 ret = make([]byte, len(buf)) 218 copy(ret, buf) 219 220 freeBytes(buf) 221 /* return the buffer into pool */ 222 return ret, nil 223 } 224 225 // EncodeInto is like Encode but uses a user-supplied buffer instead of allocating 226 // a new one. 227 func EncodeInto(buf *[]byte, val interface{}, opts Options) error { 228 err := encodeInto(buf, val, opts) 229 if err != nil { 230 return err 231 } 232 *buf = encodeFinish(*buf, opts) 233 return err 234 } 235 236 func encodeInto(buf *[]byte, val interface{}, opts Options) error { 237 stk := newStack() 238 efv := rt.UnpackEface(val) 239 err := encodeTypedPointer(buf, efv.Type, &efv.Value, stk, uint64(opts)) 240 241 /* return the stack into pool */ 242 if err != nil { 243 resetStack(stk) 244 } 245 freeStack(stk) 246 247 /* avoid GC ahead */ 248 runtime.KeepAlive(buf) 249 runtime.KeepAlive(efv) 250 return err 251 } 252 253 func encodeFinish(buf []byte, opts Options) []byte { 254 if opts & EscapeHTML != 0 { 255 buf = HTMLEscape(nil, buf) 256 } 257 if opts & ValidateString != 0 && !utf8.Validate(buf) { 258 buf = utf8.CorrectWith(nil, buf, `\ufffd`) 259 } 260 return buf 261 } 262 263 var typeByte = rt.UnpackType(reflect.TypeOf(byte(0))) 264 265 // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 266 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 267 // so that the JSON will be safe to embed inside HTML <script> tags. 268 // For historical reasons, web browsers don't honor standard HTML 269 // escaping within <script> tags, so an alternative JSON encoding must 270 // be used. 271 func HTMLEscape(dst []byte, src []byte) []byte { 272 return htmlEscape(dst, src) 273 } 274 275 // EncodeIndented is like Encode but applies Indent to format the output. 276 // Each JSON element in the output will begin on a new line beginning with prefix 277 // followed by one or more copies of indent according to the indentation nesting. 278 func EncodeIndented(val interface{}, prefix string, indent string, opts Options) ([]byte, error) { 279 var err error 280 var out []byte 281 var buf *bytes.Buffer 282 283 /* encode into the buffer */ 284 out = newBytes() 285 err = EncodeInto(&out, val, opts) 286 287 /* check for errors */ 288 if err != nil { 289 freeBytes(out) 290 return nil, err 291 } 292 293 /* indent the JSON */ 294 buf = newBuffer() 295 err = json.Indent(buf, out, prefix, indent) 296 297 /* check for errors */ 298 if err != nil { 299 freeBytes(out) 300 freeBuffer(buf) 301 return nil, err 302 } 303 304 /* copy to the result buffer */ 305 ret := make([]byte, buf.Len()) 306 copy(ret, buf.Bytes()) 307 308 /* return the buffers into pool */ 309 freeBytes(out) 310 freeBuffer(buf) 311 return ret, nil 312 } 313 314 // Pretouch compiles vt ahead-of-time to avoid JIT compilation on-the-fly, in 315 // order to reduce the first-hit latency. 316 // 317 // Opts are the compile options, for example, "option.WithCompileRecursiveDepth" is 318 // a compile option to set the depth of recursive compile for the nested struct type. 319 func Pretouch(vt reflect.Type, opts ...option.CompileOption) error { 320 cfg := option.DefaultCompileOptions() 321 for _, opt := range opts { 322 opt(&cfg) 323 } 324 return pretouchRec(map[reflect.Type]uint8{vt: 0}, cfg) 325 } 326 327 // Valid validates json and returns first non-blank character position, 328 // if it is only one valid json value. 329 // Otherwise returns invalid character position using start. 330 // 331 // Note: it does not check for the invalid UTF-8 characters. 332 func Valid(data []byte) (ok bool, start int) { 333 n := len(data) 334 if n == 0 { 335 return false, -1 336 } 337 s := rt.Mem2Str(data) 338 p := 0 339 m := types.NewStateMachine() 340 ret := native.ValidateOne(&s, &p, m, types.F_VALIDATE_STRING) 341 types.FreeStateMachine(m) 342 343 if ret < 0 { 344 return false, p-1 345 } 346 347 /* check for trailing spaces */ 348 for ;p < n; p++ { 349 if (types.SPACE_MASK & (1 << data[p])) == 0 { 350 return false, p 351 } 352 } 353 354 return true, ret 355 }