github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/util/json/parser.go (about) 1 // Copyright 2022 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package json 12 13 import ( 14 "bytes" 15 "encoding/json" 16 "io" 17 "reflect" 18 "strings" 19 "unsafe" 20 21 "github.com/cockroachdb/cockroachdb-parser/pkg/util/json/tokenizer" 22 "github.com/cockroachdb/errors" 23 ) 24 25 // parseUsingFastParser parses string as JSON using fast json parser. 26 func parseUsingFastParser(s string, cfg parseConfig) (JSON, error) { 27 input, err := unsafeGetBytes(s) 28 if err != nil { 29 return nil, err 30 } 31 32 p := fastJSONParser{ 33 parseConfig: cfg, 34 decoder: tokenizer.MakeDecoder(input), 35 state: (*fastJSONParser).parseTopValue, 36 } 37 defer p.decoder.Release() 38 39 j, err := p.parse() 40 if err != nil { 41 if errors.Is(err, io.ErrUnexpectedEOF) && p.decoder.More() { 42 // JSON scanner returns nil token if it encounters an invalid input 43 // character. In such cases, decoder returns io.ErrUnexpectedEOF error. 44 // However, we know it's not an EOF because decoder has more data. So, 45 // produce a bit nicer error message. 46 return nil, jsonDecodeError(decodeErrorContext(errInvalidInputToken, s, p.decoder.Pos())) 47 } 48 return nil, jsonDecodeError(decodeErrorContext(err, s, p.decoder.Pos())) 49 } 50 51 if j == nil { 52 return nil, errors.AssertionFailedf("expected parsed JSON value, got nil") 53 } 54 55 if p.decoder.More() { 56 return nil, jsonDecodeError(decodeErrorContext(errTrailingCharacters, s, p.decoder.Pos()+1)) 57 } 58 59 return j, nil 60 } 61 62 // fastJSONParser builds JSON given input string. This implementation uses low level 63 // API provided by fork of github.com/pkg/json package to implement direct 64 // string to tree.JSON conversion, while trying to be as close to the 65 // encoder/json implementation as possible. 66 type fastJSONParser struct { 67 parseConfig 68 decoder tokenizer.Decoder 69 70 // state is the method expression for the next 71 // state in the state machine. 72 state func(*fastJSONParser, []byte) (JSON, error) 73 74 // State machine stack information. 75 // kind is the types of objects stored in stack 76 // len(kind) == len(arr) + len(obj) 77 kind []kind 78 arr []ArrayBuilder // array builder stack 79 obj []ObjectBuilder // object builder stack 80 } 81 82 // parse runs the parse loop -- reading next token from the 83 // stream, and decoding it based on the state machine. 84 func (p *fastJSONParser) parse() (JSON, error) { 85 for { 86 tok, err := p.decoder.NextToken() 87 if err != nil { 88 return nil, err 89 } 90 91 if len(tok) < 1 { 92 return nil, io.ErrUnexpectedEOF 93 } 94 95 j, err := p.state(p, tok) 96 if err != nil { 97 return nil, err 98 } 99 if j != nil && len(p.kind) == 0 { 100 return j, nil 101 } 102 } 103 } 104 105 // parseTopValue processes top level JSON value. 106 func (p *fastJSONParser) parseTopValue(tok []byte) (JSON, error) { 107 switch tok[0] { 108 case tokenizer.ArrayStart: 109 p.pushArray() 110 return nil, nil 111 case tokenizer.ObjectStart: 112 p.pushObject() 113 return nil, nil 114 case tokenizer.Null: 115 return NullJSONValue, nil 116 case tokenizer.String: 117 return jsonString(tok[1 : len(tok)-1]), nil 118 case tokenizer.True: 119 return TrueJSONValue, nil 120 case tokenizer.False: 121 return FalseJSONValue, nil 122 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 123 return FromNumber(json.Number(tok)) 124 default: 125 return nil, errors.Newf("unexpected token %q", tok) 126 } 127 } 128 129 // parseArrayValue processes JSON value inside array. 130 func (p *fastJSONParser) parseArrayValue(tok []byte) (JSON, error) { 131 switch tok[0] { 132 case tokenizer.ArrayEnd: 133 return p.buildArray() 134 case tokenizer.ArrayStart: 135 p.pushArray() 136 case tokenizer.ObjectStart: 137 p.pushObject() 138 case tokenizer.Null: 139 p.addArrayValue(NullJSONValue) 140 case tokenizer.String: 141 p.addArrayValue(jsonString(tok[1 : len(tok)-1])) 142 case tokenizer.True: 143 p.addArrayValue(TrueJSONValue) 144 case tokenizer.False: 145 p.addArrayValue(FalseJSONValue) 146 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 147 n, err := FromNumber(json.Number(tok)) 148 if err != nil { 149 return n, err 150 } 151 p.addArrayValue(n) 152 default: 153 return nil, errors.Newf("unexpected array token %q", tok) 154 } 155 return nil, nil 156 } 157 158 // parseObjectKey processes object key. 159 func (p *fastJSONParser) parseObjectKey(tok []byte) (JSON, error) { 160 switch tok[0] { 161 case tokenizer.ObjectEnd: 162 return p.buildObject() 163 case tokenizer.String: 164 p.addObjectKey(string(tok[1 : len(tok)-1])) 165 return nil, nil 166 default: 167 return nil, errors.Newf("expected to read object key (string), found %q", tok) 168 } 169 } 170 171 // parseObjectValue processes object value. 172 func (p *fastJSONParser) parseObjectValue(tok []byte) (JSON, error) { 173 switch tok[0] { 174 case tokenizer.ArrayStart: 175 p.pushArray() 176 case tokenizer.ObjectStart: 177 p.pushObject() 178 case tokenizer.Null: 179 p.setObjectValue(NullJSONValue) 180 case tokenizer.String: 181 p.setObjectValue(jsonString(tok[1 : len(tok)-1])) 182 case tokenizer.True: 183 p.setObjectValue(TrueJSONValue) 184 case tokenizer.False: 185 p.setObjectValue(FalseJSONValue) 186 case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 187 n, err := FromNumber(json.Number(tok)) 188 if err != nil { 189 return n, err 190 } 191 p.setObjectValue(n) 192 default: 193 return nil, errors.Newf("unexpected object token %q", tok) 194 } 195 return nil, nil 196 } 197 198 type kind bool 199 200 const ( 201 kindArray kind = false 202 kindObject kind = true 203 ) 204 205 var errUnexpectedState = errors.New("unexpected state machine state") 206 207 // pushArray adds array builder and transitions state to read array values. 208 func (p *fastJSONParser) pushArray() { 209 p.arr = append(p.arr, ArrayBuilder{}) 210 p.kind = append(p.kind, kindArray) 211 p.state = (*fastJSONParser).parseArrayValue 212 } 213 214 // addArrayValue adds value to top array. 215 func (p *fastJSONParser) addArrayValue(j JSON) { 216 p.arr[len(p.arr)-1].Add(j) 217 } 218 219 // buildArray builds top array value, and adjusts stack appropriately. 220 func (p *fastJSONParser) buildArray() (JSON, error) { 221 if len(p.kind) == 0 || p.kind[len(p.kind)-1] != kindArray { 222 return nil, errUnexpectedState 223 } 224 j := p.arr[len(p.arr)-1].Build() 225 p.pop() 226 return p.stackReturn(j) 227 } 228 229 // pushObject adds object builder and transitions state to read object. 230 func (p *fastJSONParser) pushObject() { 231 p.obj = append(p.obj, ObjectBuilder{unordered: p.unordered}) 232 p.kind = append(p.kind, kindObject) 233 p.state = (*fastJSONParser).parseObjectKey 234 } 235 236 // addObjectKey adds key to object builder and transitions state to read object 237 // value. 238 func (p *fastJSONParser) addObjectKey(k string) { 239 p.obj[len(p.obj)-1].Add(k, nil) 240 p.state = (*fastJSONParser).parseObjectValue 241 } 242 243 // setObjectValue sets the value for the previously added object key, 244 // and transitions state to read the next object key. 245 func (p *fastJSONParser) setObjectValue(v JSON) { 246 pairs := p.obj[len(p.obj)-1].pairs 247 pairs[len(pairs)-1].v = v 248 p.state = (*fastJSONParser).parseObjectKey 249 } 250 251 // buildObject builds top JSON object. 252 func (p *fastJSONParser) buildObject() (JSON, error) { 253 if len(p.kind) == 0 || p.kind[len(p.kind)-1] != kindObject { 254 return nil, errUnexpectedState 255 } 256 j := p.obj[len(p.obj)-1].Build() 257 p.pop() 258 return p.stackReturn(j) 259 } 260 261 // pop stack. 262 func (p *fastJSONParser) pop() { 263 top := len(p.kind) - 1 264 if p.kind[top] == kindArray { 265 p.arr = p.arr[:len(p.arr)-1] 266 } else { 267 p.obj = p.obj[:len(p.obj)-1] 268 } 269 p.kind = p.kind[:top] 270 } 271 272 // stackReturn returns json object to the top of the stack 273 // and transitions state machine to the next state. 274 func (p *fastJSONParser) stackReturn(j JSON) (JSON, error) { 275 // If stack is now empty, we're done -- return JSON. 276 if len(p.kind) == 0 { 277 return j, nil 278 } 279 280 // Add json to array or object; arrange for next state transition. 281 if p.kind[len(p.kind)-1] == kindArray { 282 p.addArrayValue(j) 283 p.state = (*fastJSONParser).parseArrayValue 284 } else { 285 p.setObjectValue(j) 286 p.state = (*fastJSONParser).parseObjectKey 287 } 288 return nil, nil 289 } 290 291 var errInvalidInputToken = errors.New("invalid JSON token") 292 293 // decodeErrorContext returns input context for an error encountered during decoding. 294 // There is quite a bit of code here, but debugging faulty JSON is hard, so 295 // take extra care to produce nice error message, with good context information. 296 func decodeErrorContext(err error, s string, pos int) error { 297 if len(s) == 0 { 298 return errors.Wrap(err, "while decoding empty string") 299 } 300 301 const contextSize = 16 302 ctxStart := pos - contextSize 303 if ctxStart < 0 { 304 ctxStart = 0 305 } 306 ctxEnd := pos + contextSize 307 if ctxEnd > len(s) { 308 ctxEnd = len(s) 309 } 310 311 var leftPad, rightPad string 312 if pos > ctxStart { 313 leftPad = strings.Repeat(".", pos-ctxStart) 314 } 315 if ctxEnd > pos { 316 rightPad = strings.Repeat(".", ctxEnd-pos-1) 317 } 318 319 return errors.Wrapf(err, 320 "while decoding %d bytes at offset %d:\n"+ 321 "...|%s|...\n"+ 322 "...|%s^%s|...", 323 len(s), pos, 324 s[ctxStart:ctxEnd], 325 leftPad, rightPad, 326 ) 327 } 328 329 // unsafeGetBytes returns []byte in the underlying string, 330 // without incurring copy. 331 // This unsafe mechanism is safe to use here because, ultimately, every 332 // JSON object produced from those bytes will copy those bytes anyway 333 // (i.e. jsonString([]byte)). 334 // See https://groups.google.com/g/golang-nuts/c/Zsfk-VMd_fU/m/O1ru4fO-BgAJ 335 func unsafeGetBytes(s string) ([]byte, error) { 336 const maxStrLen = 1 << 30 // Really, can't see us supporting input JSONs that big. 337 if len(s) > maxStrLen { 338 return nil, bytes.ErrTooLarge 339 } 340 if len(s) == 0 { 341 return nil, nil 342 } 343 //lint:ignore SA1019 StringHeader is deprecated, but no clear replacement 344 p := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&s)).Data) 345 return (*[maxStrLen]byte)(p)[:len(s):len(s)], nil 346 }