github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/ast/parser.go (about) 1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package ast 18 19 import ( 20 `fmt` 21 22 `github.com/bytedance/sonic/internal/native/types` 23 `github.com/bytedance/sonic/internal/rt` 24 ) 25 26 const ( 27 _DEFAULT_NODE_CAP int = 8 28 _APPEND_GROW_SHIFT = 1 29 ) 30 31 const ( 32 _ERR_NOT_FOUND types.ParsingError = 33 33 _ERR_UNSUPPORT_TYPE types.ParsingError = 34 34 ) 35 36 var ( 37 // ErrNotExist means both key and value doesn't exist 38 ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists") 39 40 // ErrUnsupportType means API on the node is unsupported 41 ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type") 42 ) 43 44 type Parser struct { 45 p int 46 s string 47 noLazy bool 48 skipValue bool 49 dbuf *byte 50 } 51 52 /** Parser Private Methods **/ 53 54 func (self *Parser) delim() types.ParsingError { 55 n := len(self.s) 56 p := self.lspace(self.p) 57 58 /* check for EOF */ 59 if p >= n { 60 return types.ERR_EOF 61 } 62 63 /* check for the delimtier */ 64 if self.s[p] != ':' { 65 return types.ERR_INVALID_CHAR 66 } 67 68 /* update the read pointer */ 69 self.p = p + 1 70 return 0 71 } 72 73 func (self *Parser) object() types.ParsingError { 74 n := len(self.s) 75 p := self.lspace(self.p) 76 77 /* check for EOF */ 78 if p >= n { 79 return types.ERR_EOF 80 } 81 82 /* check for the delimtier */ 83 if self.s[p] != '{' { 84 return types.ERR_INVALID_CHAR 85 } 86 87 /* update the read pointer */ 88 self.p = p + 1 89 return 0 90 } 91 92 func (self *Parser) array() types.ParsingError { 93 n := len(self.s) 94 p := self.lspace(self.p) 95 96 /* check for EOF */ 97 if p >= n { 98 return types.ERR_EOF 99 } 100 101 /* check for the delimtier */ 102 if self.s[p] != '[' { 103 return types.ERR_INVALID_CHAR 104 } 105 106 /* update the read pointer */ 107 self.p = p + 1 108 return 0 109 } 110 111 func (self *Parser) lspace(sp int) int { 112 ns := len(self.s) 113 for ; sp<ns && isSpace(self.s[sp]); sp+=1 {} 114 115 return sp 116 } 117 118 func (self *Parser) backward() { 119 for ; self.p >= 0 && isSpace(self.s[self.p]); self.p-=1 {} 120 } 121 122 func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { 123 sp := self.p 124 ns := len(self.s) 125 126 /* check for EOF */ 127 if self.p = self.lspace(sp); self.p >= ns { 128 return Node{}, types.ERR_EOF 129 } 130 131 /* check for empty array */ 132 if self.s[self.p] == ']' { 133 self.p++ 134 return Node{t: types.V_ARRAY}, 0 135 } 136 137 /* allocate array space and parse every element */ 138 for { 139 var val Node 140 var err types.ParsingError 141 142 if self.skipValue { 143 /* skip the value */ 144 var start int 145 if start, err = self.skipFast(); err != 0 { 146 return Node{}, err 147 } 148 if self.p > ns { 149 return Node{}, types.ERR_EOF 150 } 151 t := switchRawType(self.s[start]) 152 if t == _V_NONE { 153 return Node{}, types.ERR_INVALID_CHAR 154 } 155 val = newRawNode(self.s[start:self.p], t) 156 }else{ 157 /* decode the value */ 158 if val, err = self.Parse(); err != 0 { 159 return Node{}, err 160 } 161 } 162 163 /* add the value to result */ 164 ret.Push(val) 165 self.p = self.lspace(self.p) 166 167 /* check for EOF */ 168 if self.p >= ns { 169 return Node{}, types.ERR_EOF 170 } 171 172 /* check for the next character */ 173 switch self.s[self.p] { 174 case ',' : self.p++ 175 case ']' : self.p++; return newArray(ret), 0 176 default: 177 // if val.isLazy() { 178 // return newLazyArray(self, ret), 0 179 // } 180 return Node{}, types.ERR_INVALID_CHAR 181 } 182 } 183 } 184 185 func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { 186 sp := self.p 187 ns := len(self.s) 188 189 /* check for EOF */ 190 if self.p = self.lspace(sp); self.p >= ns { 191 return Node{}, types.ERR_EOF 192 } 193 194 /* check for empty object */ 195 if self.s[self.p] == '}' { 196 self.p++ 197 return Node{t: types.V_OBJECT}, 0 198 } 199 200 /* decode each pair */ 201 for { 202 var val Node 203 var njs types.JsonState 204 var err types.ParsingError 205 206 /* decode the key */ 207 if njs = self.decodeValue(); njs.Vt != types.V_STRING { 208 return Node{}, types.ERR_INVALID_CHAR 209 } 210 211 /* extract the key */ 212 idx := self.p - 1 213 key := self.s[njs.Iv:idx] 214 215 /* check for escape sequence */ 216 if njs.Ep != -1 { 217 if key, err = unquote(key); err != 0 { 218 return Node{}, err 219 } 220 } 221 222 /* expect a ':' delimiter */ 223 if err = self.delim(); err != 0 { 224 return Node{}, err 225 } 226 227 228 if self.skipValue { 229 /* skip the value */ 230 var start int 231 if start, err = self.skipFast(); err != 0 { 232 return Node{}, err 233 } 234 if self.p > ns { 235 return Node{}, types.ERR_EOF 236 } 237 t := switchRawType(self.s[start]) 238 if t == _V_NONE { 239 return Node{}, types.ERR_INVALID_CHAR 240 } 241 val = newRawNode(self.s[start:self.p], t) 242 } else { 243 /* decode the value */ 244 if val, err = self.Parse(); err != 0 { 245 return Node{}, err 246 } 247 } 248 249 /* add the value to result */ 250 // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !! 251 ret.Push(Pair{Key: key, Value: val}) 252 self.p = self.lspace(self.p) 253 254 /* check for EOF */ 255 if self.p >= ns { 256 return Node{}, types.ERR_EOF 257 } 258 259 /* check for the next character */ 260 switch self.s[self.p] { 261 case ',' : self.p++ 262 case '}' : self.p++; return newObject(ret), 0 263 default: 264 // if val.isLazy() { 265 // return newLazyObject(self, ret), 0 266 // } 267 return Node{}, types.ERR_INVALID_CHAR 268 } 269 } 270 } 271 272 func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) { 273 p := self.p - 1 274 s := self.s[iv:p] 275 276 /* fast path: no escape sequence */ 277 if ep == -1 { 278 return NewString(s), 0 279 } 280 281 /* unquote the string */ 282 out, err := unquote(s) 283 284 /* check for errors */ 285 if err != 0 { 286 return Node{}, err 287 } else { 288 return newBytes(rt.Str2Mem(out)), 0 289 } 290 } 291 292 /** Parser Interface **/ 293 294 func (self *Parser) Pos() int { 295 return self.p 296 } 297 298 func (self *Parser) Parse() (Node, types.ParsingError) { 299 switch val := self.decodeValue(); val.Vt { 300 case types.V_EOF : return Node{}, types.ERR_EOF 301 case types.V_NULL : return nullNode, 0 302 case types.V_TRUE : return trueNode, 0 303 case types.V_FALSE : return falseNode, 0 304 case types.V_STRING : return self.decodeString(val.Iv, val.Ep) 305 case types.V_ARRAY: 306 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { 307 self.p = p + 1 308 return Node{t: types.V_ARRAY}, 0 309 } 310 if self.noLazy { 311 return self.decodeArray(new(linkedNodes)) 312 } 313 return newLazyArray(self), 0 314 case types.V_OBJECT: 315 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { 316 self.p = p + 1 317 return Node{t: types.V_OBJECT}, 0 318 } 319 if self.noLazy { 320 return self.decodeObject(new(linkedPairs)) 321 } 322 return newLazyObject(self), 0 323 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0 324 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0 325 default : return Node{}, types.ParsingError(-val.Vt) 326 } 327 } 328 329 func (self *Parser) searchKey(match string) types.ParsingError { 330 ns := len(self.s) 331 if err := self.object(); err != 0 { 332 return err 333 } 334 335 /* check for EOF */ 336 if self.p = self.lspace(self.p); self.p >= ns { 337 return types.ERR_EOF 338 } 339 340 /* check for empty object */ 341 if self.s[self.p] == '}' { 342 self.p++ 343 return _ERR_NOT_FOUND 344 } 345 346 var njs types.JsonState 347 var err types.ParsingError 348 /* decode each pair */ 349 for { 350 351 /* decode the key */ 352 if njs = self.decodeValue(); njs.Vt != types.V_STRING { 353 return types.ERR_INVALID_CHAR 354 } 355 356 /* extract the key */ 357 idx := self.p - 1 358 key := self.s[njs.Iv:idx] 359 360 /* check for escape sequence */ 361 if njs.Ep != -1 { 362 if key, err = unquote(key); err != 0 { 363 return err 364 } 365 } 366 367 /* expect a ':' delimiter */ 368 if err = self.delim(); err != 0 { 369 return err 370 } 371 372 /* skip value */ 373 if key != match { 374 if _, err = self.skipFast(); err != 0 { 375 return err 376 } 377 } else { 378 return 0 379 } 380 381 /* check for EOF */ 382 self.p = self.lspace(self.p) 383 if self.p >= ns { 384 return types.ERR_EOF 385 } 386 387 /* check for the next character */ 388 switch self.s[self.p] { 389 case ',': 390 self.p++ 391 case '}': 392 self.p++ 393 return _ERR_NOT_FOUND 394 default: 395 return types.ERR_INVALID_CHAR 396 } 397 } 398 } 399 400 func (self *Parser) searchIndex(idx int) types.ParsingError { 401 ns := len(self.s) 402 if err := self.array(); err != 0 { 403 return err 404 } 405 406 /* check for EOF */ 407 if self.p = self.lspace(self.p); self.p >= ns { 408 return types.ERR_EOF 409 } 410 411 /* check for empty array */ 412 if self.s[self.p] == ']' { 413 self.p++ 414 return _ERR_NOT_FOUND 415 } 416 417 var err types.ParsingError 418 /* allocate array space and parse every element */ 419 for i := 0; i < idx; i++ { 420 421 /* decode the value */ 422 if _, err = self.skipFast(); err != 0 { 423 return err 424 } 425 426 /* check for EOF */ 427 self.p = self.lspace(self.p) 428 if self.p >= ns { 429 return types.ERR_EOF 430 } 431 432 /* check for the next character */ 433 switch self.s[self.p] { 434 case ',': 435 self.p++ 436 case ']': 437 self.p++ 438 return _ERR_NOT_FOUND 439 default: 440 return types.ERR_INVALID_CHAR 441 } 442 } 443 444 return 0 445 } 446 447 func (self *Node) skipNextNode() *Node { 448 if !self.isLazy() { 449 return nil 450 } 451 452 parser, stack := self.getParserAndArrayStack() 453 ret := &stack.v 454 sp := parser.p 455 ns := len(parser.s) 456 457 /* check for EOF */ 458 if parser.p = parser.lspace(sp); parser.p >= ns { 459 return newSyntaxError(parser.syntaxError(types.ERR_EOF)) 460 } 461 462 /* check for empty array */ 463 if parser.s[parser.p] == ']' { 464 parser.p++ 465 self.setArray(ret) 466 return nil 467 } 468 469 var val Node 470 /* skip the value */ 471 if start, err := parser.skipFast(); err != 0 { 472 return newSyntaxError(parser.syntaxError(err)) 473 } else { 474 t := switchRawType(parser.s[start]) 475 if t == _V_NONE { 476 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) 477 } 478 val = newRawNode(parser.s[start:parser.p], t) 479 } 480 481 /* add the value to result */ 482 ret.Push(val) 483 self.l++ 484 parser.p = parser.lspace(parser.p) 485 486 /* check for EOF */ 487 if parser.p >= ns { 488 return newSyntaxError(parser.syntaxError(types.ERR_EOF)) 489 } 490 491 /* check for the next character */ 492 switch parser.s[parser.p] { 493 case ',': 494 parser.p++ 495 return ret.At(ret.Len()-1) 496 case ']': 497 parser.p++ 498 self.setArray(ret) 499 return ret.At(ret.Len()-1) 500 default: 501 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) 502 } 503 } 504 505 func (self *Node) skipNextPair() (*Pair) { 506 if !self.isLazy() { 507 return nil 508 } 509 510 parser, stack := self.getParserAndObjectStack() 511 ret := &stack.v 512 sp := parser.p 513 ns := len(parser.s) 514 515 /* check for EOF */ 516 if parser.p = parser.lspace(sp); parser.p >= ns { 517 return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} 518 } 519 520 /* check for empty object */ 521 if parser.s[parser.p] == '}' { 522 parser.p++ 523 self.setObject(ret) 524 return nil 525 } 526 527 /* decode one pair */ 528 var val Node 529 var njs types.JsonState 530 var err types.ParsingError 531 532 /* decode the key */ 533 if njs = parser.decodeValue(); njs.Vt != types.V_STRING { 534 return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 535 } 536 537 /* extract the key */ 538 idx := parser.p - 1 539 key := parser.s[njs.Iv:idx] 540 541 /* check for escape sequence */ 542 if njs.Ep != -1 { 543 if key, err = unquote(key); err != 0 { 544 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 545 } 546 } 547 548 /* expect a ':' delimiter */ 549 if err = parser.delim(); err != 0 { 550 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 551 } 552 553 /* skip the value */ 554 if start, err := parser.skipFast(); err != 0 { 555 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 556 } else { 557 t := switchRawType(parser.s[start]) 558 if t == _V_NONE { 559 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 560 } 561 val = newRawNode(parser.s[start:parser.p], t) 562 } 563 564 /* add the value to result */ 565 ret.Push(Pair{Key: key, Value: val}) 566 self.l++ 567 parser.p = parser.lspace(parser.p) 568 569 /* check for EOF */ 570 if parser.p >= ns { 571 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} 572 } 573 574 /* check for the next character */ 575 switch parser.s[parser.p] { 576 case ',': 577 parser.p++ 578 return ret.At(ret.Len()-1) 579 case '}': 580 parser.p++ 581 self.setObject(ret) 582 return ret.At(ret.Len()-1) 583 default: 584 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 585 } 586 } 587 588 589 /** Parser Factory **/ 590 591 // Loads parse all json into interface{} 592 func Loads(src string) (int, interface{}, error) { 593 ps := &Parser{s: src} 594 np, err := ps.Parse() 595 596 /* check for errors */ 597 if err != 0 { 598 return 0, nil, ps.ExportError(err) 599 } else { 600 x, err := np.Interface() 601 if err != nil { 602 return 0, nil, err 603 } 604 return ps.Pos(), x, nil 605 } 606 } 607 608 // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number 609 func LoadsUseNumber(src string) (int, interface{}, error) { 610 ps := &Parser{s: src} 611 np, err := ps.Parse() 612 613 /* check for errors */ 614 if err != 0 { 615 return 0, nil, err 616 } else { 617 x, err := np.InterfaceUseNumber() 618 if err != nil { 619 return 0, nil, err 620 } 621 return ps.Pos(), x, nil 622 } 623 } 624 625 // NewParser returns pointer of new allocated parser 626 func NewParser(src string) *Parser { 627 return &Parser{s: src} 628 } 629 630 // NewParser returns new allocated parser 631 func NewParserObj(src string) Parser { 632 return Parser{s: src} 633 } 634 635 // decodeNumber controls if parser decodes the number values instead of skip them 636 // WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser 637 // otherwise the memory CANNOT be reused 638 func (self *Parser) decodeNumber(decode bool) { 639 if !decode && self.dbuf != nil { 640 types.FreeDbuf(self.dbuf) 641 self.dbuf = nil 642 return 643 } 644 if decode && self.dbuf == nil { 645 self.dbuf = types.NewDbuf() 646 } 647 } 648 649 // ExportError converts types.ParsingError to std Error 650 func (self *Parser) ExportError(err types.ParsingError) error { 651 if err == _ERR_NOT_FOUND { 652 return ErrNotExist 653 } 654 return fmt.Errorf("%q", SyntaxError{ 655 Pos : self.p, 656 Src : self.s, 657 Code: err, 658 }.Description()) 659 } 660 661 func backward(src string, i int) int { 662 for ; i>=0 && isSpace(src[i]); i-- {} 663 return i 664 }