github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/ast/parser.go (about) 1 /* 2 * Copyright 2021 ByteDance Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package ast 18 19 import ( 20 "fmt" 21 22 "github.com/goshafaq/sonic/internal/native/types" 23 "github.com/goshafaq/sonic/internal/rt" 24 ) 25 26 const ( 27 _DEFAULT_NODE_CAP int = 8 28 _APPEND_GROW_SHIFT = 1 29 ) 30 31 const ( 32 _ERR_NOT_FOUND types.ParsingError = 33 33 _ERR_UNSUPPORT_TYPE types.ParsingError = 34 34 ) 35 36 var ( 37 // ErrNotExist means both key and value doesn't exist 38 ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists") 39 40 // ErrUnsupportType means API on the node is unsupported 41 ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type") 42 ) 43 44 type Parser struct { 45 p int 46 s string 47 noLazy bool 48 skipValue bool 49 dbuf *byte 50 } 51 52 /** Parser Private Methods **/ 53 54 func (self *Parser) delim() types.ParsingError { 55 n := len(self.s) 56 p := self.lspace(self.p) 57 58 /* check for EOF */ 59 if p >= n { 60 return types.ERR_EOF 61 } 62 63 /* check for the delimtier */ 64 if self.s[p] != ':' { 65 return types.ERR_INVALID_CHAR 66 } 67 68 /* update the read pointer */ 69 self.p = p + 1 70 return 0 71 } 72 73 func (self *Parser) object() types.ParsingError { 74 n := len(self.s) 75 p := self.lspace(self.p) 76 77 /* check for EOF */ 78 if p >= n { 79 return types.ERR_EOF 80 } 81 82 /* check for the delimtier */ 83 if self.s[p] != '{' { 84 return types.ERR_INVALID_CHAR 85 } 86 87 /* update the read pointer */ 88 self.p = p + 1 89 return 0 90 } 91 92 func (self *Parser) array() types.ParsingError { 93 n := len(self.s) 94 p := self.lspace(self.p) 95 96 /* check for EOF */ 97 if p >= n { 98 return types.ERR_EOF 99 } 100 101 /* check for the delimtier */ 102 if self.s[p] != '[' { 103 return types.ERR_INVALID_CHAR 104 } 105 106 /* update the read pointer */ 107 self.p = p + 1 108 return 0 109 } 110 111 func (self *Parser) lspace(sp int) int { 112 ns := len(self.s) 113 for ; sp < ns && isSpace(self.s[sp]); sp += 1 { 114 } 115 116 return sp 117 } 118 119 func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { 120 sp := self.p 121 ns := len(self.s) 122 123 /* check for EOF */ 124 if self.p = self.lspace(sp); self.p >= ns { 125 return Node{}, types.ERR_EOF 126 } 127 128 /* check for empty array */ 129 if self.s[self.p] == ']' { 130 self.p++ 131 return Node{t: types.V_ARRAY}, 0 132 } 133 134 /* allocate array space and parse every element */ 135 for { 136 var val Node 137 var err types.ParsingError 138 139 if self.skipValue { 140 /* skip the value */ 141 var start int 142 if start, err = self.skipFast(); err != 0 { 143 return Node{}, err 144 } 145 if self.p > ns { 146 return Node{}, types.ERR_EOF 147 } 148 t := switchRawType(self.s[start]) 149 if t == _V_NONE { 150 return Node{}, types.ERR_INVALID_CHAR 151 } 152 val = newRawNode(self.s[start:self.p], t) 153 } else { 154 /* decode the value */ 155 if val, err = self.Parse(); err != 0 { 156 return Node{}, err 157 } 158 } 159 160 /* add the value to result */ 161 ret.Add(val) 162 self.p = self.lspace(self.p) 163 164 /* check for EOF */ 165 if self.p >= ns { 166 return Node{}, types.ERR_EOF 167 } 168 169 /* check for the next character */ 170 switch self.s[self.p] { 171 case ',': 172 self.p++ 173 case ']': 174 self.p++ 175 return newArray(ret), 0 176 default: 177 // if val.isLazy() { 178 // return newLazyArray(self, ret), 0 179 // } 180 return Node{}, types.ERR_INVALID_CHAR 181 } 182 } 183 } 184 185 func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { 186 sp := self.p 187 ns := len(self.s) 188 189 /* check for EOF */ 190 if self.p = self.lspace(sp); self.p >= ns { 191 return Node{}, types.ERR_EOF 192 } 193 194 /* check for empty object */ 195 if self.s[self.p] == '}' { 196 self.p++ 197 return Node{t: types.V_OBJECT}, 0 198 } 199 200 /* decode each pair */ 201 for { 202 var val Node 203 var njs types.JsonState 204 var err types.ParsingError 205 206 /* decode the key */ 207 if njs = self.decodeValue(); njs.Vt != types.V_STRING { 208 return Node{}, types.ERR_INVALID_CHAR 209 } 210 211 /* extract the key */ 212 idx := self.p - 1 213 key := self.s[njs.Iv:idx] 214 215 /* check for escape sequence */ 216 if njs.Ep != -1 { 217 if key, err = unquote(key); err != 0 { 218 return Node{}, err 219 } 220 } 221 222 /* expect a ':' delimiter */ 223 if err = self.delim(); err != 0 { 224 return Node{}, err 225 } 226 227 if self.skipValue { 228 /* skip the value */ 229 var start int 230 if start, err = self.skipFast(); err != 0 { 231 return Node{}, err 232 } 233 if self.p > ns { 234 return Node{}, types.ERR_EOF 235 } 236 t := switchRawType(self.s[start]) 237 if t == _V_NONE { 238 return Node{}, types.ERR_INVALID_CHAR 239 } 240 val = newRawNode(self.s[start:self.p], t) 241 } else { 242 /* decode the value */ 243 if val, err = self.Parse(); err != 0 { 244 return Node{}, err 245 } 246 } 247 248 /* add the value to result */ 249 // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !! 250 ret.Add(Pair{Key: key, Value: val}) 251 self.p = self.lspace(self.p) 252 253 /* check for EOF */ 254 if self.p >= ns { 255 return Node{}, types.ERR_EOF 256 } 257 258 /* check for the next character */ 259 switch self.s[self.p] { 260 case ',': 261 self.p++ 262 case '}': 263 self.p++ 264 return newObject(ret), 0 265 default: 266 // if val.isLazy() { 267 // return newLazyObject(self, ret), 0 268 // } 269 return Node{}, types.ERR_INVALID_CHAR 270 } 271 } 272 } 273 274 func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) { 275 p := self.p - 1 276 s := self.s[iv:p] 277 278 /* fast path: no escape sequence */ 279 if ep == -1 { 280 return NewString(s), 0 281 } 282 283 /* unquote the string */ 284 out, err := unquote(s) 285 286 /* check for errors */ 287 if err != 0 { 288 return Node{}, err 289 } else { 290 return newBytes(rt.Str2Mem(out)), 0 291 } 292 } 293 294 /** Parser Interface **/ 295 296 func (self *Parser) Pos() int { 297 return self.p 298 } 299 300 func (self *Parser) Parse() (Node, types.ParsingError) { 301 switch val := self.decodeValue(); val.Vt { 302 case types.V_EOF: 303 return Node{}, types.ERR_EOF 304 case types.V_NULL: 305 return nullNode, 0 306 case types.V_TRUE: 307 return trueNode, 0 308 case types.V_FALSE: 309 return falseNode, 0 310 case types.V_STRING: 311 return self.decodeString(val.Iv, val.Ep) 312 case types.V_ARRAY: 313 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { 314 self.p = p + 1 315 return Node{t: types.V_ARRAY}, 0 316 } 317 if self.noLazy { 318 return self.decodeArray(new(linkedNodes)) 319 } 320 return newLazyArray(self), 0 321 case types.V_OBJECT: 322 if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { 323 self.p = p + 1 324 return Node{t: types.V_OBJECT}, 0 325 } 326 if self.noLazy { 327 return self.decodeObject(new(linkedPairs)) 328 } 329 return newLazyObject(self), 0 330 case types.V_DOUBLE: 331 return NewNumber(self.s[val.Ep:self.p]), 0 332 case types.V_INTEGER: 333 return NewNumber(self.s[val.Ep:self.p]), 0 334 default: 335 return Node{}, types.ParsingError(-val.Vt) 336 } 337 } 338 339 func (self *Parser) searchKey(match string) types.ParsingError { 340 ns := len(self.s) 341 if err := self.object(); err != 0 { 342 return err 343 } 344 345 /* check for EOF */ 346 if self.p = self.lspace(self.p); self.p >= ns { 347 return types.ERR_EOF 348 } 349 350 /* check for empty object */ 351 if self.s[self.p] == '}' { 352 self.p++ 353 return _ERR_NOT_FOUND 354 } 355 356 var njs types.JsonState 357 var err types.ParsingError 358 /* decode each pair */ 359 for { 360 361 /* decode the key */ 362 if njs = self.decodeValue(); njs.Vt != types.V_STRING { 363 return types.ERR_INVALID_CHAR 364 } 365 366 /* extract the key */ 367 idx := self.p - 1 368 key := self.s[njs.Iv:idx] 369 370 /* check for escape sequence */ 371 if njs.Ep != -1 { 372 if key, err = unquote(key); err != 0 { 373 return err 374 } 375 } 376 377 /* expect a ':' delimiter */ 378 if err = self.delim(); err != 0 { 379 return err 380 } 381 382 /* skip value */ 383 if key != match { 384 if _, err = self.skipFast(); err != 0 { 385 return err 386 } 387 } else { 388 return 0 389 } 390 391 /* check for EOF */ 392 self.p = self.lspace(self.p) 393 if self.p >= ns { 394 return types.ERR_EOF 395 } 396 397 /* check for the next character */ 398 switch self.s[self.p] { 399 case ',': 400 self.p++ 401 case '}': 402 self.p++ 403 return _ERR_NOT_FOUND 404 default: 405 return types.ERR_INVALID_CHAR 406 } 407 } 408 } 409 410 func (self *Parser) searchIndex(idx int) types.ParsingError { 411 ns := len(self.s) 412 if err := self.array(); err != 0 { 413 return err 414 } 415 416 /* check for EOF */ 417 if self.p = self.lspace(self.p); self.p >= ns { 418 return types.ERR_EOF 419 } 420 421 /* check for empty array */ 422 if self.s[self.p] == ']' { 423 self.p++ 424 return _ERR_NOT_FOUND 425 } 426 427 var err types.ParsingError 428 /* allocate array space and parse every element */ 429 for i := 0; i < idx; i++ { 430 431 /* decode the value */ 432 if _, err = self.skipFast(); err != 0 { 433 return err 434 } 435 436 /* check for EOF */ 437 self.p = self.lspace(self.p) 438 if self.p >= ns { 439 return types.ERR_EOF 440 } 441 442 /* check for the next character */ 443 switch self.s[self.p] { 444 case ',': 445 self.p++ 446 case ']': 447 self.p++ 448 return _ERR_NOT_FOUND 449 default: 450 return types.ERR_INVALID_CHAR 451 } 452 } 453 454 return 0 455 } 456 457 func (self *Node) skipNextNode() *Node { 458 if !self.isLazy() { 459 return nil 460 } 461 462 parser, stack := self.getParserAndArrayStack() 463 ret := &stack.v 464 sp := parser.p 465 ns := len(parser.s) 466 467 /* check for EOF */ 468 if parser.p = parser.lspace(sp); parser.p >= ns { 469 return newSyntaxError(parser.syntaxError(types.ERR_EOF)) 470 } 471 472 /* check for empty array */ 473 if parser.s[parser.p] == ']' { 474 parser.p++ 475 self.setArray(ret) 476 return nil 477 } 478 479 var val Node 480 /* skip the value */ 481 if start, err := parser.skipFast(); err != 0 { 482 return newSyntaxError(parser.syntaxError(err)) 483 } else { 484 t := switchRawType(parser.s[start]) 485 if t == _V_NONE { 486 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) 487 } 488 val = newRawNode(parser.s[start:parser.p], t) 489 } 490 491 /* add the value to result */ 492 ret.Add(val) 493 self.l++ 494 parser.p = parser.lspace(parser.p) 495 496 /* check for EOF */ 497 if parser.p >= ns { 498 return newSyntaxError(parser.syntaxError(types.ERR_EOF)) 499 } 500 501 /* check for the next character */ 502 switch parser.s[parser.p] { 503 case ',': 504 parser.p++ 505 return ret.At(ret.Len() - 1) 506 case ']': 507 parser.p++ 508 self.setArray(ret) 509 return ret.At(ret.Len() - 1) 510 default: 511 return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) 512 } 513 } 514 515 func (self *Node) skipNextPair() *Pair { 516 if !self.isLazy() { 517 return nil 518 } 519 520 parser, stack := self.getParserAndObjectStack() 521 ret := &stack.v 522 sp := parser.p 523 ns := len(parser.s) 524 525 /* check for EOF */ 526 if parser.p = parser.lspace(sp); parser.p >= ns { 527 return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))} 528 } 529 530 /* check for empty object */ 531 if parser.s[parser.p] == '}' { 532 parser.p++ 533 self.setObject(ret) 534 return nil 535 } 536 537 /* decode one pair */ 538 var val Node 539 var njs types.JsonState 540 var err types.ParsingError 541 542 /* decode the key */ 543 if njs = parser.decodeValue(); njs.Vt != types.V_STRING { 544 return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 545 } 546 547 /* extract the key */ 548 idx := parser.p - 1 549 key := parser.s[njs.Iv:idx] 550 551 /* check for escape sequence */ 552 if njs.Ep != -1 { 553 if key, err = unquote(key); err != 0 { 554 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 555 } 556 } 557 558 /* expect a ':' delimiter */ 559 if err = parser.delim(); err != 0 { 560 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 561 } 562 563 /* skip the value */ 564 if start, err := parser.skipFast(); err != 0 { 565 return &Pair{key, *newSyntaxError(parser.syntaxError(err))} 566 } else { 567 t := switchRawType(parser.s[start]) 568 if t == _V_NONE { 569 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 570 } 571 val = newRawNode(parser.s[start:parser.p], t) 572 } 573 574 /* add the value to result */ 575 ret.Add(Pair{Key: key, Value: val}) 576 self.l++ 577 parser.p = parser.lspace(parser.p) 578 579 /* check for EOF */ 580 if parser.p >= ns { 581 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))} 582 } 583 584 /* check for the next character */ 585 switch parser.s[parser.p] { 586 case ',': 587 parser.p++ 588 return ret.At(ret.Len() - 1) 589 case '}': 590 parser.p++ 591 self.setObject(ret) 592 return ret.At(ret.Len() - 1) 593 default: 594 return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} 595 } 596 } 597 598 /** Parser Factory **/ 599 600 // Loads parse all json into interface{} 601 func Loads(src string) (int, interface{}, error) { 602 ps := &Parser{s: src} 603 np, err := ps.Parse() 604 605 /* check for errors */ 606 if err != 0 { 607 return 0, nil, ps.ExportError(err) 608 } else { 609 x, err := np.Interface() 610 if err != nil { 611 return 0, nil, err 612 } 613 return ps.Pos(), x, nil 614 } 615 } 616 617 // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number 618 func LoadsUseNumber(src string) (int, interface{}, error) { 619 ps := &Parser{s: src} 620 np, err := ps.Parse() 621 622 /* check for errors */ 623 if err != 0 { 624 return 0, nil, err 625 } else { 626 x, err := np.InterfaceUseNumber() 627 if err != nil { 628 return 0, nil, err 629 } 630 return ps.Pos(), x, nil 631 } 632 } 633 634 // NewParser returns pointer of new allocated parser 635 func NewParser(src string) *Parser { 636 return &Parser{s: src} 637 } 638 639 // NewParser returns new allocated parser 640 func NewParserObj(src string) Parser { 641 return Parser{s: src} 642 } 643 644 // decodeNumber controls if parser decodes the number values instead of skip them 645 // 646 // WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser 647 // otherwise the memory CANNOT be reused 648 func (self *Parser) decodeNumber(decode bool) { 649 if !decode && self.dbuf != nil { 650 types.FreeDbuf(self.dbuf) 651 self.dbuf = nil 652 return 653 } 654 if decode && self.dbuf == nil { 655 self.dbuf = types.NewDbuf() 656 } 657 } 658 659 // ExportError converts types.ParsingError to std Error 660 func (self *Parser) ExportError(err types.ParsingError) error { 661 if err == _ERR_NOT_FOUND { 662 return ErrNotExist 663 } 664 return fmt.Errorf("%q", SyntaxError{ 665 Pos: self.p, 666 Src: self.s, 667 Code: err, 668 }.Description()) 669 }