github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/ast/parser.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package ast
    18  
    19  import (
    20      `fmt`
    21  
    22      `github.com/bytedance/sonic/internal/native/types`
    23      `github.com/bytedance/sonic/internal/rt`
    24  )
    25  
// Sizing constants for node storage.
// NOTE(review): neither constant is referenced in this part of the file;
// the names suggest an initial capacity and a growth shift for node
// containers -- confirm against their users.
const (
    _DEFAULT_NODE_CAP int = 8
    _APPEND_GROW_SHIFT = 1
)
    30  
// Package-private parsing error codes, declared as types.ParsingError values.
const (
    // _ERR_NOT_FOUND is returned by the search helpers when the requested
    // key or index is absent; ExportError maps it to ErrNotExist.
    _ERR_NOT_FOUND      types.ParsingError = 33
    // _ERR_UNSUPPORT_TYPE is the code carried by ErrUnsupportType.
    _ERR_UNSUPPORT_TYPE types.ParsingError = 34
)
    35  
// Exported sentinel errors built from the package-private error codes.
var (
    // ErrNotExist means that neither the key nor the value exists.
    ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")

    // ErrUnsupportType means the API called on the node is unsupported.
    ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
)
    43  
// Parser holds the state of a JSON parse over a source string.
type Parser struct {
    p           int    // current read position in s (reported by Pos)
    s           string // JSON source text being parsed
    noLazy      bool   // when set, Parse decodes arrays and objects eagerly instead of returning lazy nodes
    skipValue   bool   // when set, decodeArray/decodeObject skip each value and store it as a raw node
    dbuf        *byte  // buffer obtained from types.NewDbuf and released with types.FreeDbuf; see decodeNumber
}
    51  
    52  /** Parser Private Methods **/
    53  
    54  func (self *Parser) delim() types.ParsingError {
    55      n := len(self.s)
    56      p := self.lspace(self.p)
    57  
    58      /* check for EOF */
    59      if p >= n {
    60          return types.ERR_EOF
    61      }
    62  
    63      /* check for the delimtier */
    64      if self.s[p] != ':' {
    65          return types.ERR_INVALID_CHAR
    66      }
    67  
    68      /* update the read pointer */
    69      self.p = p + 1
    70      return 0
    71  }
    72  
    73  func (self *Parser) object() types.ParsingError {
    74      n := len(self.s)
    75      p := self.lspace(self.p)
    76  
    77      /* check for EOF */
    78      if p >= n {
    79          return types.ERR_EOF
    80      }
    81  
    82      /* check for the delimtier */
    83      if self.s[p] != '{' {
    84          return types.ERR_INVALID_CHAR
    85      }
    86  
    87      /* update the read pointer */
    88      self.p = p + 1
    89      return 0
    90  }
    91  
    92  func (self *Parser) array() types.ParsingError {
    93      n := len(self.s)
    94      p := self.lspace(self.p)
    95  
    96      /* check for EOF */
    97      if p >= n {
    98          return types.ERR_EOF
    99      }
   100  
   101      /* check for the delimtier */
   102      if self.s[p] != '[' {
   103          return types.ERR_INVALID_CHAR
   104      }
   105  
   106      /* update the read pointer */
   107      self.p = p + 1
   108      return 0
   109  }
   110  
   111  func (self *Parser) lspace(sp int) int {
   112      ns := len(self.s)
   113      for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
   114  
   115      return sp
   116  }
   117  
   118  func (self *Parser) backward() {
   119      for ; self.p >= 0 && isSpace(self.s[self.p]); self.p-=1 {}
   120  }
   121  
   122  func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
   123      sp := self.p
   124      ns := len(self.s)
   125  
   126      /* check for EOF */
   127      if self.p = self.lspace(sp); self.p >= ns {
   128          return Node{}, types.ERR_EOF
   129      }
   130  
   131      /* check for empty array */
   132      if self.s[self.p] == ']' {
   133          self.p++
   134          return Node{t: types.V_ARRAY}, 0
   135      }
   136  
   137      /* allocate array space and parse every element */
   138      for {
   139          var val Node
   140          var err types.ParsingError
   141  
   142          if self.skipValue {
   143              /* skip the value */
   144              var start int
   145              if start, err = self.skipFast(); err != 0 {
   146                  return Node{}, err
   147              }
   148              if self.p > ns {
   149                  return Node{}, types.ERR_EOF
   150              }
   151              t := switchRawType(self.s[start])
   152              if t == _V_NONE {
   153                  return Node{}, types.ERR_INVALID_CHAR
   154              }
   155              val = newRawNode(self.s[start:self.p], t)
   156          }else{
   157              /* decode the value */
   158              if val, err = self.Parse(); err != 0 {
   159                  return Node{}, err
   160              }
   161          }
   162  
   163          /* add the value to result */
   164          ret.Push(val)
   165          self.p = self.lspace(self.p)
   166  
   167          /* check for EOF */
   168          if self.p >= ns {
   169              return Node{}, types.ERR_EOF
   170          }
   171  
   172          /* check for the next character */
   173          switch self.s[self.p] {
   174              case ',' : self.p++
   175              case ']' : self.p++; return newArray(ret), 0
   176              default:
   177                  // if val.isLazy() {
   178                  //     return newLazyArray(self, ret), 0
   179                  // }
   180                  return Node{}, types.ERR_INVALID_CHAR
   181          }
   182      }
   183  }
   184  
   185  func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
   186      sp := self.p
   187      ns := len(self.s)
   188  
   189      /* check for EOF */
   190      if self.p = self.lspace(sp); self.p >= ns {
   191          return Node{}, types.ERR_EOF
   192      }
   193  
   194      /* check for empty object */
   195      if self.s[self.p] == '}' {
   196          self.p++
   197          return Node{t: types.V_OBJECT}, 0
   198      }
   199  
   200      /* decode each pair */
   201      for {
   202          var val Node
   203          var njs types.JsonState
   204          var err types.ParsingError
   205  
   206          /* decode the key */
   207          if njs = self.decodeValue(); njs.Vt != types.V_STRING {
   208              return Node{}, types.ERR_INVALID_CHAR
   209          }
   210  
   211          /* extract the key */
   212          idx := self.p - 1
   213          key := self.s[njs.Iv:idx]
   214  
   215          /* check for escape sequence */
   216          if njs.Ep != -1 {
   217              if key, err = unquote(key); err != 0 {
   218                  return Node{}, err
   219              }
   220          }
   221  
   222          /* expect a ':' delimiter */
   223          if err = self.delim(); err != 0 {
   224              return Node{}, err
   225          }
   226  
   227          
   228          if self.skipValue {
   229              /* skip the value */
   230              var start int
   231              if start, err = self.skipFast(); err != 0 {
   232                  return Node{}, err
   233              }
   234              if self.p > ns {
   235                  return Node{}, types.ERR_EOF
   236              }
   237              t := switchRawType(self.s[start])
   238              if t == _V_NONE {
   239                  return Node{}, types.ERR_INVALID_CHAR
   240              }
   241              val = newRawNode(self.s[start:self.p], t)
   242          } else {
   243              /* decode the value */
   244              if val, err = self.Parse(); err != 0 {
   245                  return Node{}, err
   246              }
   247          }
   248  
   249          /* add the value to result */
   250          // FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
   251          ret.Push(Pair{Key: key, Value: val})
   252          self.p = self.lspace(self.p)
   253  
   254          /* check for EOF */
   255          if self.p >= ns {
   256              return Node{}, types.ERR_EOF
   257          }
   258  
   259          /* check for the next character */
   260          switch self.s[self.p] {
   261              case ',' : self.p++
   262              case '}' : self.p++; return newObject(ret), 0
   263          default:
   264              // if val.isLazy() {
   265              //     return newLazyObject(self, ret), 0
   266              // }
   267              return Node{}, types.ERR_INVALID_CHAR
   268          }
   269      }
   270  }
   271  
   272  func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
   273      p := self.p - 1
   274      s := self.s[iv:p]
   275  
   276      /* fast path: no escape sequence */
   277      if ep == -1 {
   278          return NewString(s), 0
   279      }
   280  
   281      /* unquote the string */
   282      out, err := unquote(s)
   283  
   284      /* check for errors */
   285      if err != 0 {
   286          return Node{}, err
   287      } else {
   288          return newBytes(rt.Str2Mem(out)), 0
   289      }
   290  }
   291  
   292  /** Parser Interface **/
   293  
   294  func (self *Parser) Pos() int {
   295      return self.p
   296  }
   297  
   298  func (self *Parser) Parse() (Node, types.ParsingError) {
   299      switch val := self.decodeValue(); val.Vt {
   300          case types.V_EOF     : return Node{}, types.ERR_EOF
   301          case types.V_NULL    : return nullNode, 0
   302          case types.V_TRUE    : return trueNode, 0
   303          case types.V_FALSE   : return falseNode, 0
   304          case types.V_STRING  : return self.decodeString(val.Iv, val.Ep)
   305          case types.V_ARRAY:
   306              if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
   307                  self.p = p + 1
   308                  return Node{t: types.V_ARRAY}, 0
   309              }
   310              if self.noLazy {
   311                  return self.decodeArray(new(linkedNodes))
   312              }
   313              return newLazyArray(self), 0
   314          case types.V_OBJECT:
   315              if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
   316                  self.p = p + 1
   317                  return Node{t: types.V_OBJECT}, 0
   318              }
   319              if self.noLazy {
   320                  return self.decodeObject(new(linkedPairs))
   321              }
   322              return newLazyObject(self), 0
   323          case types.V_DOUBLE  : return NewNumber(self.s[val.Ep:self.p]), 0
   324          case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
   325          default              : return Node{}, types.ParsingError(-val.Vt)
   326      }
   327  }
   328  
   329  func (self *Parser) searchKey(match string) types.ParsingError {
   330      ns := len(self.s)
   331      if err := self.object(); err != 0 {
   332          return err
   333      }
   334  
   335      /* check for EOF */
   336      if self.p = self.lspace(self.p); self.p >= ns {
   337          return types.ERR_EOF
   338      }
   339  
   340      /* check for empty object */
   341      if self.s[self.p] == '}' {
   342          self.p++
   343          return _ERR_NOT_FOUND
   344      }
   345  
   346      var njs types.JsonState
   347      var err types.ParsingError
   348      /* decode each pair */
   349      for {
   350  
   351          /* decode the key */
   352          if njs = self.decodeValue(); njs.Vt != types.V_STRING {
   353              return types.ERR_INVALID_CHAR
   354          }
   355  
   356          /* extract the key */
   357          idx := self.p - 1
   358          key := self.s[njs.Iv:idx]
   359  
   360          /* check for escape sequence */
   361          if njs.Ep != -1 {
   362              if key, err = unquote(key); err != 0 {
   363                  return err
   364              }
   365          }
   366  
   367          /* expect a ':' delimiter */
   368          if err = self.delim(); err != 0 {
   369              return err
   370          }
   371  
   372          /* skip value */
   373          if key != match {
   374              if _, err = self.skipFast(); err != 0 {
   375                  return err
   376              }
   377          } else {
   378              return 0
   379          }
   380  
   381          /* check for EOF */
   382          self.p = self.lspace(self.p)
   383          if self.p >= ns {
   384              return types.ERR_EOF
   385          }
   386  
   387          /* check for the next character */
   388          switch self.s[self.p] {
   389          case ',':
   390              self.p++
   391          case '}':
   392              self.p++
   393              return _ERR_NOT_FOUND
   394          default:
   395              return types.ERR_INVALID_CHAR
   396          }
   397      }
   398  }
   399  
   400  func (self *Parser) searchIndex(idx int) types.ParsingError {
   401      ns := len(self.s)
   402      if err := self.array(); err != 0 {
   403          return err
   404      }
   405  
   406      /* check for EOF */
   407      if self.p = self.lspace(self.p); self.p >= ns {
   408          return types.ERR_EOF
   409      }
   410  
   411      /* check for empty array */
   412      if self.s[self.p] == ']' {
   413          self.p++
   414          return _ERR_NOT_FOUND
   415      }
   416  
   417      var err types.ParsingError
   418      /* allocate array space and parse every element */
   419      for i := 0; i < idx; i++ {
   420  
   421          /* decode the value */
   422          if _, err = self.skipFast(); err != 0 {
   423              return err
   424          }
   425  
   426          /* check for EOF */
   427          self.p = self.lspace(self.p)
   428          if self.p >= ns {
   429              return types.ERR_EOF
   430          }
   431  
   432          /* check for the next character */
   433          switch self.s[self.p] {
   434          case ',':
   435              self.p++
   436          case ']':
   437              self.p++
   438              return _ERR_NOT_FOUND
   439          default:
   440              return types.ERR_INVALID_CHAR
   441          }
   442      }
   443  
   444      return 0
   445  }
   446  
   447  func (self *Node) skipNextNode() *Node {
   448      if !self.isLazy() {
   449          return nil
   450      }
   451  
   452      parser, stack := self.getParserAndArrayStack()
   453      ret := &stack.v
   454      sp := parser.p
   455      ns := len(parser.s)
   456  
   457      /* check for EOF */
   458      if parser.p = parser.lspace(sp); parser.p >= ns {
   459          return newSyntaxError(parser.syntaxError(types.ERR_EOF))
   460      }
   461  
   462      /* check for empty array */
   463      if parser.s[parser.p] == ']' {
   464          parser.p++
   465          self.setArray(ret)
   466          return nil
   467      }
   468  
   469      var val Node
   470      /* skip the value */
   471      if start, err := parser.skipFast(); err != 0 {
   472          return newSyntaxError(parser.syntaxError(err))
   473      } else {
   474          t := switchRawType(parser.s[start])
   475          if t == _V_NONE {
   476              return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
   477          }
   478          val = newRawNode(parser.s[start:parser.p], t)
   479      }
   480  
   481      /* add the value to result */
   482      ret.Push(val)
   483      self.l++
   484      parser.p = parser.lspace(parser.p)
   485  
   486      /* check for EOF */
   487      if parser.p >= ns {
   488          return newSyntaxError(parser.syntaxError(types.ERR_EOF))
   489      }
   490  
   491      /* check for the next character */
   492      switch parser.s[parser.p] {
   493      case ',':
   494          parser.p++
   495          return ret.At(ret.Len()-1)
   496      case ']':
   497          parser.p++
   498          self.setArray(ret)
   499          return ret.At(ret.Len()-1)
   500      default:
   501          return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
   502      }
   503  }
   504  
   505  func (self *Node) skipNextPair() (*Pair) {
   506      if !self.isLazy() {
   507          return nil
   508      }
   509  
   510      parser, stack := self.getParserAndObjectStack()
   511      ret := &stack.v
   512      sp := parser.p
   513      ns := len(parser.s)
   514  
   515      /* check for EOF */
   516      if parser.p = parser.lspace(sp); parser.p >= ns {
   517          return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
   518      }
   519  
   520      /* check for empty object */
   521      if parser.s[parser.p] == '}' {
   522          parser.p++
   523          self.setObject(ret)
   524          return nil
   525      }
   526  
   527      /* decode one pair */
   528      var val Node
   529      var njs types.JsonState
   530      var err types.ParsingError
   531  
   532      /* decode the key */
   533      if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
   534          return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
   535      }
   536  
   537      /* extract the key */
   538      idx := parser.p - 1
   539      key := parser.s[njs.Iv:idx]
   540  
   541      /* check for escape sequence */
   542      if njs.Ep != -1 {
   543          if key, err = unquote(key); err != 0 {
   544              return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
   545          }
   546      }
   547  
   548      /* expect a ':' delimiter */
   549      if err = parser.delim(); err != 0 {
   550          return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
   551      }
   552  
   553      /* skip the value */
   554      if start, err := parser.skipFast(); err != 0 {
   555          return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
   556      } else {
   557          t := switchRawType(parser.s[start])
   558          if t == _V_NONE {
   559              return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
   560          }
   561          val = newRawNode(parser.s[start:parser.p], t)
   562      }
   563  
   564      /* add the value to result */
   565      ret.Push(Pair{Key: key, Value: val})
   566      self.l++
   567      parser.p = parser.lspace(parser.p)
   568  
   569      /* check for EOF */
   570      if parser.p >= ns {
   571          return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
   572      }
   573  
   574      /* check for the next character */
   575      switch parser.s[parser.p] {
   576      case ',':
   577          parser.p++
   578          return ret.At(ret.Len()-1)
   579      case '}':
   580          parser.p++
   581          self.setObject(ret)
   582          return ret.At(ret.Len()-1)
   583      default:
   584          return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
   585      }
   586  }
   587  
   588  
   589  /** Parser Factory **/
   590  
   591  // Loads parse all json into interface{}
   592  func Loads(src string) (int, interface{}, error) {
   593      ps := &Parser{s: src}
   594      np, err := ps.Parse()
   595  
   596      /* check for errors */
   597      if err != 0 {
   598          return 0, nil, ps.ExportError(err)
   599      } else {
   600          x, err := np.Interface()
   601          if err != nil {
   602              return 0, nil, err
   603          }
   604          return ps.Pos(), x, nil
   605      }
   606  }
   607  
   608  // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
   609  func LoadsUseNumber(src string) (int, interface{}, error) {
   610      ps := &Parser{s: src}
   611      np, err := ps.Parse()
   612  
   613      /* check for errors */
   614      if err != 0 {
   615          return 0, nil, err
   616      } else {
   617          x, err := np.InterfaceUseNumber()
   618          if err != nil {
   619              return 0, nil, err
   620          }
   621          return ps.Pos(), x, nil
   622      }
   623  }
   624  
   625  // NewParser returns pointer of new allocated parser
   626  func NewParser(src string) *Parser {
   627      return &Parser{s: src}
   628  }
   629  
   630  // NewParser returns new allocated parser
   631  func NewParserObj(src string) Parser {
   632      return Parser{s: src}
   633  }
   634  
   635  // decodeNumber controls if parser decodes the number values instead of skip them
   636  //   WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser 
   637  //   otherwise the memory CANNOT be reused
   638  func (self *Parser) decodeNumber(decode bool) {
   639      if !decode && self.dbuf != nil {
   640          types.FreeDbuf(self.dbuf)
   641          self.dbuf = nil
   642          return
   643      }
   644      if decode && self.dbuf == nil {
   645          self.dbuf = types.NewDbuf()
   646      }
   647  }
   648  
   649  // ExportError converts types.ParsingError to std Error
   650  func (self *Parser) ExportError(err types.ParsingError) error {
   651      if err == _ERR_NOT_FOUND {
   652          return ErrNotExist
   653      }
   654      return fmt.Errorf("%q", SyntaxError{
   655          Pos : self.p,
   656          Src : self.s,
   657          Code: err,
   658      }.Description())
   659  }
   660  
   661  func backward(src string, i int) int {
   662      for ; i>=0 && isSpace(src[i]); i-- {}
   663      return i
   664  }