github.com/goshafaq/sonic@v0.0.0-20231026082336-871835fb94c6/ast/parser.go (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package ast
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/goshafaq/sonic/internal/native/types"
    23  	"github.com/goshafaq/sonic/internal/rt"
    24  )
    25  
    26  const (
    27  	_DEFAULT_NODE_CAP  int = 8
    28  	_APPEND_GROW_SHIFT     = 1
    29  )
    30  
    31  const (
    32  	_ERR_NOT_FOUND      types.ParsingError = 33
    33  	_ERR_UNSUPPORT_TYPE types.ParsingError = 34
    34  )
    35  
    36  var (
    37  	// ErrNotExist means both key and value doesn't exist
    38  	ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
    39  
    40  	// ErrUnsupportType means API on the node is unsupported
    41  	ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
    42  )
    43  
// Parser reads JSON values out of a source string, tracking a read position.
type Parser struct {
	// p is the current read offset into s; Pos reports it.
	p         int
	// s is the JSON source text being parsed.
	s         string
	// noLazy makes Parse decode non-empty arrays and objects immediately
	// (via decodeArray/decodeObject) instead of returning lazy nodes.
	noLazy    bool
	// skipValue makes decodeArray/decodeObject skip over each element with
	// skipFast and store it as a raw node rather than parsing it.
	skipValue bool
	// dbuf is a buffer obtained from types.NewDbuf by decodeNumber(true) and
	// released with types.FreeDbuf by decodeNumber(false).
	dbuf      *byte
}
    51  
    52  /** Parser Private Methods **/
    53  
    54  func (self *Parser) delim() types.ParsingError {
    55  	n := len(self.s)
    56  	p := self.lspace(self.p)
    57  
    58  	/* check for EOF */
    59  	if p >= n {
    60  		return types.ERR_EOF
    61  	}
    62  
    63  	/* check for the delimtier */
    64  	if self.s[p] != ':' {
    65  		return types.ERR_INVALID_CHAR
    66  	}
    67  
    68  	/* update the read pointer */
    69  	self.p = p + 1
    70  	return 0
    71  }
    72  
    73  func (self *Parser) object() types.ParsingError {
    74  	n := len(self.s)
    75  	p := self.lspace(self.p)
    76  
    77  	/* check for EOF */
    78  	if p >= n {
    79  		return types.ERR_EOF
    80  	}
    81  
    82  	/* check for the delimtier */
    83  	if self.s[p] != '{' {
    84  		return types.ERR_INVALID_CHAR
    85  	}
    86  
    87  	/* update the read pointer */
    88  	self.p = p + 1
    89  	return 0
    90  }
    91  
    92  func (self *Parser) array() types.ParsingError {
    93  	n := len(self.s)
    94  	p := self.lspace(self.p)
    95  
    96  	/* check for EOF */
    97  	if p >= n {
    98  		return types.ERR_EOF
    99  	}
   100  
   101  	/* check for the delimtier */
   102  	if self.s[p] != '[' {
   103  		return types.ERR_INVALID_CHAR
   104  	}
   105  
   106  	/* update the read pointer */
   107  	self.p = p + 1
   108  	return 0
   109  }
   110  
   111  func (self *Parser) lspace(sp int) int {
   112  	ns := len(self.s)
   113  	for ; sp < ns && isSpace(self.s[sp]); sp += 1 {
   114  	}
   115  
   116  	return sp
   117  }
   118  
   119  func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
   120  	sp := self.p
   121  	ns := len(self.s)
   122  
   123  	/* check for EOF */
   124  	if self.p = self.lspace(sp); self.p >= ns {
   125  		return Node{}, types.ERR_EOF
   126  	}
   127  
   128  	/* check for empty array */
   129  	if self.s[self.p] == ']' {
   130  		self.p++
   131  		return Node{t: types.V_ARRAY}, 0
   132  	}
   133  
   134  	/* allocate array space and parse every element */
   135  	for {
   136  		var val Node
   137  		var err types.ParsingError
   138  
   139  		if self.skipValue {
   140  			/* skip the value */
   141  			var start int
   142  			if start, err = self.skipFast(); err != 0 {
   143  				return Node{}, err
   144  			}
   145  			if self.p > ns {
   146  				return Node{}, types.ERR_EOF
   147  			}
   148  			t := switchRawType(self.s[start])
   149  			if t == _V_NONE {
   150  				return Node{}, types.ERR_INVALID_CHAR
   151  			}
   152  			val = newRawNode(self.s[start:self.p], t)
   153  		} else {
   154  			/* decode the value */
   155  			if val, err = self.Parse(); err != 0 {
   156  				return Node{}, err
   157  			}
   158  		}
   159  
   160  		/* add the value to result */
   161  		ret.Add(val)
   162  		self.p = self.lspace(self.p)
   163  
   164  		/* check for EOF */
   165  		if self.p >= ns {
   166  			return Node{}, types.ERR_EOF
   167  		}
   168  
   169  		/* check for the next character */
   170  		switch self.s[self.p] {
   171  		case ',':
   172  			self.p++
   173  		case ']':
   174  			self.p++
   175  			return newArray(ret), 0
   176  		default:
   177  			// if val.isLazy() {
   178  			//     return newLazyArray(self, ret), 0
   179  			// }
   180  			return Node{}, types.ERR_INVALID_CHAR
   181  		}
   182  	}
   183  }
   184  
   185  func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
   186  	sp := self.p
   187  	ns := len(self.s)
   188  
   189  	/* check for EOF */
   190  	if self.p = self.lspace(sp); self.p >= ns {
   191  		return Node{}, types.ERR_EOF
   192  	}
   193  
   194  	/* check for empty object */
   195  	if self.s[self.p] == '}' {
   196  		self.p++
   197  		return Node{t: types.V_OBJECT}, 0
   198  	}
   199  
   200  	/* decode each pair */
   201  	for {
   202  		var val Node
   203  		var njs types.JsonState
   204  		var err types.ParsingError
   205  
   206  		/* decode the key */
   207  		if njs = self.decodeValue(); njs.Vt != types.V_STRING {
   208  			return Node{}, types.ERR_INVALID_CHAR
   209  		}
   210  
   211  		/* extract the key */
   212  		idx := self.p - 1
   213  		key := self.s[njs.Iv:idx]
   214  
   215  		/* check for escape sequence */
   216  		if njs.Ep != -1 {
   217  			if key, err = unquote(key); err != 0 {
   218  				return Node{}, err
   219  			}
   220  		}
   221  
   222  		/* expect a ':' delimiter */
   223  		if err = self.delim(); err != 0 {
   224  			return Node{}, err
   225  		}
   226  
   227  		if self.skipValue {
   228  			/* skip the value */
   229  			var start int
   230  			if start, err = self.skipFast(); err != 0 {
   231  				return Node{}, err
   232  			}
   233  			if self.p > ns {
   234  				return Node{}, types.ERR_EOF
   235  			}
   236  			t := switchRawType(self.s[start])
   237  			if t == _V_NONE {
   238  				return Node{}, types.ERR_INVALID_CHAR
   239  			}
   240  			val = newRawNode(self.s[start:self.p], t)
   241  		} else {
   242  			/* decode the value */
   243  			if val, err = self.Parse(); err != 0 {
   244  				return Node{}, err
   245  			}
   246  		}
   247  
   248  		/* add the value to result */
   249  		// FIXME: ret's address may change here, thus previous referred node in ret may be invalid !!
   250  		ret.Add(Pair{Key: key, Value: val})
   251  		self.p = self.lspace(self.p)
   252  
   253  		/* check for EOF */
   254  		if self.p >= ns {
   255  			return Node{}, types.ERR_EOF
   256  		}
   257  
   258  		/* check for the next character */
   259  		switch self.s[self.p] {
   260  		case ',':
   261  			self.p++
   262  		case '}':
   263  			self.p++
   264  			return newObject(ret), 0
   265  		default:
   266  			// if val.isLazy() {
   267  			//     return newLazyObject(self, ret), 0
   268  			// }
   269  			return Node{}, types.ERR_INVALID_CHAR
   270  		}
   271  	}
   272  }
   273  
   274  func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
   275  	p := self.p - 1
   276  	s := self.s[iv:p]
   277  
   278  	/* fast path: no escape sequence */
   279  	if ep == -1 {
   280  		return NewString(s), 0
   281  	}
   282  
   283  	/* unquote the string */
   284  	out, err := unquote(s)
   285  
   286  	/* check for errors */
   287  	if err != 0 {
   288  		return Node{}, err
   289  	} else {
   290  		return newBytes(rt.Str2Mem(out)), 0
   291  	}
   292  }
   293  
   294  /** Parser Interface **/
   295  
// Pos returns the parser's current read offset into its source string.
func (self *Parser) Pos() int {
	return self.p
}
   299  
   300  func (self *Parser) Parse() (Node, types.ParsingError) {
   301  	switch val := self.decodeValue(); val.Vt {
   302  	case types.V_EOF:
   303  		return Node{}, types.ERR_EOF
   304  	case types.V_NULL:
   305  		return nullNode, 0
   306  	case types.V_TRUE:
   307  		return trueNode, 0
   308  	case types.V_FALSE:
   309  		return falseNode, 0
   310  	case types.V_STRING:
   311  		return self.decodeString(val.Iv, val.Ep)
   312  	case types.V_ARRAY:
   313  		if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
   314  			self.p = p + 1
   315  			return Node{t: types.V_ARRAY}, 0
   316  		}
   317  		if self.noLazy {
   318  			return self.decodeArray(new(linkedNodes))
   319  		}
   320  		return newLazyArray(self), 0
   321  	case types.V_OBJECT:
   322  		if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
   323  			self.p = p + 1
   324  			return Node{t: types.V_OBJECT}, 0
   325  		}
   326  		if self.noLazy {
   327  			return self.decodeObject(new(linkedPairs))
   328  		}
   329  		return newLazyObject(self), 0
   330  	case types.V_DOUBLE:
   331  		return NewNumber(self.s[val.Ep:self.p]), 0
   332  	case types.V_INTEGER:
   333  		return NewNumber(self.s[val.Ep:self.p]), 0
   334  	default:
   335  		return Node{}, types.ParsingError(-val.Vt)
   336  	}
   337  }
   338  
   339  func (self *Parser) searchKey(match string) types.ParsingError {
   340  	ns := len(self.s)
   341  	if err := self.object(); err != 0 {
   342  		return err
   343  	}
   344  
   345  	/* check for EOF */
   346  	if self.p = self.lspace(self.p); self.p >= ns {
   347  		return types.ERR_EOF
   348  	}
   349  
   350  	/* check for empty object */
   351  	if self.s[self.p] == '}' {
   352  		self.p++
   353  		return _ERR_NOT_FOUND
   354  	}
   355  
   356  	var njs types.JsonState
   357  	var err types.ParsingError
   358  	/* decode each pair */
   359  	for {
   360  
   361  		/* decode the key */
   362  		if njs = self.decodeValue(); njs.Vt != types.V_STRING {
   363  			return types.ERR_INVALID_CHAR
   364  		}
   365  
   366  		/* extract the key */
   367  		idx := self.p - 1
   368  		key := self.s[njs.Iv:idx]
   369  
   370  		/* check for escape sequence */
   371  		if njs.Ep != -1 {
   372  			if key, err = unquote(key); err != 0 {
   373  				return err
   374  			}
   375  		}
   376  
   377  		/* expect a ':' delimiter */
   378  		if err = self.delim(); err != 0 {
   379  			return err
   380  		}
   381  
   382  		/* skip value */
   383  		if key != match {
   384  			if _, err = self.skipFast(); err != 0 {
   385  				return err
   386  			}
   387  		} else {
   388  			return 0
   389  		}
   390  
   391  		/* check for EOF */
   392  		self.p = self.lspace(self.p)
   393  		if self.p >= ns {
   394  			return types.ERR_EOF
   395  		}
   396  
   397  		/* check for the next character */
   398  		switch self.s[self.p] {
   399  		case ',':
   400  			self.p++
   401  		case '}':
   402  			self.p++
   403  			return _ERR_NOT_FOUND
   404  		default:
   405  			return types.ERR_INVALID_CHAR
   406  		}
   407  	}
   408  }
   409  
   410  func (self *Parser) searchIndex(idx int) types.ParsingError {
   411  	ns := len(self.s)
   412  	if err := self.array(); err != 0 {
   413  		return err
   414  	}
   415  
   416  	/* check for EOF */
   417  	if self.p = self.lspace(self.p); self.p >= ns {
   418  		return types.ERR_EOF
   419  	}
   420  
   421  	/* check for empty array */
   422  	if self.s[self.p] == ']' {
   423  		self.p++
   424  		return _ERR_NOT_FOUND
   425  	}
   426  
   427  	var err types.ParsingError
   428  	/* allocate array space and parse every element */
   429  	for i := 0; i < idx; i++ {
   430  
   431  		/* decode the value */
   432  		if _, err = self.skipFast(); err != 0 {
   433  			return err
   434  		}
   435  
   436  		/* check for EOF */
   437  		self.p = self.lspace(self.p)
   438  		if self.p >= ns {
   439  			return types.ERR_EOF
   440  		}
   441  
   442  		/* check for the next character */
   443  		switch self.s[self.p] {
   444  		case ',':
   445  			self.p++
   446  		case ']':
   447  			self.p++
   448  			return _ERR_NOT_FOUND
   449  		default:
   450  			return types.ERR_INVALID_CHAR
   451  		}
   452  	}
   453  
   454  	return 0
   455  }
   456  
   457  func (self *Node) skipNextNode() *Node {
   458  	if !self.isLazy() {
   459  		return nil
   460  	}
   461  
   462  	parser, stack := self.getParserAndArrayStack()
   463  	ret := &stack.v
   464  	sp := parser.p
   465  	ns := len(parser.s)
   466  
   467  	/* check for EOF */
   468  	if parser.p = parser.lspace(sp); parser.p >= ns {
   469  		return newSyntaxError(parser.syntaxError(types.ERR_EOF))
   470  	}
   471  
   472  	/* check for empty array */
   473  	if parser.s[parser.p] == ']' {
   474  		parser.p++
   475  		self.setArray(ret)
   476  		return nil
   477  	}
   478  
   479  	var val Node
   480  	/* skip the value */
   481  	if start, err := parser.skipFast(); err != 0 {
   482  		return newSyntaxError(parser.syntaxError(err))
   483  	} else {
   484  		t := switchRawType(parser.s[start])
   485  		if t == _V_NONE {
   486  			return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
   487  		}
   488  		val = newRawNode(parser.s[start:parser.p], t)
   489  	}
   490  
   491  	/* add the value to result */
   492  	ret.Add(val)
   493  	self.l++
   494  	parser.p = parser.lspace(parser.p)
   495  
   496  	/* check for EOF */
   497  	if parser.p >= ns {
   498  		return newSyntaxError(parser.syntaxError(types.ERR_EOF))
   499  	}
   500  
   501  	/* check for the next character */
   502  	switch parser.s[parser.p] {
   503  	case ',':
   504  		parser.p++
   505  		return ret.At(ret.Len() - 1)
   506  	case ']':
   507  		parser.p++
   508  		self.setArray(ret)
   509  		return ret.At(ret.Len() - 1)
   510  	default:
   511  		return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
   512  	}
   513  }
   514  
   515  func (self *Node) skipNextPair() *Pair {
   516  	if !self.isLazy() {
   517  		return nil
   518  	}
   519  
   520  	parser, stack := self.getParserAndObjectStack()
   521  	ret := &stack.v
   522  	sp := parser.p
   523  	ns := len(parser.s)
   524  
   525  	/* check for EOF */
   526  	if parser.p = parser.lspace(sp); parser.p >= ns {
   527  		return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
   528  	}
   529  
   530  	/* check for empty object */
   531  	if parser.s[parser.p] == '}' {
   532  		parser.p++
   533  		self.setObject(ret)
   534  		return nil
   535  	}
   536  
   537  	/* decode one pair */
   538  	var val Node
   539  	var njs types.JsonState
   540  	var err types.ParsingError
   541  
   542  	/* decode the key */
   543  	if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
   544  		return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
   545  	}
   546  
   547  	/* extract the key */
   548  	idx := parser.p - 1
   549  	key := parser.s[njs.Iv:idx]
   550  
   551  	/* check for escape sequence */
   552  	if njs.Ep != -1 {
   553  		if key, err = unquote(key); err != 0 {
   554  			return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
   555  		}
   556  	}
   557  
   558  	/* expect a ':' delimiter */
   559  	if err = parser.delim(); err != 0 {
   560  		return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
   561  	}
   562  
   563  	/* skip the value */
   564  	if start, err := parser.skipFast(); err != 0 {
   565  		return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
   566  	} else {
   567  		t := switchRawType(parser.s[start])
   568  		if t == _V_NONE {
   569  			return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
   570  		}
   571  		val = newRawNode(parser.s[start:parser.p], t)
   572  	}
   573  
   574  	/* add the value to result */
   575  	ret.Add(Pair{Key: key, Value: val})
   576  	self.l++
   577  	parser.p = parser.lspace(parser.p)
   578  
   579  	/* check for EOF */
   580  	if parser.p >= ns {
   581  		return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
   582  	}
   583  
   584  	/* check for the next character */
   585  	switch parser.s[parser.p] {
   586  	case ',':
   587  		parser.p++
   588  		return ret.At(ret.Len() - 1)
   589  	case '}':
   590  		parser.p++
   591  		self.setObject(ret)
   592  		return ret.At(ret.Len() - 1)
   593  	default:
   594  		return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
   595  	}
   596  }
   597  
   598  /** Parser Factory **/
   599  
   600  // Loads parse all json into interface{}
   601  func Loads(src string) (int, interface{}, error) {
   602  	ps := &Parser{s: src}
   603  	np, err := ps.Parse()
   604  
   605  	/* check for errors */
   606  	if err != 0 {
   607  		return 0, nil, ps.ExportError(err)
   608  	} else {
   609  		x, err := np.Interface()
   610  		if err != nil {
   611  			return 0, nil, err
   612  		}
   613  		return ps.Pos(), x, nil
   614  	}
   615  }
   616  
   617  // LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
   618  func LoadsUseNumber(src string) (int, interface{}, error) {
   619  	ps := &Parser{s: src}
   620  	np, err := ps.Parse()
   621  
   622  	/* check for errors */
   623  	if err != 0 {
   624  		return 0, nil, err
   625  	} else {
   626  		x, err := np.InterfaceUseNumber()
   627  		if err != nil {
   628  			return 0, nil, err
   629  		}
   630  		return ps.Pos(), x, nil
   631  	}
   632  }
   633  
   634  // NewParser returns pointer of new allocated parser
   635  func NewParser(src string) *Parser {
   636  	return &Parser{s: src}
   637  }
   638  
   639  // NewParser returns new allocated parser
   640  func NewParserObj(src string) Parser {
   641  	return Parser{s: src}
   642  }
   643  
   644  // decodeNumber controls if parser decodes the number values instead of skip them
   645  //
   646  //	WARN: once you set decodeNumber(true), please set decodeNumber(false) before you drop the parser
   647  //	otherwise the memory CANNOT be reused
   648  func (self *Parser) decodeNumber(decode bool) {
   649  	if !decode && self.dbuf != nil {
   650  		types.FreeDbuf(self.dbuf)
   651  		self.dbuf = nil
   652  		return
   653  	}
   654  	if decode && self.dbuf == nil {
   655  		self.dbuf = types.NewDbuf()
   656  	}
   657  }
   658  
   659  // ExportError converts types.ParsingError to std Error
   660  func (self *Parser) ExportError(err types.ParsingError) error {
   661  	if err == _ERR_NOT_FOUND {
   662  		return ErrNotExist
   663  	}
   664  	return fmt.Errorf("%q", SyntaxError{
   665  		Pos:  self.p,
   666  		Src:  self.s,
   667  		Code: err,
   668  	}.Description())
   669  }