github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/expr/parser.go (about)

     1  // Modifications Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  // Copyright (c) 2013-2016 Errplane Inc.
     3  //
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy of
     5  // this software and associated documentation files (the "Software"), to deal in
     6  // the Software without restriction, including without limitation the rights to
     7  // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
     8  // the Software, and to permit persons to whom the Software is furnished to do so,
     9  // subject to the following conditions:
    10  //
    11  // The above copyright notice and this permission notice shall be included in all
    12  // copies or substantial portions of the Software.
    13  //
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    16  // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    17  // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    18  // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    19  // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    20  
    21  package expr
    22  
    23  import (
    24  	"bytes"
    25  	"fmt"
    26  	"io"
    27  	"strconv"
    28  	"strings"
    29  )
    30  
// Parser represents an InfluxQL parser.
// It pulls tokens from a buffered scanner; the scan and unscan helpers in
// this file delegate to that scanner.
type Parser struct {
	// s is the token source used by every parse method.
	s *bufScanner
}
    35  
    36  // NewParser returns a new instance of Parser.
    37  func NewParser(r io.Reader) *Parser {
    38  	return &Parser{s: newBufScanner(r)}
    39  }
    40  
    41  // ParseExpr parses an expression string and returns its AST representation.
    42  func ParseExpr(s string) (Expr, error) { return NewParser(strings.NewReader(s)).ParseExpr(0) }
    43  
    44  // parseInt parses a string and returns an integer literal.
    45  func (p *Parser) parseInt(min, max int) (int, error) {
    46  	tok, pos, lit := p.scanIgnoreWhitespace()
    47  	if tok != NUMBER {
    48  		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
    49  	}
    50  
    51  	// Return an error if the number has a fractional part.
    52  	if strings.Contains(lit, ".") {
    53  		return 0, &ParseError{Message: "number must be an integer", Pos: pos}
    54  	}
    55  
    56  	// Convert string to int.
    57  	n, err := strconv.Atoi(lit)
    58  	if err != nil {
    59  		return 0, &ParseError{Message: err.Error(), Pos: pos}
    60  	} else if min > n || n > max {
    61  		return 0, &ParseError{
    62  			Message: fmt.Sprintf("invalid value %d: must be %d <= n <= %d", n, min, max),
    63  			Pos:     pos,
    64  		}
    65  	}
    66  
    67  	return n, nil
    68  }
    69  
    70  // parseUInt32 parses a string and returns a 32-bit unsigned integer literal.
    71  func (p *Parser) parseUInt32() (uint32, error) {
    72  	tok, pos, lit := p.scanIgnoreWhitespace()
    73  	if tok != NUMBER {
    74  		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
    75  	}
    76  
    77  	// Convert string to unsigned 32-bit integer
    78  	n, err := strconv.ParseUint(lit, 10, 32)
    79  	if err != nil {
    80  		return 0, &ParseError{Message: err.Error(), Pos: pos}
    81  	}
    82  
    83  	return uint32(n), nil
    84  }
    85  
    86  // parseUInt64 parses a string and returns a 64-bit unsigned integer literal.
    87  func (p *Parser) parseUInt64() (uint64, error) {
    88  	tok, pos, lit := p.scanIgnoreWhitespace()
    89  	if tok != NUMBER {
    90  		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
    91  	}
    92  
    93  	// Convert string to unsigned 64-bit integer
    94  	n, err := strconv.ParseUint(lit, 10, 64)
    95  	if err != nil {
    96  		return 0, &ParseError{Message: err.Error(), Pos: pos}
    97  	}
    98  
    99  	return uint64(n), nil
   100  }
   101  
   102  // parseIdent parses an identifier.
   103  func (p *Parser) parseIdent() (string, error) {
   104  	tok, pos, lit := p.scanIgnoreWhitespace()
   105  	if tok != IDENT {
   106  		return "", newParseError(tokstr(tok, lit), []string{"identifier"}, pos)
   107  	}
   108  	return lit, nil
   109  }
   110  
   111  // parseIdentList parses a comma delimited list of identifiers.
   112  func (p *Parser) parseIdentList() ([]string, error) {
   113  	// Parse first (required) identifier.
   114  	ident, err := p.parseIdent()
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  	idents := []string{ident}
   119  
   120  	// Parse remaining (optional) identifiers.
   121  	for {
   122  		if tok, _, _ := p.scanIgnoreWhitespace(); tok != COMMA {
   123  			p.unscan()
   124  			return idents, nil
   125  		}
   126  
   127  		if ident, err = p.parseIdent(); err != nil {
   128  			return nil, err
   129  		}
   130  
   131  		idents = append(idents, ident)
   132  	}
   133  }
   134  
   135  // parseSegmentedIdents parses a segmented identifiers.
   136  // e.g.,  "db"."rp".measurement  or  "db"..measurement
   137  func (p *Parser) parseSegmentedIdents() ([]string, error) {
   138  	ident, err := p.parseIdent()
   139  	if err != nil {
   140  		return nil, err
   141  	}
   142  	idents := []string{ident}
   143  
   144  	// Parse remaining (optional) identifiers.
   145  	for {
   146  		if tok, _, _ := p.scan(); tok != DOT {
   147  			// No more segments so we're done.
   148  			p.unscan()
   149  			break
   150  		}
   151  
   152  		if ch := p.peekRune(); ch == '/' {
   153  			// Next segment is a regex so we're done.
   154  			break
   155  		} else if ch == '.' {
   156  			// Add an empty identifier.
   157  			idents = append(idents, "")
   158  			continue
   159  		}
   160  
   161  		// Parse the next identifier.
   162  		if ident, err = p.parseIdent(); err != nil {
   163  			return nil, err
   164  		}
   165  
   166  		idents = append(idents, ident)
   167  	}
   168  
   169  	if len(idents) > 3 {
   170  		msg := fmt.Sprintf("too many segments in %s", QuoteIdent(idents...))
   171  		return nil, &ParseError{Message: msg}
   172  	}
   173  
   174  	return idents, nil
   175  }
   176  
   177  // parserString parses a string.
   178  func (p *Parser) parseString() (string, error) {
   179  	tok, pos, lit := p.scanIgnoreWhitespace()
   180  	if tok != STRING {
   181  		return "", newParseError(tokstr(tok, lit), []string{"string"}, pos)
   182  	}
   183  	return lit, nil
   184  }
   185  
   186  // peekRune returns the next rune that would be read by the scanner.
   187  func (p *Parser) peekRune() rune {
   188  	r, _, _ := p.s.s.r.ReadRune()
   189  	if r != eof {
   190  		_ = p.s.s.r.UnreadRune()
   191  	}
   192  
   193  	return r
   194  }
   195  
   196  // parseOptionalTokenAndInt parses the specified token followed
   197  // by an int, if it exists.
   198  func (p *Parser) parseOptionalTokenAndInt(t Token) (int, error) {
   199  	// Check if the token exists.
   200  	if tok, _, _ := p.scanIgnoreWhitespace(); tok != t {
   201  		p.unscan()
   202  		return 0, nil
   203  	}
   204  
   205  	// Scan the number.
   206  	tok, pos, lit := p.scanIgnoreWhitespace()
   207  	if tok != NUMBER {
   208  		return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos)
   209  	}
   210  
   211  	// Return an error if the number has a fractional part.
   212  	if strings.Contains(lit, ".") {
   213  		msg := fmt.Sprintf("fractional parts not allowed in %s", t.String())
   214  		return 0, &ParseError{Message: msg, Pos: pos}
   215  	}
   216  
   217  	// Parse number.
   218  	n, _ := strconv.ParseInt(lit, 10, 64)
   219  
   220  	if n < 0 {
   221  		msg := fmt.Sprintf("%s must be >= 0", t.String())
   222  		return 0, &ParseError{Message: msg, Pos: pos}
   223  	}
   224  
   225  	return int(n), nil
   226  }
   227  
   228  // parseVarRef parses a reference to a measurement or field.
   229  func (p *Parser) parseVarRef() (*VarRef, error) {
   230  	// Parse the segments of the variable ref.
   231  	segments, err := p.parseSegmentedIdents()
   232  	if err != nil {
   233  		return nil, err
   234  	}
   235  
   236  	vr := &VarRef{Val: strings.Join(segments, ".")}
   237  
   238  	return vr, nil
   239  }
   240  
   241  func rewriteIsOp(expr Expr) (Token, error) {
   242  	affirmative := true
   243  	if unary, ok := expr.(*UnaryExpr); ok {
   244  		if unary.Op == NOT {
   245  			affirmative = false
   246  			expr = unary.Expr
   247  		} else {
   248  			return IS, fmt.Errorf("bad literal %s following IS", expr.String())
   249  		}
   250  	}
   251  	switch e := expr.(type) {
   252  	case *NullLiteral:
   253  		if affirmative {
   254  			return IS_NULL, nil
   255  		}
   256  		return IS_NOT_NULL, nil
   257  	case *UnknownLiteral:
   258  		if affirmative {
   259  			return IS_NULL, nil
   260  		}
   261  		return IS_NOT_NULL, nil
   262  	case *BooleanLiteral:
   263  		if affirmative == e.Val {
   264  			return IS_TRUE, nil
   265  		}
   266  
   267  		return IS_FALSE, nil
   268  	}
   269  	return IS, fmt.Errorf("bad literal %s following IS (NOT)", expr.String())
   270  }
   271  
   272  func rewriteIsExpr(expr Expr) (Expr, error) {
   273  	e, ok := expr.(*BinaryExpr)
   274  	if !ok {
   275  		return expr, nil
   276  	}
   277  
   278  	if e.Op == IS {
   279  		op, err := rewriteIsOp(e.RHS)
   280  		if err != nil {
   281  			return nil, err
   282  		}
   283  		expr, err := rewriteIsExpr(e.LHS)
   284  		if err != nil {
   285  			return nil, err
   286  		}
   287  		return &UnaryExpr{Op: op, Expr: expr}, nil
   288  	}
   289  
   290  	var err error
   291  	e.LHS, err = rewriteIsExpr(e.LHS)
   292  	if err != nil {
   293  		return nil, err
   294  	}
   295  	e.RHS, err = rewriteIsExpr(e.RHS)
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  	return expr, nil
   300  }
   301  
// ParseExpr parses an expression.
// binOpPrcdncLb: binary operator precedence lower bound.
// Any binary operator with a lower precedence than that will cause ParseExpr to stop.
// This is used for parsing binary operators following a unary operator.
//
// The expression is built as a tree of BinaryExpr nodes hanging off the RHS
// of a dummy root. Before returning, the tree is passed through
// rewriteIsExpr so that IS nodes become unary expressions.
func (p *Parser) ParseExpr(binOpPrcdncLb int) (Expr, error) {
	var err error
	// Dummy root node.
	root := &BinaryExpr{}

	// Parse a non-binary expression type to start.
	// root.RHS will always hold the root of the expression tree.
	root.RHS, err = p.parseUnaryExpr(false)
	if err != nil {
		return nil, err
	}

	// Loop over operations and unary exprs and build a tree based on precedence.
	for {
		// If the next token is NOT an operator then return the expression.
		op, pos, lit := p.scanIgnoreWhitespace()
		if op == NOT {
			// In binary position NOT is only accepted as the start of NOT IN;
			// the pair is folded into the single NOT_IN operator.
			op, pos, lit = p.scanIgnoreWhitespace()
			if op == IN {
				op = NOT_IN
			} else {
				return nil, newParseError(tokstr(op, lit), []string{"IN"}, pos)
			}
		}
		if !op.isBinaryOperator() || op.Precedence() < binOpPrcdncLb {
			// NOTE(review): when op is NOT_IN two tokens were consumed above
			// but only one unscan happens here; whether this branch can be
			// reached with NOT_IN depends on the precedence table, which is
			// not visible in this file. TODO confirm.
			p.unscan()
			return rewriteIsExpr(root.RHS)
		}

		// Otherwise parse the next expression.
		// The operand of IN / NOT IN is parsed in inclusion mode.
		var rhs Expr
		if rhs, err = p.parseUnaryExpr(op == IN || op == NOT_IN); err != nil {
			return nil, err
		}

		// Find the right spot in the tree to add the new expression by
		// descending the RHS of the expression tree until we reach the last
		// BinaryExpr or a BinaryExpr whose RHS has an operator with
		// precedence >= the operator being added.
		for node := root; ; {
			r, ok := node.RHS.(*BinaryExpr)
			if !ok || r.Op.Precedence() >= op.Precedence() {
				// Add the new expression here and break.
				node.RHS = &BinaryExpr{LHS: node.RHS, RHS: rhs, Op: op}
				break
			}
			node = r
		}
	}
}
   356  
// parseUnaryExpr parses a non-binary expression: a parenthesized expression
// or tuple, a unary operator applied to an expression, a CASE expression, a
// function call, a variable reference, a DISTINCT expression, a literal
// (string, number, NULL, UNKNOWN, TRUE, FALSE) or the "*" wildcard.
//
// inclusion is set by ParseExpr when the preceding operator is IN or NOT_IN.
// In that mode a single parenthesized expression is returned as a Call with
// no name and one argument instead of a ParenExpr, matching the shape used
// for multi-element tuples.
// TODO: shz@ revisit inclusion parameter when open sourcing
func (p *Parser) parseUnaryExpr(inclusion bool) (Expr, error) {
	// If the first token is a LPAREN then parse it as its own grouped expression.
	if tok, _, _ := p.scanIgnoreWhitespace(); tok == LPAREN {
		expr, err := p.ParseExpr(0)
		if err != nil {
			return nil, err
		}
		tok, pos, lit := p.scanIgnoreWhitespace()
		if tok == RPAREN {
			// The group is closed; wrap it according to the inclusion mode.
			if inclusion {
				return &Call{Args: []Expr{expr}}, nil
			}
			return &ParenExpr{Expr: expr}, nil
		} else if tok == COMMA {
			// Parse a tuple as a function call with empty name.
			var args []Expr
			args = append(args, expr)

			for {
				// Parse an expression argument.
				arg, err := p.ParseExpr(0)
				if err != nil {
					return nil, err
				}
				args = append(args, arg)

				// If there's not a comma next then stop parsing arguments.
				if tok, _, _ := p.scan(); tok != COMMA {
					p.unscan()
					break
				}
			}

			// There should be a right parentheses at the end.
			if tok, pos, lit := p.scan(); tok != RPAREN {
				return nil, newParseError(tokstr(tok, lit), []string{")"}, pos)
			}

			return &Call{Args: args}, nil
		} else {
			// Neither ")" nor ",": the group is malformed.
			return nil, newParseError(tokstr(tok, lit), []string{")"}, pos)
		}

	}
	// Not a group: push the token back and re-read it below.
	p.unscan()

	// Read next token.
	tok, pos, lit := p.scanIgnoreWhitespace()
	if tok.isUnaryOperator() {
		// Use the operator's own precedence as the lower bound so that
		// ParseExpr stops at binary operators of lower precedence.
		expr, err := p.ParseExpr(tok.Precedence())
		if err != nil {
			return nil, err
		}
		return &UnaryExpr{Op: tok, Expr: expr}, nil
	}

	switch tok {
	case CASE:
		return p.parseCase()
	case IDENT:
		// If the next immediate token is a left parentheses, parse as function call.
		// Otherwise parse as a variable reference.
		if tok0, _, _ := p.scan(); tok0 == LPAREN {
			return p.parseCall(lit)
		}

		p.unscan() // unscan the last token (wasn't an LPAREN)
		p.unscan() // unscan the IDENT token

		// Parse it as a VarRef.
		return p.parseVarRef()
	case DISTINCT:
		// If the next immediate token is a left parentheses, parse as function call.
		// Otherwise parse as a Distinct expression.
		// pos and lit are shadowed here by the values of the token after DISTINCT.
		tok0, pos, lit := p.scan()
		if tok0 == LPAREN {
			return p.parseCall("distinct")
		} else if tok0 == WS {
			// "DISTINCT <ident>": whitespace must be followed by an identifier.
			tok1, pos, lit := p.scanIgnoreWhitespace()
			if tok1 != IDENT {
				return nil, newParseError(tokstr(tok1, lit), []string{"identifier"}, pos)
			}
			return &Distinct{Val: lit}, nil
		}

		return nil, newParseError(tokstr(tok0, lit), []string{"(", "identifier"}, pos)
	case STRING:
		return &StringLiteral{Val: lit}, nil
	case NUMBER:
		// The float conversion error is discarded; Val holds whatever
		// ParseFloat returned for the literal.
		v, _ := strconv.ParseFloat(lit, 64)
		e := &NumberLiteral{Val: v, Expr: lit}
		var err error
		// A literal that Atoi accepts is an integer (Unsigned when >= 0,
		// otherwise Signed); anything else is a Float whose Int field is
		// the float converted to int.
		e.Int, err = strconv.Atoi(e.Expr)
		if err != nil {
			e.ExprType = Float
			e.Int = int(v)
		} else if e.Int >= 0 {
			e.ExprType = Unsigned
		} else {
			e.ExprType = Signed
		}
		return e, nil
	case NULL:
		return &NullLiteral{}, nil
	case UNKNOWN:
		return &UnknownLiteral{}, nil
	case TRUE, FALSE:
		return &BooleanLiteral{Val: (tok == TRUE)}, nil
	case MUL:
		// A bare "*" in operand position is the wildcard.
		return &Wildcard{}, nil
	default:
		return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos)
	}
}
   474  
   475  // Assumes CASE token has been scanned.
   476  func (p *Parser) parseCase() (*Case, error) {
   477  	var kase Case
   478  	var err error
   479  	tok, pos, lit := p.scanIgnoreWhitespace()
   480  	for tok == WHEN {
   481  		var cond WhenThen
   482  
   483  		cond.When, err = p.ParseExpr(0)
   484  		if err != nil {
   485  			return nil, err
   486  		}
   487  
   488  		tok, pos, lit = p.scanIgnoreWhitespace()
   489  		if tok != THEN {
   490  			return nil, newParseError(tokstr(tok, lit), []string{"THEN"}, pos)
   491  		}
   492  
   493  		cond.Then, err = p.ParseExpr(0)
   494  		if err != nil {
   495  			return nil, err
   496  		}
   497  
   498  		kase.WhenThens = append(kase.WhenThens, cond)
   499  		tok, pos, lit = p.scanIgnoreWhitespace()
   500  	}
   501  
   502  	if len(kase.WhenThens) == 0 {
   503  		return nil, newParseError(tokstr(tok, lit), []string{"WHEN"}, pos)
   504  	}
   505  
   506  	if tok == ELSE {
   507  		kase.Else, err = p.ParseExpr(0)
   508  		if err != nil {
   509  			return nil, err
   510  		}
   511  		tok, pos, lit = p.scanIgnoreWhitespace()
   512  	}
   513  
   514  	if tok != END {
   515  		return nil, newParseError(tokstr(tok, lit), []string{"END"}, pos)
   516  	}
   517  	return &kase, nil
   518  }
   519  
   520  // parseCall parses a function call.
   521  // This function assumes the function name and LPAREN have been consumed.
   522  func (p *Parser) parseCall(name string) (*Call, error) {
   523  	name = strings.ToLower(name)
   524  	// If there's a right paren then just return immediately.
   525  	if tok, _, _ := p.scan(); tok == RPAREN {
   526  		return &Call{Name: name}, nil
   527  	}
   528  	p.unscan()
   529  
   530  	// Otherwise parse function call arguments.
   531  	var args []Expr
   532  	for {
   533  		// Parse an expression argument.
   534  		arg, err := p.ParseExpr(0)
   535  		if err != nil {
   536  			return nil, err
   537  		}
   538  		args = append(args, arg)
   539  
   540  		// If there's not a comma next then stop parsing arguments.
   541  		if tok, _, _ := p.scan(); tok != COMMA {
   542  			p.unscan()
   543  			break
   544  		}
   545  	}
   546  
   547  	// There should be a right parentheses at the end.
   548  	if tok, pos, lit := p.scan(); tok != RPAREN {
   549  		return nil, newParseError(tokstr(tok, lit), []string{")"}, pos)
   550  	}
   551  
   552  	return &Call{Name: name, Args: args}, nil
   553  }
   554  
   555  // scan returns the next token from the underlying scanner.
   556  func (p *Parser) scan() (tok Token, pos Pos, lit string) { return p.s.Scan() }
   557  
   558  // scanIgnoreWhitespace scans the next non-whitespace token.
   559  func (p *Parser) scanIgnoreWhitespace() (tok Token, pos Pos, lit string) {
   560  	tok, pos, lit = p.scan()
   561  	if tok == WS {
   562  		tok, pos, lit = p.scan()
   563  	}
   564  	return
   565  }
   566  
   567  // consumeWhitespace scans the next token if it's whitespace.
   568  func (p *Parser) consumeWhitespace() {
   569  	if tok, _, _ := p.scan(); tok != WS {
   570  		p.unscan()
   571  	}
   572  }
   573  
   574  // unscan pushes the previously read token back onto the buffer.
   575  func (p *Parser) unscan() { p.s.Unscan() }
   576  
   577  // QuoteString returns a quoted string.
   578  func QuoteString(s string) string {
   579  	return `'` + strings.NewReplacer("\n", `\n`, `\`, `\\`, `'`, `\'`).Replace(s) + `'`
   580  }
   581  
   582  // QuoteIdent returns a quoted identifier from multiple bare identifiers.
   583  func QuoteIdent(segments ...string) string {
   584  	r := strings.NewReplacer("\n", `\n`, `\`, `\\`, `"`, `\"`)
   585  
   586  	var buf bytes.Buffer
   587  	for i, segment := range segments {
   588  		needQuote := IdentNeedsQuotes(segment) ||
   589  			((i < len(segments)-1) && segment != "") // not last segment && not ""
   590  
   591  		if needQuote {
   592  			_ = buf.WriteByte('"')
   593  		}
   594  
   595  		_, _ = buf.WriteString(r.Replace(segment))
   596  
   597  		if needQuote {
   598  			_ = buf.WriteByte('"')
   599  		}
   600  
   601  		if i < len(segments)-1 {
   602  			_ = buf.WriteByte('.')
   603  		}
   604  	}
   605  	return buf.String()
   606  }
   607  
   608  // IdentNeedsQuotes returns true if the ident string given would require quotes.
   609  func IdentNeedsQuotes(ident string) bool {
   610  	// check if this identifier is a keyword
   611  	tok := Lookup(ident)
   612  	if tok != IDENT {
   613  		return true
   614  	}
   615  	for i, r := range ident {
   616  		if i == 0 && !isIdentFirstChar(r) {
   617  			return true
   618  		} else if i > 0 && !isIdentChar(r) {
   619  			return true
   620  		}
   621  	}
   622  	return false
   623  }
   624  
   625  // split splits a string into a slice of runes.
   626  func split(s string) (a []rune) {
   627  	for _, ch := range s {
   628  		a = append(a, ch)
   629  	}
   630  	return
   631  }
   632  
// ParseError represents an error that occurred during parsing.
// Error reports Message when it is set; otherwise it reports Found and
// Expected. In both cases the position is appended.
type ParseError struct {
	// Message is a free-form description; when non-empty it takes precedence
	// over Found/Expected in Error.
	Message string
	// Found describes the token that was actually encountered.
	Found string
	// Expected lists the acceptable alternatives; Error joins them with ", ".
	Expected []string
	// Pos is where the error occurred; Error prints its Line and Char each
	// incremented by one.
	Pos Pos
}
   640  
   641  // newParseError returns a new instance of ParseError.
   642  func newParseError(found string, expected []string, pos Pos) *ParseError {
   643  	return &ParseError{Found: found, Expected: expected, Pos: pos}
   644  }
   645  
   646  // Error returns the string representation of the error.
   647  func (e *ParseError) Error() string {
   648  	if e.Message != "" {
   649  		return fmt.Sprintf("%s at line %d, char %d", e.Message, e.Pos.Line+1, e.Pos.Char+1)
   650  	}
   651  	return fmt.Sprintf("found %s, expected %s at line %d, char %d", e.Found, strings.Join(e.Expected, ", "), e.Pos.Line+1, e.Pos.Char+1)
   652  }