github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/expr/parser.go (about) 1 // Modifications Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // Copyright (c) 2013-2016 Errplane Inc. 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy of 5 // this software and associated documentation files (the "Software"), to deal in 6 // the Software without restriction, including without limitation the rights to 7 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 8 // the Software, and to permit persons to whom the Software is furnished to do so, 9 // subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in all 12 // copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 19 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 21 package expr 22 23 import ( 24 "bytes" 25 "fmt" 26 "io" 27 "strconv" 28 "strings" 29 ) 30 31 // Parser represents an InfluxQL parser. 32 type Parser struct { 33 s *bufScanner 34 } 35 36 // NewParser returns a new instance of Parser. 37 func NewParser(r io.Reader) *Parser { 38 return &Parser{s: newBufScanner(r)} 39 } 40 41 // ParseExpr parses an expression string and returns its AST representation. 42 func ParseExpr(s string) (Expr, error) { return NewParser(strings.NewReader(s)).ParseExpr(0) } 43 44 // parseInt parses a string and returns an integer literal. 45 func (p *Parser) parseInt(min, max int) (int, error) { 46 tok, pos, lit := p.scanIgnoreWhitespace() 47 if tok != NUMBER { 48 return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) 49 } 50 51 // Return an error if the number has a fractional part. 52 if strings.Contains(lit, ".") { 53 return 0, &ParseError{Message: "number must be an integer", Pos: pos} 54 } 55 56 // Convert string to int. 57 n, err := strconv.Atoi(lit) 58 if err != nil { 59 return 0, &ParseError{Message: err.Error(), Pos: pos} 60 } else if min > n || n > max { 61 return 0, &ParseError{ 62 Message: fmt.Sprintf("invalid value %d: must be %d <= n <= %d", n, min, max), 63 Pos: pos, 64 } 65 } 66 67 return n, nil 68 } 69 70 // parseUInt32 parses a string and returns a 32-bit unsigned integer literal. 71 func (p *Parser) parseUInt32() (uint32, error) { 72 tok, pos, lit := p.scanIgnoreWhitespace() 73 if tok != NUMBER { 74 return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) 75 } 76 77 // Convert string to unsigned 32-bit integer 78 n, err := strconv.ParseUint(lit, 10, 32) 79 if err != nil { 80 return 0, &ParseError{Message: err.Error(), Pos: pos} 81 } 82 83 return uint32(n), nil 84 } 85 86 // parseUInt64 parses a string and returns a 64-bit unsigned integer literal. 87 func (p *Parser) parseUInt64() (uint64, error) { 88 tok, pos, lit := p.scanIgnoreWhitespace() 89 if tok != NUMBER { 90 return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) 91 } 92 93 // Convert string to unsigned 64-bit integer 94 n, err := strconv.ParseUint(lit, 10, 64) 95 if err != nil { 96 return 0, &ParseError{Message: err.Error(), Pos: pos} 97 } 98 99 return uint64(n), nil 100 } 101 102 // parseIdent parses an identifier. 103 func (p *Parser) parseIdent() (string, error) { 104 tok, pos, lit := p.scanIgnoreWhitespace() 105 if tok != IDENT { 106 return "", newParseError(tokstr(tok, lit), []string{"identifier"}, pos) 107 } 108 return lit, nil 109 } 110 111 // parseIdentList parses a comma delimited list of identifiers. 112 func (p *Parser) parseIdentList() ([]string, error) { 113 // Parse first (required) identifier. 114 ident, err := p.parseIdent() 115 if err != nil { 116 return nil, err 117 } 118 idents := []string{ident} 119 120 // Parse remaining (optional) identifiers. 121 for { 122 if tok, _, _ := p.scanIgnoreWhitespace(); tok != COMMA { 123 p.unscan() 124 return idents, nil 125 } 126 127 if ident, err = p.parseIdent(); err != nil { 128 return nil, err 129 } 130 131 idents = append(idents, ident) 132 } 133 } 134 135 // parseSegmentedIdents parses a segmented identifiers. 136 // e.g., "db"."rp".measurement or "db"..measurement 137 func (p *Parser) parseSegmentedIdents() ([]string, error) { 138 ident, err := p.parseIdent() 139 if err != nil { 140 return nil, err 141 } 142 idents := []string{ident} 143 144 // Parse remaining (optional) identifiers. 145 for { 146 if tok, _, _ := p.scan(); tok != DOT { 147 // No more segments so we're done. 148 p.unscan() 149 break 150 } 151 152 if ch := p.peekRune(); ch == '/' { 153 // Next segment is a regex so we're done. 154 break 155 } else if ch == '.' { 156 // Add an empty identifier. 157 idents = append(idents, "") 158 continue 159 } 160 161 // Parse the next identifier. 162 if ident, err = p.parseIdent(); err != nil { 163 return nil, err 164 } 165 166 idents = append(idents, ident) 167 } 168 169 if len(idents) > 3 { 170 msg := fmt.Sprintf("too many segments in %s", QuoteIdent(idents...)) 171 return nil, &ParseError{Message: msg} 172 } 173 174 return idents, nil 175 } 176 177 // parserString parses a string. 178 func (p *Parser) parseString() (string, error) { 179 tok, pos, lit := p.scanIgnoreWhitespace() 180 if tok != STRING { 181 return "", newParseError(tokstr(tok, lit), []string{"string"}, pos) 182 } 183 return lit, nil 184 } 185 186 // peekRune returns the next rune that would be read by the scanner. 187 func (p *Parser) peekRune() rune { 188 r, _, _ := p.s.s.r.ReadRune() 189 if r != eof { 190 _ = p.s.s.r.UnreadRune() 191 } 192 193 return r 194 } 195 196 // parseOptionalTokenAndInt parses the specified token followed 197 // by an int, if it exists. 198 func (p *Parser) parseOptionalTokenAndInt(t Token) (int, error) { 199 // Check if the token exists. 200 if tok, _, _ := p.scanIgnoreWhitespace(); tok != t { 201 p.unscan() 202 return 0, nil 203 } 204 205 // Scan the number. 206 tok, pos, lit := p.scanIgnoreWhitespace() 207 if tok != NUMBER { 208 return 0, newParseError(tokstr(tok, lit), []string{"number"}, pos) 209 } 210 211 // Return an error if the number has a fractional part. 212 if strings.Contains(lit, ".") { 213 msg := fmt.Sprintf("fractional parts not allowed in %s", t.String()) 214 return 0, &ParseError{Message: msg, Pos: pos} 215 } 216 217 // Parse number. 218 n, _ := strconv.ParseInt(lit, 10, 64) 219 220 if n < 0 { 221 msg := fmt.Sprintf("%s must be >= 0", t.String()) 222 return 0, &ParseError{Message: msg, Pos: pos} 223 } 224 225 return int(n), nil 226 } 227 228 // parseVarRef parses a reference to a measurement or field. 229 func (p *Parser) parseVarRef() (*VarRef, error) { 230 // Parse the segments of the variable ref. 231 segments, err := p.parseSegmentedIdents() 232 if err != nil { 233 return nil, err 234 } 235 236 vr := &VarRef{Val: strings.Join(segments, ".")} 237 238 return vr, nil 239 } 240 241 func rewriteIsOp(expr Expr) (Token, error) { 242 affirmative := true 243 if unary, ok := expr.(*UnaryExpr); ok { 244 if unary.Op == NOT { 245 affirmative = false 246 expr = unary.Expr 247 } else { 248 return IS, fmt.Errorf("bad literal %s following IS", expr.String()) 249 } 250 } 251 switch e := expr.(type) { 252 case *NullLiteral: 253 if affirmative { 254 return IS_NULL, nil 255 } 256 return IS_NOT_NULL, nil 257 case *UnknownLiteral: 258 if affirmative { 259 return IS_NULL, nil 260 } 261 return IS_NOT_NULL, nil 262 case *BooleanLiteral: 263 if affirmative == e.Val { 264 return IS_TRUE, nil 265 } 266 267 return IS_FALSE, nil 268 } 269 return IS, fmt.Errorf("bad literal %s following IS (NOT)", expr.String()) 270 } 271 272 func rewriteIsExpr(expr Expr) (Expr, error) { 273 e, ok := expr.(*BinaryExpr) 274 if !ok { 275 return expr, nil 276 } 277 278 if e.Op == IS { 279 op, err := rewriteIsOp(e.RHS) 280 if err != nil { 281 return nil, err 282 } 283 expr, err := rewriteIsExpr(e.LHS) 284 if err != nil { 285 return nil, err 286 } 287 return &UnaryExpr{Op: op, Expr: expr}, nil 288 } 289 290 var err error 291 e.LHS, err = rewriteIsExpr(e.LHS) 292 if err != nil { 293 return nil, err 294 } 295 e.RHS, err = rewriteIsExpr(e.RHS) 296 if err != nil { 297 return nil, err 298 } 299 return expr, nil 300 } 301 302 // ParseExpr parses an expression. 303 // binOpPrcdncLb: binary operator precedence lower bound. 304 // Any binary operator with a lower precedence than that will cause ParseExpr to stop. 305 // This is used for parsing binary operators following a unary operator. 306 func (p *Parser) ParseExpr(binOpPrcdncLb int) (Expr, error) { 307 var err error 308 // Dummy root node. 309 root := &BinaryExpr{} 310 311 // Parse a non-binary expression type to start. 312 // This variable will always be the root of the expression tree. 313 root.RHS, err = p.parseUnaryExpr(false) 314 if err != nil { 315 return nil, err 316 } 317 318 // Loop over operations and unary exprs and build a tree based on precendence. 319 for { 320 // If the next token is NOT an operator then return the expression. 321 op, pos, lit := p.scanIgnoreWhitespace() 322 if op == NOT { 323 op, pos, lit = p.scanIgnoreWhitespace() 324 if op == IN { 325 op = NOT_IN 326 } else { 327 return nil, newParseError(tokstr(op, lit), []string{"IN"}, pos) 328 } 329 } 330 if !op.isBinaryOperator() || op.Precedence() < binOpPrcdncLb { 331 p.unscan() 332 return rewriteIsExpr(root.RHS) 333 } 334 335 // Otherwise parse the next expression. 336 var rhs Expr 337 if rhs, err = p.parseUnaryExpr(op == IN || op == NOT_IN); err != nil { 338 return nil, err 339 } 340 341 // Find the right spot in the tree to add the new expression by 342 // descending the RHS of the expression tree until we reach the last 343 // BinaryExpr or a BinaryExpr whose RHS has an operator with 344 // precedence >= the operator being added. 345 for node := root; ; { 346 r, ok := node.RHS.(*BinaryExpr) 347 if !ok || r.Op.Precedence() >= op.Precedence() { 348 // Add the new expression here and break. 349 node.RHS = &BinaryExpr{LHS: node.RHS, RHS: rhs, Op: op} 350 break 351 } 352 node = r 353 } 354 } 355 } 356 357 // parseUnaryExpr parses an non-binary expression. 358 // TODO: shz@ revisit inclusion parameter when open sourcing 359 func (p *Parser) parseUnaryExpr(inclusion bool) (Expr, error) { 360 // If the first token is a LPAREN then parse it as its own grouped expression. 361 if tok, _, _ := p.scanIgnoreWhitespace(); tok == LPAREN { 362 expr, err := p.ParseExpr(0) 363 if err != nil { 364 return nil, err 365 } 366 tok, pos, lit := p.scanIgnoreWhitespace() 367 if tok == RPAREN { 368 // Expect an RPAREN at the end. 369 if inclusion { 370 return &Call{Args: []Expr{expr}}, nil 371 } 372 return &ParenExpr{Expr: expr}, nil 373 } else if tok == COMMA { 374 // Parse a tuple as a function call with empty name. 375 var args []Expr 376 args = append(args, expr) 377 378 for { 379 // Parse an expression argument. 380 arg, err := p.ParseExpr(0) 381 if err != nil { 382 return nil, err 383 } 384 args = append(args, arg) 385 386 // If there's not a comma next then stop parsing arguments. 387 if tok, _, _ := p.scan(); tok != COMMA { 388 p.unscan() 389 break 390 } 391 } 392 393 // There should be a right parentheses at the end. 394 if tok, pos, lit := p.scan(); tok != RPAREN { 395 return nil, newParseError(tokstr(tok, lit), []string{")"}, pos) 396 } 397 398 return &Call{Args: args}, nil 399 } else { 400 return nil, newParseError(tokstr(tok, lit), []string{")"}, pos) 401 } 402 403 } 404 p.unscan() 405 406 // Read next token. 407 tok, pos, lit := p.scanIgnoreWhitespace() 408 if tok.isUnaryOperator() { 409 expr, err := p.ParseExpr(tok.Precedence()) 410 if err != nil { 411 return nil, err 412 } 413 return &UnaryExpr{Op: tok, Expr: expr}, nil 414 } 415 416 switch tok { 417 case CASE: 418 return p.parseCase() 419 case IDENT: 420 // If the next immediate token is a left parentheses, parse as function call. 421 // Otherwise parse as a variable reference. 422 if tok0, _, _ := p.scan(); tok0 == LPAREN { 423 return p.parseCall(lit) 424 } 425 426 p.unscan() // unscan the last token (wasn't an LPAREN) 427 p.unscan() // unscan the IDENT token 428 429 // Parse it as a VarRef. 430 return p.parseVarRef() 431 case DISTINCT: 432 // If the next immediate token is a left parentheses, parse as function call. 433 // Otherwise parse as a Distinct expression. 434 tok0, pos, lit := p.scan() 435 if tok0 == LPAREN { 436 return p.parseCall("distinct") 437 } else if tok0 == WS { 438 tok1, pos, lit := p.scanIgnoreWhitespace() 439 if tok1 != IDENT { 440 return nil, newParseError(tokstr(tok1, lit), []string{"identifier"}, pos) 441 } 442 return &Distinct{Val: lit}, nil 443 } 444 445 return nil, newParseError(tokstr(tok0, lit), []string{"(", "identifier"}, pos) 446 case STRING: 447 return &StringLiteral{Val: lit}, nil 448 case NUMBER: 449 v, _ := strconv.ParseFloat(lit, 64) 450 e := &NumberLiteral{Val: v, Expr: lit} 451 var err error 452 e.Int, err = strconv.Atoi(e.Expr) 453 if err != nil { 454 e.ExprType = Float 455 e.Int = int(v) 456 } else if e.Int >= 0 { 457 e.ExprType = Unsigned 458 } else { 459 e.ExprType = Signed 460 } 461 return e, nil 462 case NULL: 463 return &NullLiteral{}, nil 464 case UNKNOWN: 465 return &UnknownLiteral{}, nil 466 case TRUE, FALSE: 467 return &BooleanLiteral{Val: (tok == TRUE)}, nil 468 case MUL: 469 return &Wildcard{}, nil 470 default: 471 return nil, newParseError(tokstr(tok, lit), []string{"identifier", "string", "number", "bool"}, pos) 472 } 473 } 474 475 // Assumes CASE token has been scanned. 476 func (p *Parser) parseCase() (*Case, error) { 477 var kase Case 478 var err error 479 tok, pos, lit := p.scanIgnoreWhitespace() 480 for tok == WHEN { 481 var cond WhenThen 482 483 cond.When, err = p.ParseExpr(0) 484 if err != nil { 485 return nil, err 486 } 487 488 tok, pos, lit = p.scanIgnoreWhitespace() 489 if tok != THEN { 490 return nil, newParseError(tokstr(tok, lit), []string{"THEN"}, pos) 491 } 492 493 cond.Then, err = p.ParseExpr(0) 494 if err != nil { 495 return nil, err 496 } 497 498 kase.WhenThens = append(kase.WhenThens, cond) 499 tok, pos, lit = p.scanIgnoreWhitespace() 500 } 501 502 if len(kase.WhenThens) == 0 { 503 return nil, newParseError(tokstr(tok, lit), []string{"WHEN"}, pos) 504 } 505 506 if tok == ELSE { 507 kase.Else, err = p.ParseExpr(0) 508 if err != nil { 509 return nil, err 510 } 511 tok, pos, lit = p.scanIgnoreWhitespace() 512 } 513 514 if tok != END { 515 return nil, newParseError(tokstr(tok, lit), []string{"END"}, pos) 516 } 517 return &kase, nil 518 } 519 520 // parseCall parses a function call. 521 // This function assumes the function name and LPAREN have been consumed. 522 func (p *Parser) parseCall(name string) (*Call, error) { 523 name = strings.ToLower(name) 524 // If there's a right paren then just return immediately. 525 if tok, _, _ := p.scan(); tok == RPAREN { 526 return &Call{Name: name}, nil 527 } 528 p.unscan() 529 530 // Otherwise parse function call arguments. 531 var args []Expr 532 for { 533 // Parse an expression argument. 534 arg, err := p.ParseExpr(0) 535 if err != nil { 536 return nil, err 537 } 538 args = append(args, arg) 539 540 // If there's not a comma next then stop parsing arguments. 541 if tok, _, _ := p.scan(); tok != COMMA { 542 p.unscan() 543 break 544 } 545 } 546 547 // There should be a right parentheses at the end. 548 if tok, pos, lit := p.scan(); tok != RPAREN { 549 return nil, newParseError(tokstr(tok, lit), []string{")"}, pos) 550 } 551 552 return &Call{Name: name, Args: args}, nil 553 } 554 555 // scan returns the next token from the underlying scanner. 556 func (p *Parser) scan() (tok Token, pos Pos, lit string) { return p.s.Scan() } 557 558 // scanIgnoreWhitespace scans the next non-whitespace token. 559 func (p *Parser) scanIgnoreWhitespace() (tok Token, pos Pos, lit string) { 560 tok, pos, lit = p.scan() 561 if tok == WS { 562 tok, pos, lit = p.scan() 563 } 564 return 565 } 566 567 // consumeWhitespace scans the next token if it's whitespace. 568 func (p *Parser) consumeWhitespace() { 569 if tok, _, _ := p.scan(); tok != WS { 570 p.unscan() 571 } 572 } 573 574 // unscan pushes the previously read token back onto the buffer. 575 func (p *Parser) unscan() { p.s.Unscan() } 576 577 // QuoteString returns a quoted string. 578 func QuoteString(s string) string { 579 return `'` + strings.NewReplacer("\n", `\n`, `\`, `\\`, `'`, `\'`).Replace(s) + `'` 580 } 581 582 // QuoteIdent returns a quoted identifier from multiple bare identifiers. 583 func QuoteIdent(segments ...string) string { 584 r := strings.NewReplacer("\n", `\n`, `\`, `\\`, `"`, `\"`) 585 586 var buf bytes.Buffer 587 for i, segment := range segments { 588 needQuote := IdentNeedsQuotes(segment) || 589 ((i < len(segments)-1) && segment != "") // not last segment && not "" 590 591 if needQuote { 592 _ = buf.WriteByte('"') 593 } 594 595 _, _ = buf.WriteString(r.Replace(segment)) 596 597 if needQuote { 598 _ = buf.WriteByte('"') 599 } 600 601 if i < len(segments)-1 { 602 _ = buf.WriteByte('.') 603 } 604 } 605 return buf.String() 606 } 607 608 // IdentNeedsQuotes returns true if the ident string given would require quotes. 609 func IdentNeedsQuotes(ident string) bool { 610 // check if this identifier is a keyword 611 tok := Lookup(ident) 612 if tok != IDENT { 613 return true 614 } 615 for i, r := range ident { 616 if i == 0 && !isIdentFirstChar(r) { 617 return true 618 } else if i > 0 && !isIdentChar(r) { 619 return true 620 } 621 } 622 return false 623 } 624 625 // split splits a string into a slice of runes. 626 func split(s string) (a []rune) { 627 for _, ch := range s { 628 a = append(a, ch) 629 } 630 return 631 } 632 633 // ParseError represents an error that occurred during parsing. 634 type ParseError struct { 635 Message string 636 Found string 637 Expected []string 638 Pos Pos 639 } 640 641 // newParseError returns a new instance of ParseError. 642 func newParseError(found string, expected []string, pos Pos) *ParseError { 643 return &ParseError{Found: found, Expected: expected, Pos: pos} 644 } 645 646 // Error returns the string representation of the error. 647 func (e *ParseError) Error() string { 648 if e.Message != "" { 649 return fmt.Sprintf("%s at line %d, char %d", e.Message, e.Pos.Line+1, e.Pos.Char+1) 650 } 651 return fmt.Sprintf("found %s, expected %s at line %d, char %d", e.Found, strings.Join(e.Expected, ", "), e.Pos.Line+1, e.Pos.Char+1) 652 }