storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/sql/parser.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package sql
    18  
    19  import (
    20  	"strings"
    21  
    22  	"github.com/alecthomas/participle"
    23  	"github.com/alecthomas/participle/lexer"
    24  )
    25  
    26  // Types with custom Capture interface for parsing
    27  
    28  // Boolean is a type for a parsed Boolean literal
    29  type Boolean bool
    30  
    31  // Capture interface used by participle
    32  func (b *Boolean) Capture(values []string) error {
    33  	*b = strings.ToLower(values[0]) == "true"
    34  	return nil
    35  }
    36  
    37  // LiteralString is a type for parsed SQL string literals
    38  type LiteralString string
    39  
    40  // Capture interface used by participle
    41  func (ls *LiteralString) Capture(values []string) error {
    42  	// Remove enclosing single quote
    43  	n := len(values[0])
    44  	r := values[0][1 : n-1]
    45  	// Translate doubled quotes
    46  	*ls = LiteralString(strings.Replace(r, "''", "'", -1))
    47  	return nil
    48  }
    49  
    50  // LiteralList is a type for parsed SQL lists literals
    51  type LiteralList []string
    52  
    53  // Capture interface used by participle
    54  func (ls *LiteralList) Capture(values []string) error {
    55  	// Remove enclosing parenthesis.
    56  	n := len(values[0])
    57  	r := values[0][1 : n-1]
    58  	// Translate doubled quotes
    59  	*ls = LiteralList(strings.Split(r, ","))
    60  	return nil
    61  }
    62  
    63  // ObjectKey is a type for parsed strings occurring in key paths
    64  type ObjectKey struct {
    65  	Lit *LiteralString `parser:" \"[\" @LitString \"]\""`
    66  	ID  *Identifier    `parser:"| \".\" @@"`
    67  }
    68  
    69  // QuotedIdentifier is a type for parsed strings that are double
    70  // quoted.
    71  type QuotedIdentifier string
    72  
    73  // Capture interface used by participle
    74  func (qi *QuotedIdentifier) Capture(values []string) error {
    75  	// Remove enclosing quotes
    76  	n := len(values[0])
    77  	r := values[0][1 : n-1]
    78  
    79  	// Translate doubled quotes
    80  	*qi = QuotedIdentifier(strings.Replace(r, `""`, `"`, -1))
    81  	return nil
    82  }
    83  
    84  // Types representing AST of SQL statement. Only SELECT is supported.
    85  
    86  // Select is the top level AST node type
    87  type Select struct {
    88  	Expression *SelectExpression `parser:"\"SELECT\" @@"`
    89  	From       *TableExpression  `parser:"\"FROM\" @@"`
    90  	Where      *Expression       `parser:"( \"WHERE\" @@ )?"`
    91  	Limit      *LitValue         `parser:"( \"LIMIT\" @@ )?"`
    92  }
    93  
    94  // SelectExpression represents the items requested in the select
    95  // statement
    96  type SelectExpression struct {
    97  	All         bool                 `parser:"  @\"*\""`
    98  	Expressions []*AliasedExpression `parser:"| @@ { \",\" @@ }"`
    99  }
   100  
   101  // TableExpression represents the FROM clause
   102  type TableExpression struct {
   103  	Table *JSONPath `parser:"@@"`
   104  	As    string    `parser:"( \"AS\"? @Ident )?"`
   105  }
   106  
   107  // JSONPathElement represents a keypath component
   108  type JSONPathElement struct {
   109  	Key            *ObjectKey `parser:"  @@"`               // ['name'] and .name forms
   110  	Index          *int       `parser:"| \"[\" @Int \"]\""` // [3] form
   111  	ObjectWildcard bool       `parser:"| @\".*\""`          // .* form
   112  	ArrayWildcard  bool       `parser:"| @\"[*]\""`         // [*] form
   113  }
   114  
   115  // JSONPath represents a keypath.
   116  // Instances should be treated idempotent and not change once created.
   117  type JSONPath struct {
   118  	BaseKey  *Identifier        `parser:" @@"`
   119  	PathExpr []*JSONPathElement `parser:"(@@)*"`
   120  
   121  	// Cached values:
   122  	pathString         string
   123  	strippedTableAlias string
   124  	strippedPathExpr   []*JSONPathElement
   125  }
   126  
   127  // AliasedExpression is an expression that can be optionally named
   128  type AliasedExpression struct {
   129  	Expression *Expression `parser:"@@"`
   130  	As         string      `parser:"[ \"AS\" @Ident ]"`
   131  }
   132  
   133  // Grammar for Expression
   134  //
   135  // Expression          → AndCondition ("OR" AndCondition)*
   136  // AndCondition        → Condition ("AND" Condition)*
   137  // Condition           → "NOT" Condition | ConditionExpression
   138  // ConditionExpression → ValueExpression ("=" | "<>" | "<=" | ">=" | "<" | ">") ValueExpression
   139  //                     | ValueExpression "LIKE" ValueExpression ("ESCAPE" LitString)?
   140  //                     | ValueExpression ("NOT"? "BETWEEN" ValueExpression "AND" ValueExpression)
   141  //                     | ValueExpression "IN" "(" Expression ("," Expression)* ")"
   142  //                     | ValueExpression
   143  // ValueExpression     → Operand
   144  //
   145  // Operand grammar follows below
   146  
   147  // Expression represents a logical disjunction of clauses
   148  type Expression struct {
   149  	And []*AndCondition `parser:"@@ ( \"OR\" @@ )*"`
   150  }
   151  
   152  // ListExpr represents a literal list with elements as expressions.
   153  type ListExpr struct {
   154  	Elements []*Expression `parser:"\"(\" @@ ( \",\" @@ )* \")\" | \"[\" @@ ( \",\" @@ )* \"]\""`
   155  }
   156  
   157  // AndCondition represents logical conjunction of clauses
   158  type AndCondition struct {
   159  	Condition []*Condition `parser:"@@ ( \"AND\" @@ )*"`
   160  }
   161  
   162  // Condition represents a negation or a condition operand
   163  type Condition struct {
   164  	Operand *ConditionOperand `parser:"  @@"`
   165  	Not     *Condition        `parser:"| \"NOT\" @@"`
   166  }
   167  
   168  // ConditionOperand is a operand followed by an an optional operation
   169  // expression
   170  type ConditionOperand struct {
   171  	Operand      *Operand      `parser:"@@"`
   172  	ConditionRHS *ConditionRHS `parser:"@@?"`
   173  }
   174  
   175  // ConditionRHS represents the right-hand-side of Compare, Between, In
   176  // or Like expressions.
   177  type ConditionRHS struct {
   178  	Compare *Compare `parser:"  @@"`
   179  	Between *Between `parser:"| @@"`
   180  	In      *In      `parser:"| \"IN\" @@"`
   181  	Like    *Like    `parser:"| @@"`
   182  }
   183  
   184  // Compare represents the RHS of a comparison expression
   185  type Compare struct {
   186  	Operator string   `parser:"@( \"<>\" | \"<=\" | \">=\" | \"=\" | \"<\" | \">\" | \"!=\" )"`
   187  	Operand  *Operand `parser:"  @@"`
   188  }
   189  
   190  // Like represents the RHS of a LIKE expression
   191  type Like struct {
   192  	Not        bool     `parser:" @\"NOT\"? "`
   193  	Pattern    *Operand `parser:" \"LIKE\" @@ "`
   194  	EscapeChar *Operand `parser:" (\"ESCAPE\" @@)? "`
   195  }
   196  
   197  // Between represents the RHS of a BETWEEN expression
   198  type Between struct {
   199  	Not   bool     `parser:" @\"NOT\"? "`
   200  	Start *Operand `parser:" \"BETWEEN\" @@ "`
   201  	End   *Operand `parser:" \"AND\" @@ "`
   202  }
   203  
   204  // In represents the RHS of an IN expression
   205  type In struct {
   206  	ListExpression *Expression `parser:"@@ "`
   207  }
   208  
   209  // Grammar for Operand:
   210  //
   211  // operand → multOp ( ("-" | "+") multOp )*
   212  // multOp  → unary ( ("/" | "*" | "%") unary )*
   213  // unary   → "-" unary | primary
   214  // primary → Value | Variable | "(" expression ")"
   215  //
   216  
   217  // An Operand is a single term followed by an optional sequence of
   218  // terms separated by +/-
   219  type Operand struct {
   220  	Left  *MultOp     `parser:"@@"`
   221  	Right []*OpFactor `parser:"(@@)*"`
   222  }
   223  
   224  // OpFactor represents the right-side of a +/- operation.
   225  type OpFactor struct {
   226  	Op    string  `parser:"@(\"+\" | \"-\")"`
   227  	Right *MultOp `parser:"@@"`
   228  }
   229  
   230  // MultOp represents a single term followed by an optional sequence of
   231  // terms separated by *, / or % operators.
   232  type MultOp struct {
   233  	Left  *UnaryTerm     `parser:"@@"`
   234  	Right []*OpUnaryTerm `parser:"(@@)*"`
   235  }
   236  
   237  // OpUnaryTerm represents the right side of *, / or % binary operations.
   238  type OpUnaryTerm struct {
   239  	Op    string     `parser:"@(\"*\" | \"/\" | \"%\")"`
   240  	Right *UnaryTerm `parser:"@@"`
   241  }
   242  
   243  // UnaryTerm represents a single negated term or a primary term
   244  type UnaryTerm struct {
   245  	Negated *NegatedTerm `parser:"  @@"`
   246  	Primary *PrimaryTerm `parser:"| @@"`
   247  }
   248  
   249  // NegatedTerm has a leading minus sign.
   250  type NegatedTerm struct {
   251  	Term *PrimaryTerm `parser:"\"-\" @@"`
   252  }
   253  
   254  // PrimaryTerm represents a Value, Path expression, a Sub-expression
   255  // or a function call.
   256  type PrimaryTerm struct {
   257  	Value         *LitValue   `parser:"  @@"`
   258  	JPathExpr     *JSONPath   `parser:"| @@"`
   259  	ListExpr      *ListExpr   `parser:"| @@"`
   260  	SubExpression *Expression `parser:"| \"(\" @@ \")\""`
   261  	// Include function expressions here.
   262  	FuncCall *FuncExpr `parser:"| @@"`
   263  }
   264  
   265  // FuncExpr represents a function call
   266  type FuncExpr struct {
   267  	SFunc     *SimpleArgFunc `parser:"  @@"`
   268  	Count     *CountFunc     `parser:"| @@"`
   269  	Cast      *CastFunc      `parser:"| @@"`
   270  	Substring *SubstringFunc `parser:"| @@"`
   271  	Extract   *ExtractFunc   `parser:"| @@"`
   272  	Trim      *TrimFunc      `parser:"| @@"`
   273  	DateAdd   *DateAddFunc   `parser:"| @@"`
   274  	DateDiff  *DateDiffFunc  `parser:"| @@"`
   275  
   276  	// Used during evaluation for aggregation funcs
   277  	aggregate *aggVal
   278  }
   279  
   280  // SimpleArgFunc represents functions with simple expression
   281  // arguments.
   282  type SimpleArgFunc struct {
   283  	FunctionName string `parser:" @(\"AVG\" | \"MAX\" | \"MIN\" | \"SUM\" |  \"COALESCE\" | \"NULLIF\" | \"TO_STRING\" | \"TO_TIMESTAMP\" | \"UTCNOW\" | \"CHAR_LENGTH\" | \"CHARACTER_LENGTH\" | \"LOWER\" | \"UPPER\") "`
   284  
   285  	ArgsList []*Expression `parser:"\"(\" (@@ (\",\" @@)*)?\")\""`
   286  }
   287  
   288  // CountFunc represents the COUNT sql function
   289  type CountFunc struct {
   290  	StarArg bool        `parser:" \"COUNT\" \"(\" ( @\"*\"?"`
   291  	ExprArg *Expression `parser:" @@? )! \")\""`
   292  }
   293  
   294  // CastFunc represents CAST sql function
   295  type CastFunc struct {
   296  	Expr     *Expression `parser:" \"CAST\" \"(\" @@ "`
   297  	CastType string      `parser:" \"AS\" @(\"BOOL\" | \"INT\" | \"INTEGER\" | \"STRING\" | \"FLOAT\" | \"DECIMAL\" | \"NUMERIC\" | \"TIMESTAMP\") \")\" "`
   298  }
   299  
   300  // SubstringFunc represents SUBSTRING sql function
   301  type SubstringFunc struct {
   302  	Expr *PrimaryTerm `parser:" \"SUBSTRING\" \"(\" @@ "`
   303  	From *Operand     `parser:" ( \"FROM\" @@ "`
   304  	For  *Operand     `parser:"   (\"FOR\" @@)? \")\" "`
   305  	Arg2 *Operand     `parser:" | \",\" @@ "`
   306  	Arg3 *Operand     `parser:"   (\",\" @@)? \")\" )"`
   307  }
   308  
   309  // ExtractFunc represents EXTRACT sql function
   310  type ExtractFunc struct {
   311  	Timeword string       `parser:" \"EXTRACT\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword | \"TIMEZONE_HOUR\":Timeword | \"TIMEZONE_MINUTE\":Timeword ) "`
   312  	From     *PrimaryTerm `parser:" \"FROM\" @@ \")\" "`
   313  }
   314  
   315  // TrimFunc represents TRIM sql function
   316  type TrimFunc struct {
   317  	TrimWhere *string      `parser:" \"TRIM\" \"(\" ( @( \"LEADING\" | \"TRAILING\" | \"BOTH\" ) "`
   318  	TrimChars *PrimaryTerm `parser:"             @@?  "`
   319  	TrimFrom  *PrimaryTerm `parser:"             \"FROM\" )? @@ \")\" "`
   320  }
   321  
   322  // DateAddFunc represents the DATE_ADD function
   323  type DateAddFunc struct {
   324  	DatePart  string       `parser:" \"DATE_ADD\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword ) \",\""`
   325  	Quantity  *Operand     `parser:" @@ \",\""`
   326  	Timestamp *PrimaryTerm `parser:" @@ \")\""`
   327  }
   328  
   329  // DateDiffFunc represents the DATE_DIFF function
   330  type DateDiffFunc struct {
   331  	DatePart   string       `parser:" \"DATE_DIFF\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword ) \",\" "`
   332  	Timestamp1 *PrimaryTerm `parser:" @@ \",\" "`
   333  	Timestamp2 *PrimaryTerm `parser:" @@ \")\" "`
   334  }
   335  
   336  // LitValue represents a literal value parsed from the sql
   337  type LitValue struct {
   338  	Float   *float64       `parser:"(  @Float"`
   339  	Int     *float64       `parser:" | @Int"` // To avoid value out of range, use float64 instead
   340  	String  *LiteralString `parser:" | @LitString"`
   341  	Boolean *Boolean       `parser:" | @(\"TRUE\" | \"FALSE\")"`
   342  	Null    bool           `parser:" | @\"NULL\")"`
   343  }
   344  
   345  // Identifier represents a parsed identifier
   346  type Identifier struct {
   347  	Unquoted *string           `parser:"  @Ident"`
   348  	Quoted   *QuotedIdentifier `parser:"| @QuotIdent"`
   349  }
   350  
   351  var (
   352  	sqlLexer = lexer.Must(lexer.Regexp(`(\s+)` +
   353  		`|(?P<Timeword>(?i)\b(?:YEAR|MONTH|DAY|HOUR|MINUTE|SECOND|TIMEZONE_HOUR|TIMEZONE_MINUTE)\b)` +
   354  		`|(?P<Keyword>(?i)\b(?:SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|ESCAPE|AS|IN|BOOL|INT|INTEGER|STRING|FLOAT|DECIMAL|NUMERIC|TIMESTAMP|AVG|COUNT|MAX|MIN|SUM|COALESCE|NULLIF|CAST|DATE_ADD|DATE_DIFF|EXTRACT|TO_STRING|TO_TIMESTAMP|UTCNOW|CHAR_LENGTH|CHARACTER_LENGTH|LOWER|SUBSTRING|TRIM|UPPER|LEADING|TRAILING|BOTH|FOR)\b)` +
   355  		`|(?P<Ident>[a-zA-Z_][a-zA-Z0-9_]*)` +
   356  		`|(?P<QuotIdent>"([^"]*("")?)*")` +
   357  		`|(?P<Float>\d*\.\d+([eE][-+]?\d+)?)` +
   358  		`|(?P<Int>\d+)` +
   359  		`|(?P<LitString>'([^']*('')?)*')` +
   360  		`|(?P<Operators><>|!=|<=|>=|\.\*|\[\*\]|[-+*/%,.()=<>\[\]])`,
   361  	))
   362  
   363  	// SQLParser is used to parse SQL statements
   364  	SQLParser = participle.MustBuild(
   365  		&Select{},
   366  		participle.Lexer(sqlLexer),
   367  		participle.CaseInsensitive("Keyword"),
   368  		participle.CaseInsensitive("Timeword"),
   369  	)
   370  )