github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/sql/parser.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package sql 19 20 import ( 21 "strings" 22 23 "github.com/alecthomas/participle" 24 "github.com/alecthomas/participle/lexer" 25 ) 26 27 // Types with custom Capture interface for parsing 28 29 // Boolean is a type for a parsed Boolean literal 30 type Boolean bool 31 32 // Capture interface used by participle 33 func (b *Boolean) Capture(values []string) error { 34 *b = Boolean(strings.EqualFold(values[0], "true")) 35 return nil 36 } 37 38 // LiteralString is a type for parsed SQL string literals 39 type LiteralString string 40 41 // Capture interface used by participle 42 func (ls *LiteralString) Capture(values []string) error { 43 // Remove enclosing single quote 44 n := len(values[0]) 45 r := values[0][1 : n-1] 46 // Translate doubled quotes 47 *ls = LiteralString(strings.ReplaceAll(r, "''", "'")) 48 return nil 49 } 50 51 // LiteralList is a type for parsed SQL lists literals 52 type LiteralList []string 53 54 // Capture interface used by participle 55 func (ls *LiteralList) Capture(values []string) error { 56 // Remove enclosing parenthesis. 57 n := len(values[0]) 58 r := values[0][1 : n-1] 59 // Translate doubled quotes 60 *ls = LiteralList(strings.Split(r, ",")) 61 return nil 62 } 63 64 // ObjectKey is a type for parsed strings occurring in key paths 65 type ObjectKey struct { 66 Lit *LiteralString `parser:" \"[\" @LitString \"]\""` 67 ID *Identifier `parser:"| \".\" @@"` 68 } 69 70 // QuotedIdentifier is a type for parsed strings that are double 71 // quoted. 72 type QuotedIdentifier string 73 74 // Capture interface used by participle 75 func (qi *QuotedIdentifier) Capture(values []string) error { 76 // Remove enclosing quotes 77 n := len(values[0]) 78 r := values[0][1 : n-1] 79 80 // Translate doubled quotes 81 *qi = QuotedIdentifier(strings.ReplaceAll(r, `""`, `"`)) 82 return nil 83 } 84 85 // Types representing AST of SQL statement. Only SELECT is supported. 86 87 // Select is the top level AST node type 88 type Select struct { 89 Expression *SelectExpression `parser:"\"SELECT\" @@"` 90 From *TableExpression `parser:"\"FROM\" @@"` 91 Where *Expression `parser:"( \"WHERE\" @@ )?"` 92 Limit *LitValue `parser:"( \"LIMIT\" @@ )?"` 93 } 94 95 // SelectExpression represents the items requested in the select 96 // statement 97 type SelectExpression struct { 98 All bool `parser:" @\"*\""` 99 Expressions []*AliasedExpression `parser:"| @@ { \",\" @@ }"` 100 } 101 102 // TableExpression represents the FROM clause 103 type TableExpression struct { 104 Table *JSONPath `parser:"@@"` 105 As string `parser:"( \"AS\"? @Ident )?"` 106 } 107 108 // JSONPathElement represents a keypath component 109 type JSONPathElement struct { 110 Key *ObjectKey `parser:" @@"` // ['name'] and .name forms 111 Index *int `parser:"| \"[\" @Int \"]\""` // [3] form 112 ObjectWildcard bool `parser:"| @\".*\""` // .* form 113 ArrayWildcard bool `parser:"| @\"[*]\""` // [*] form 114 } 115 116 // JSONPath represents a keypath. 117 // Instances should be treated idempotent and not change once created. 118 type JSONPath struct { 119 BaseKey *Identifier `parser:" @@"` 120 PathExpr []*JSONPathElement `parser:"(@@)*"` 121 122 // Cached values: 123 pathString string 124 strippedTableAlias string 125 strippedPathExpr []*JSONPathElement 126 } 127 128 // AliasedExpression is an expression that can be optionally named 129 type AliasedExpression struct { 130 Expression *Expression `parser:"@@"` 131 As string `parser:"[ \"AS\" @Ident | \"AS\" @LitString ]"` 132 } 133 134 // Grammar for Expression 135 // 136 // Expression → AndCondition ("OR" AndCondition)* 137 // AndCondition → Condition ("AND" Condition)* 138 // Condition → "NOT" Condition | ConditionExpression 139 // ConditionExpression → ValueExpression ("=" | "<>" | "<=" | ">=" | "<" | ">") ValueExpression 140 // | ValueExpression "LIKE" ValueExpression ("ESCAPE" LitString)? 141 // | ValueExpression ("NOT"? "BETWEEN" ValueExpression "AND" ValueExpression) 142 // | ValueExpression "IN" "(" Expression ("," Expression)* ")" 143 // | ValueExpression 144 // ValueExpression → Operand 145 // 146 // Operand grammar follows below 147 148 // Expression represents a logical disjunction of clauses 149 type Expression struct { 150 And []*AndCondition `parser:"@@ ( \"OR\" @@ )*"` 151 } 152 153 // ListExpr represents a literal list with elements as expressions. 154 type ListExpr struct { 155 Elements []*Expression `parser:"\"(\" @@ ( \",\" @@ )* \")\" | \"[\" @@ ( \",\" @@ )* \"]\""` 156 } 157 158 // AndCondition represents logical conjunction of clauses 159 type AndCondition struct { 160 Condition []*Condition `parser:"@@ ( \"AND\" @@ )*"` 161 } 162 163 // Condition represents a negation or a condition operand 164 type Condition struct { 165 Operand *ConditionOperand `parser:" @@"` 166 Not *Condition `parser:"| \"NOT\" @@"` 167 } 168 169 // ConditionOperand is a operand followed by an optional operation expression. 170 type ConditionOperand struct { 171 Operand *Operand `parser:"@@"` 172 ConditionRHS *ConditionRHS `parser:"@@?"` 173 } 174 175 // ConditionRHS represents the right-hand-side of Compare, Between, In 176 // or Like expressions. 177 type ConditionRHS struct { 178 Compare *Compare `parser:" @@"` 179 Between *Between `parser:"| @@"` 180 In *In `parser:"| \"IN\" @@"` 181 Like *Like `parser:"| @@"` 182 } 183 184 // Compare represents the RHS of a comparison expression 185 type Compare struct { 186 Operator string `parser:"@( \"<>\" | \"<=\" | \">=\" | \"=\" | \"<\" | \">\" | \"!=\" | \"IS\" \"NOT\" | \"IS\")"` 187 Operand *Operand `parser:" @@"` 188 } 189 190 // Like represents the RHS of a LIKE expression 191 type Like struct { 192 Not bool `parser:" @\"NOT\"? "` 193 Pattern *Operand `parser:" \"LIKE\" @@ "` 194 EscapeChar *Operand `parser:" (\"ESCAPE\" @@)? "` 195 } 196 197 // Between represents the RHS of a BETWEEN expression 198 type Between struct { 199 Not bool `parser:" @\"NOT\"? "` 200 Start *Operand `parser:" \"BETWEEN\" @@ "` 201 End *Operand `parser:" \"AND\" @@ "` 202 } 203 204 // In represents the RHS of an IN expression. The RHS can be a list-literal 205 // (i.e. enclosed in parentheses like `IN (1,2,4)`) or it could be a JSON path 206 // expression (as in `8.5 IN s.nested[*][*]`). Specifically, it cannot be an 207 // `Expression` as an expression can never evaluate to a list. 208 type In struct { 209 JPathExpr *JSONPath `parser:"@@"` 210 ListExpr *ListExpr `parser:"| @@"` 211 } 212 213 // Grammar for Operand: 214 // 215 // operand → multOp ( ("-" | "+") multOp )* 216 // multOp → unary ( ("/" | "*" | "%") unary )* 217 // unary → "-" unary | primary 218 // primary → Value | Variable | "(" expression ")" 219 // 220 221 // An Operand is a single term followed by an optional sequence of 222 // terms separated by +/- 223 type Operand struct { 224 Left *MultOp `parser:"@@"` 225 Right []*OpFactor `parser:"(@@)*"` 226 } 227 228 // OpFactor represents the right-side of a +/- operation. 229 type OpFactor struct { 230 Op string `parser:"@(\"+\" | \"-\")"` 231 Right *MultOp `parser:"@@"` 232 } 233 234 // MultOp represents a single term followed by an optional sequence of 235 // terms separated by *, / or % operators. 236 type MultOp struct { 237 Left *UnaryTerm `parser:"@@"` 238 Right []*OpUnaryTerm `parser:"(@@)*"` 239 } 240 241 // OpUnaryTerm represents the right side of *, / or % binary operations. 242 type OpUnaryTerm struct { 243 Op string `parser:"@(\"*\" | \"/\" | \"%\")"` 244 Right *UnaryTerm `parser:"@@"` 245 } 246 247 // UnaryTerm represents a single negated term or a primary term 248 type UnaryTerm struct { 249 Negated *NegatedTerm `parser:" @@"` 250 Primary *PrimaryTerm `parser:"| @@"` 251 } 252 253 // NegatedTerm has a leading minus sign. 254 type NegatedTerm struct { 255 Term *PrimaryTerm `parser:"\"-\" @@"` 256 } 257 258 // PrimaryTerm represents a Value, Path expression, a Sub-expression 259 // or a function call. 260 type PrimaryTerm struct { 261 Value *LitValue `parser:" @@"` 262 JPathExpr *JSONPath `parser:"| @@"` 263 ListExpr *ListExpr `parser:"| @@"` 264 SubExpression *Expression `parser:"| \"(\" @@ \")\""` 265 // Include function expressions here. 266 FuncCall *FuncExpr `parser:"| @@"` 267 } 268 269 // FuncExpr represents a function call 270 type FuncExpr struct { 271 SFunc *SimpleArgFunc `parser:" @@"` 272 Count *CountFunc `parser:"| @@"` 273 Cast *CastFunc `parser:"| @@"` 274 Substring *SubstringFunc `parser:"| @@"` 275 Extract *ExtractFunc `parser:"| @@"` 276 Trim *TrimFunc `parser:"| @@"` 277 DateAdd *DateAddFunc `parser:"| @@"` 278 DateDiff *DateDiffFunc `parser:"| @@"` 279 280 // Used during evaluation for aggregation funcs 281 aggregate *aggVal 282 } 283 284 // SimpleArgFunc represents functions with simple expression 285 // arguments. 286 type SimpleArgFunc struct { 287 FunctionName string `parser:" @(\"AVG\" | \"MAX\" | \"MIN\" | \"SUM\" | \"COALESCE\" | \"NULLIF\" | \"TO_STRING\" | \"TO_TIMESTAMP\" | \"UTCNOW\" | \"CHAR_LENGTH\" | \"CHARACTER_LENGTH\" | \"LOWER\" | \"UPPER\") "` 288 289 ArgsList []*Expression `parser:"\"(\" (@@ (\",\" @@)*)?\")\""` 290 } 291 292 // CountFunc represents the COUNT sql function 293 type CountFunc struct { 294 StarArg bool `parser:" \"COUNT\" \"(\" ( @\"*\"?"` 295 ExprArg *Expression `parser:" @@? )! \")\""` 296 } 297 298 // CastFunc represents CAST sql function 299 type CastFunc struct { 300 Expr *Expression `parser:" \"CAST\" \"(\" @@ "` 301 CastType string `parser:" \"AS\" @(\"BOOL\" | \"INT\" | \"INTEGER\" | \"STRING\" | \"FLOAT\" | \"DECIMAL\" | \"NUMERIC\" | \"TIMESTAMP\") \")\" "` 302 } 303 304 // SubstringFunc represents SUBSTRING sql function 305 type SubstringFunc struct { 306 Expr *PrimaryTerm `parser:" \"SUBSTRING\" \"(\" @@ "` 307 From *Operand `parser:" ( \"FROM\" @@ "` 308 For *Operand `parser:" (\"FOR\" @@)? \")\" "` 309 Arg2 *Operand `parser:" | \",\" @@ "` 310 Arg3 *Operand `parser:" (\",\" @@)? \")\" )"` 311 } 312 313 // ExtractFunc represents EXTRACT sql function 314 type ExtractFunc struct { 315 Timeword string `parser:" \"EXTRACT\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword | \"TIMEZONE_HOUR\":Timeword | \"TIMEZONE_MINUTE\":Timeword ) "` 316 From *PrimaryTerm `parser:" \"FROM\" @@ \")\" "` 317 } 318 319 // TrimFunc represents TRIM sql function 320 type TrimFunc struct { 321 TrimWhere *string `parser:" \"TRIM\" \"(\" ( @( \"LEADING\" | \"TRAILING\" | \"BOTH\" ) "` 322 TrimChars *PrimaryTerm `parser:" @@? "` 323 TrimFrom *PrimaryTerm `parser:" \"FROM\" )? @@ \")\" "` 324 } 325 326 // DateAddFunc represents the DATE_ADD function 327 type DateAddFunc struct { 328 DatePart string `parser:" \"DATE_ADD\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword ) \",\""` 329 Quantity *Operand `parser:" @@ \",\""` 330 Timestamp *PrimaryTerm `parser:" @@ \")\""` 331 } 332 333 // DateDiffFunc represents the DATE_DIFF function 334 type DateDiffFunc struct { 335 DatePart string `parser:" \"DATE_DIFF\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword ) \",\" "` 336 Timestamp1 *PrimaryTerm `parser:" @@ \",\" "` 337 Timestamp2 *PrimaryTerm `parser:" @@ \")\" "` 338 } 339 340 // LitValue represents a literal value parsed from the sql 341 type LitValue struct { 342 Float *float64 `parser:"( @Float"` 343 Int *float64 `parser:" | @Int"` // To avoid value out of range, use float64 instead 344 String *LiteralString `parser:" | @LitString"` 345 Boolean *Boolean `parser:" | @(\"TRUE\" | \"FALSE\")"` 346 Null bool `parser:" | @\"NULL\""` 347 Missing bool `parser:" | @\"MISSING\")"` 348 } 349 350 // Identifier represents a parsed identifier 351 type Identifier struct { 352 Unquoted *string `parser:" @Ident"` 353 Quoted *QuotedIdentifier `parser:"| @QuotIdent"` 354 } 355 356 var ( 357 sqlLexer = lexer.Must(lexer.Regexp(`(\s+)` + 358 `|(?P<Timeword>(?i)\b(?:YEAR|MONTH|DAY|HOUR|MINUTE|SECOND|TIMEZONE_HOUR|TIMEZONE_MINUTE)\b)` + 359 `|(?P<Keyword>(?i)\b(?:SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|ESCAPE|AS|IN|BOOL|INT|INTEGER|STRING|FLOAT|DECIMAL|NUMERIC|TIMESTAMP|AVG|COUNT|MAX|MIN|SUM|COALESCE|NULLIF|CAST|DATE_ADD|DATE_DIFF|EXTRACT|TO_STRING|TO_TIMESTAMP|UTCNOW|CHAR_LENGTH|CHARACTER_LENGTH|LOWER|SUBSTRING|TRIM|UPPER|LEADING|TRAILING|BOTH|FOR|MISSING)\b)` + 360 `|(?P<Ident>[a-zA-Z_][a-zA-Z0-9_]*)` + 361 `|(?P<QuotIdent>"([^"]*("")?)*")` + 362 `|(?P<Float>\d*\.\d+([eE][-+]?\d+)?)` + 363 `|(?P<Int>\d+)` + 364 `|(?P<LitString>'([^']*('')?)*')` + 365 `|(?P<Operators><>|!=|<=|>=|\.\*|\[\*\]|[-+*/%,.()=<>\[\]])`, 366 )) 367 368 // SQLParser is used to parse SQL statements 369 SQLParser = participle.MustBuild( 370 &Select{}, 371 participle.Lexer(sqlLexer), 372 participle.CaseInsensitive("Keyword"), 373 participle.CaseInsensitive("Timeword"), 374 ) 375 )