storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/sql/parser.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2019 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package sql 18 19 import ( 20 "strings" 21 22 "github.com/alecthomas/participle" 23 "github.com/alecthomas/participle/lexer" 24 ) 25 26 // Types with custom Capture interface for parsing 27 28 // Boolean is a type for a parsed Boolean literal 29 type Boolean bool 30 31 // Capture interface used by participle 32 func (b *Boolean) Capture(values []string) error { 33 *b = strings.ToLower(values[0]) == "true" 34 return nil 35 } 36 37 // LiteralString is a type for parsed SQL string literals 38 type LiteralString string 39 40 // Capture interface used by participle 41 func (ls *LiteralString) Capture(values []string) error { 42 // Remove enclosing single quote 43 n := len(values[0]) 44 r := values[0][1 : n-1] 45 // Translate doubled quotes 46 *ls = LiteralString(strings.Replace(r, "''", "'", -1)) 47 return nil 48 } 49 50 // LiteralList is a type for parsed SQL lists literals 51 type LiteralList []string 52 53 // Capture interface used by participle 54 func (ls *LiteralList) Capture(values []string) error { 55 // Remove enclosing parenthesis. 56 n := len(values[0]) 57 r := values[0][1 : n-1] 58 // Translate doubled quotes 59 *ls = LiteralList(strings.Split(r, ",")) 60 return nil 61 } 62 63 // ObjectKey is a type for parsed strings occurring in key paths 64 type ObjectKey struct { 65 Lit *LiteralString `parser:" \"[\" @LitString \"]\""` 66 ID *Identifier `parser:"| \".\" @@"` 67 } 68 69 // QuotedIdentifier is a type for parsed strings that are double 70 // quoted. 71 type QuotedIdentifier string 72 73 // Capture interface used by participle 74 func (qi *QuotedIdentifier) Capture(values []string) error { 75 // Remove enclosing quotes 76 n := len(values[0]) 77 r := values[0][1 : n-1] 78 79 // Translate doubled quotes 80 *qi = QuotedIdentifier(strings.Replace(r, `""`, `"`, -1)) 81 return nil 82 } 83 84 // Types representing AST of SQL statement. Only SELECT is supported. 85 86 // Select is the top level AST node type 87 type Select struct { 88 Expression *SelectExpression `parser:"\"SELECT\" @@"` 89 From *TableExpression `parser:"\"FROM\" @@"` 90 Where *Expression `parser:"( \"WHERE\" @@ )?"` 91 Limit *LitValue `parser:"( \"LIMIT\" @@ )?"` 92 } 93 94 // SelectExpression represents the items requested in the select 95 // statement 96 type SelectExpression struct { 97 All bool `parser:" @\"*\""` 98 Expressions []*AliasedExpression `parser:"| @@ { \",\" @@ }"` 99 } 100 101 // TableExpression represents the FROM clause 102 type TableExpression struct { 103 Table *JSONPath `parser:"@@"` 104 As string `parser:"( \"AS\"? @Ident )?"` 105 } 106 107 // JSONPathElement represents a keypath component 108 type JSONPathElement struct { 109 Key *ObjectKey `parser:" @@"` // ['name'] and .name forms 110 Index *int `parser:"| \"[\" @Int \"]\""` // [3] form 111 ObjectWildcard bool `parser:"| @\".*\""` // .* form 112 ArrayWildcard bool `parser:"| @\"[*]\""` // [*] form 113 } 114 115 // JSONPath represents a keypath. 116 // Instances should be treated idempotent and not change once created. 117 type JSONPath struct { 118 BaseKey *Identifier `parser:" @@"` 119 PathExpr []*JSONPathElement `parser:"(@@)*"` 120 121 // Cached values: 122 pathString string 123 strippedTableAlias string 124 strippedPathExpr []*JSONPathElement 125 } 126 127 // AliasedExpression is an expression that can be optionally named 128 type AliasedExpression struct { 129 Expression *Expression `parser:"@@"` 130 As string `parser:"[ \"AS\" @Ident ]"` 131 } 132 133 // Grammar for Expression 134 // 135 // Expression → AndCondition ("OR" AndCondition)* 136 // AndCondition → Condition ("AND" Condition)* 137 // Condition → "NOT" Condition | ConditionExpression 138 // ConditionExpression → ValueExpression ("=" | "<>" | "<=" | ">=" | "<" | ">") ValueExpression 139 // | ValueExpression "LIKE" ValueExpression ("ESCAPE" LitString)? 140 // | ValueExpression ("NOT"? "BETWEEN" ValueExpression "AND" ValueExpression) 141 // | ValueExpression "IN" "(" Expression ("," Expression)* ")" 142 // | ValueExpression 143 // ValueExpression → Operand 144 // 145 // Operand grammar follows below 146 147 // Expression represents a logical disjunction of clauses 148 type Expression struct { 149 And []*AndCondition `parser:"@@ ( \"OR\" @@ )*"` 150 } 151 152 // ListExpr represents a literal list with elements as expressions. 153 type ListExpr struct { 154 Elements []*Expression `parser:"\"(\" @@ ( \",\" @@ )* \")\" | \"[\" @@ ( \",\" @@ )* \"]\""` 155 } 156 157 // AndCondition represents logical conjunction of clauses 158 type AndCondition struct { 159 Condition []*Condition `parser:"@@ ( \"AND\" @@ )*"` 160 } 161 162 // Condition represents a negation or a condition operand 163 type Condition struct { 164 Operand *ConditionOperand `parser:" @@"` 165 Not *Condition `parser:"| \"NOT\" @@"` 166 } 167 168 // ConditionOperand is a operand followed by an an optional operation 169 // expression 170 type ConditionOperand struct { 171 Operand *Operand `parser:"@@"` 172 ConditionRHS *ConditionRHS `parser:"@@?"` 173 } 174 175 // ConditionRHS represents the right-hand-side of Compare, Between, In 176 // or Like expressions. 177 type ConditionRHS struct { 178 Compare *Compare `parser:" @@"` 179 Between *Between `parser:"| @@"` 180 In *In `parser:"| \"IN\" @@"` 181 Like *Like `parser:"| @@"` 182 } 183 184 // Compare represents the RHS of a comparison expression 185 type Compare struct { 186 Operator string `parser:"@( \"<>\" | \"<=\" | \">=\" | \"=\" | \"<\" | \">\" | \"!=\" )"` 187 Operand *Operand `parser:" @@"` 188 } 189 190 // Like represents the RHS of a LIKE expression 191 type Like struct { 192 Not bool `parser:" @\"NOT\"? "` 193 Pattern *Operand `parser:" \"LIKE\" @@ "` 194 EscapeChar *Operand `parser:" (\"ESCAPE\" @@)? "` 195 } 196 197 // Between represents the RHS of a BETWEEN expression 198 type Between struct { 199 Not bool `parser:" @\"NOT\"? "` 200 Start *Operand `parser:" \"BETWEEN\" @@ "` 201 End *Operand `parser:" \"AND\" @@ "` 202 } 203 204 // In represents the RHS of an IN expression 205 type In struct { 206 ListExpression *Expression `parser:"@@ "` 207 } 208 209 // Grammar for Operand: 210 // 211 // operand → multOp ( ("-" | "+") multOp )* 212 // multOp → unary ( ("/" | "*" | "%") unary )* 213 // unary → "-" unary | primary 214 // primary → Value | Variable | "(" expression ")" 215 // 216 217 // An Operand is a single term followed by an optional sequence of 218 // terms separated by +/- 219 type Operand struct { 220 Left *MultOp `parser:"@@"` 221 Right []*OpFactor `parser:"(@@)*"` 222 } 223 224 // OpFactor represents the right-side of a +/- operation. 225 type OpFactor struct { 226 Op string `parser:"@(\"+\" | \"-\")"` 227 Right *MultOp `parser:"@@"` 228 } 229 230 // MultOp represents a single term followed by an optional sequence of 231 // terms separated by *, / or % operators. 232 type MultOp struct { 233 Left *UnaryTerm `parser:"@@"` 234 Right []*OpUnaryTerm `parser:"(@@)*"` 235 } 236 237 // OpUnaryTerm represents the right side of *, / or % binary operations. 238 type OpUnaryTerm struct { 239 Op string `parser:"@(\"*\" | \"/\" | \"%\")"` 240 Right *UnaryTerm `parser:"@@"` 241 } 242 243 // UnaryTerm represents a single negated term or a primary term 244 type UnaryTerm struct { 245 Negated *NegatedTerm `parser:" @@"` 246 Primary *PrimaryTerm `parser:"| @@"` 247 } 248 249 // NegatedTerm has a leading minus sign. 250 type NegatedTerm struct { 251 Term *PrimaryTerm `parser:"\"-\" @@"` 252 } 253 254 // PrimaryTerm represents a Value, Path expression, a Sub-expression 255 // or a function call. 256 type PrimaryTerm struct { 257 Value *LitValue `parser:" @@"` 258 JPathExpr *JSONPath `parser:"| @@"` 259 ListExpr *ListExpr `parser:"| @@"` 260 SubExpression *Expression `parser:"| \"(\" @@ \")\""` 261 // Include function expressions here. 262 FuncCall *FuncExpr `parser:"| @@"` 263 } 264 265 // FuncExpr represents a function call 266 type FuncExpr struct { 267 SFunc *SimpleArgFunc `parser:" @@"` 268 Count *CountFunc `parser:"| @@"` 269 Cast *CastFunc `parser:"| @@"` 270 Substring *SubstringFunc `parser:"| @@"` 271 Extract *ExtractFunc `parser:"| @@"` 272 Trim *TrimFunc `parser:"| @@"` 273 DateAdd *DateAddFunc `parser:"| @@"` 274 DateDiff *DateDiffFunc `parser:"| @@"` 275 276 // Used during evaluation for aggregation funcs 277 aggregate *aggVal 278 } 279 280 // SimpleArgFunc represents functions with simple expression 281 // arguments. 282 type SimpleArgFunc struct { 283 FunctionName string `parser:" @(\"AVG\" | \"MAX\" | \"MIN\" | \"SUM\" | \"COALESCE\" | \"NULLIF\" | \"TO_STRING\" | \"TO_TIMESTAMP\" | \"UTCNOW\" | \"CHAR_LENGTH\" | \"CHARACTER_LENGTH\" | \"LOWER\" | \"UPPER\") "` 284 285 ArgsList []*Expression `parser:"\"(\" (@@ (\",\" @@)*)?\")\""` 286 } 287 288 // CountFunc represents the COUNT sql function 289 type CountFunc struct { 290 StarArg bool `parser:" \"COUNT\" \"(\" ( @\"*\"?"` 291 ExprArg *Expression `parser:" @@? )! \")\""` 292 } 293 294 // CastFunc represents CAST sql function 295 type CastFunc struct { 296 Expr *Expression `parser:" \"CAST\" \"(\" @@ "` 297 CastType string `parser:" \"AS\" @(\"BOOL\" | \"INT\" | \"INTEGER\" | \"STRING\" | \"FLOAT\" | \"DECIMAL\" | \"NUMERIC\" | \"TIMESTAMP\") \")\" "` 298 } 299 300 // SubstringFunc represents SUBSTRING sql function 301 type SubstringFunc struct { 302 Expr *PrimaryTerm `parser:" \"SUBSTRING\" \"(\" @@ "` 303 From *Operand `parser:" ( \"FROM\" @@ "` 304 For *Operand `parser:" (\"FOR\" @@)? \")\" "` 305 Arg2 *Operand `parser:" | \",\" @@ "` 306 Arg3 *Operand `parser:" (\",\" @@)? \")\" )"` 307 } 308 309 // ExtractFunc represents EXTRACT sql function 310 type ExtractFunc struct { 311 Timeword string `parser:" \"EXTRACT\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword | \"TIMEZONE_HOUR\":Timeword | \"TIMEZONE_MINUTE\":Timeword ) "` 312 From *PrimaryTerm `parser:" \"FROM\" @@ \")\" "` 313 } 314 315 // TrimFunc represents TRIM sql function 316 type TrimFunc struct { 317 TrimWhere *string `parser:" \"TRIM\" \"(\" ( @( \"LEADING\" | \"TRAILING\" | \"BOTH\" ) "` 318 TrimChars *PrimaryTerm `parser:" @@? "` 319 TrimFrom *PrimaryTerm `parser:" \"FROM\" )? @@ \")\" "` 320 } 321 322 // DateAddFunc represents the DATE_ADD function 323 type DateAddFunc struct { 324 DatePart string `parser:" \"DATE_ADD\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword ) \",\""` 325 Quantity *Operand `parser:" @@ \",\""` 326 Timestamp *PrimaryTerm `parser:" @@ \")\""` 327 } 328 329 // DateDiffFunc represents the DATE_DIFF function 330 type DateDiffFunc struct { 331 DatePart string `parser:" \"DATE_DIFF\" \"(\" @( \"YEAR\":Timeword | \"MONTH\":Timeword | \"DAY\":Timeword | \"HOUR\":Timeword | \"MINUTE\":Timeword | \"SECOND\":Timeword ) \",\" "` 332 Timestamp1 *PrimaryTerm `parser:" @@ \",\" "` 333 Timestamp2 *PrimaryTerm `parser:" @@ \")\" "` 334 } 335 336 // LitValue represents a literal value parsed from the sql 337 type LitValue struct { 338 Float *float64 `parser:"( @Float"` 339 Int *float64 `parser:" | @Int"` // To avoid value out of range, use float64 instead 340 String *LiteralString `parser:" | @LitString"` 341 Boolean *Boolean `parser:" | @(\"TRUE\" | \"FALSE\")"` 342 Null bool `parser:" | @\"NULL\")"` 343 } 344 345 // Identifier represents a parsed identifier 346 type Identifier struct { 347 Unquoted *string `parser:" @Ident"` 348 Quoted *QuotedIdentifier `parser:"| @QuotIdent"` 349 } 350 351 var ( 352 sqlLexer = lexer.Must(lexer.Regexp(`(\s+)` + 353 `|(?P<Timeword>(?i)\b(?:YEAR|MONTH|DAY|HOUR|MINUTE|SECOND|TIMEZONE_HOUR|TIMEZONE_MINUTE)\b)` + 354 `|(?P<Keyword>(?i)\b(?:SELECT|FROM|TOP|DISTINCT|ALL|WHERE|GROUP|BY|HAVING|UNION|MINUS|EXCEPT|INTERSECT|ORDER|LIMIT|OFFSET|TRUE|FALSE|NULL|IS|NOT|ANY|SOME|BETWEEN|AND|OR|LIKE|ESCAPE|AS|IN|BOOL|INT|INTEGER|STRING|FLOAT|DECIMAL|NUMERIC|TIMESTAMP|AVG|COUNT|MAX|MIN|SUM|COALESCE|NULLIF|CAST|DATE_ADD|DATE_DIFF|EXTRACT|TO_STRING|TO_TIMESTAMP|UTCNOW|CHAR_LENGTH|CHARACTER_LENGTH|LOWER|SUBSTRING|TRIM|UPPER|LEADING|TRAILING|BOTH|FOR)\b)` + 355 `|(?P<Ident>[a-zA-Z_][a-zA-Z0-9_]*)` + 356 `|(?P<QuotIdent>"([^"]*("")?)*")` + 357 `|(?P<Float>\d*\.\d+([eE][-+]?\d+)?)` + 358 `|(?P<Int>\d+)` + 359 `|(?P<LitString>'([^']*('')?)*')` + 360 `|(?P<Operators><>|!=|<=|>=|\.\*|\[\*\]|[-+*/%,.()=<>\[\]])`, 361 )) 362 363 // SQLParser is used to parse SQL statements 364 SQLParser = participle.MustBuild( 365 &Select{}, 366 participle.Lexer(sqlLexer), 367 participle.CaseInsensitive("Keyword"), 368 participle.CaseInsensitive("Timeword"), 369 ) 370 )