vitess.io/vitess@v0.16.2/go/vt/sqlparser/parser.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sqlparser 18 19 import ( 20 "fmt" 21 "io" 22 "strconv" 23 "strings" 24 "sync" 25 26 "vitess.io/vitess/go/internal/flag" 27 "vitess.io/vitess/go/vt/log" 28 "vitess.io/vitess/go/vt/servenv" 29 "vitess.io/vitess/go/vt/vterrors" 30 31 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 32 ) 33 34 var versionFlagSync sync.Once 35 36 // parserPool is a pool for parser objects. 37 var parserPool = sync.Pool{ 38 New: func() any { 39 return &yyParserImpl{} 40 }, 41 } 42 43 // zeroParser is a zero-initialized parser to help reinitialize the parser for pooling. 44 var zeroParser yyParserImpl 45 46 // mySQLParserVersion is the version of MySQL that the parser would emulate 47 var mySQLParserVersion string 48 49 // yyParsePooled is a wrapper around yyParse that pools the parser objects. There isn't a 50 // particularly good reason to use yyParse directly, since it immediately discards its parser. 51 // 52 // N.B: Parser pooling means that you CANNOT take references directly to parse stack variables (e.g. 53 // $$ = &$4) in sql.y rules. You must instead add an intermediate reference like so: 54 // 55 // showCollationFilterOpt := $4 56 // $$ = &Show{Type: string($2), ShowCollationFilterOpt: &showCollationFilterOpt} 57 func yyParsePooled(yylex yyLexer) int { 58 parser := parserPool.Get().(*yyParserImpl) 59 defer func() { 60 *parser = zeroParser 61 parserPool.Put(parser) 62 }() 63 return parser.Parse(yylex) 64 } 65 66 // Instructions for creating new types: If a type 67 // needs to satisfy an interface, declare that function 68 // along with that interface. This will help users 69 // identify the list of types to which they can assert 70 // those interfaces. 71 // If the member of a type has a string with a predefined 72 // list of values, declare those values as const following 73 // the type. 74 // For interfaces that define dummy functions to consolidate 75 // a set of types, define the function as iTypeName. 76 // This will help avoid name collisions. 77 78 // Parse2 parses the SQL in full and returns a Statement, which 79 // is the AST representation of the query, and a set of BindVars, which are all the 80 // bind variables that were found in the original SQL query. If a DDL statement 81 // is partially parsed but still contains a syntax error, the 82 // error is ignored and the DDL is returned anyway. 83 func Parse2(sql string) (Statement, BindVars, error) { 84 tokenizer := NewStringTokenizer(sql) 85 if yyParsePooled(tokenizer) != 0 { 86 if tokenizer.partialDDL != nil { 87 if typ, val := tokenizer.Scan(); typ != 0 { 88 return nil, nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val)) 89 } 90 log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError) 91 switch x := tokenizer.partialDDL.(type) { 92 case DBDDLStatement: 93 x.SetFullyParsed(false) 94 case DDLStatement: 95 x.SetFullyParsed(false) 96 } 97 tokenizer.ParseTree = tokenizer.partialDDL 98 return tokenizer.ParseTree, tokenizer.BindVars, nil 99 } 100 return nil, nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error()) 101 } 102 if tokenizer.ParseTree == nil { 103 return nil, nil, ErrEmpty 104 } 105 return tokenizer.ParseTree, tokenizer.BindVars, nil 106 } 107 108 func checkParserVersionFlag() { 109 if flag.Parsed() { 110 versionFlagSync.Do(func() { 111 convVersion, err := convertMySQLVersionToCommentVersion(servenv.MySQLServerVersion()) 112 if err != nil { 113 log.Fatalf("unable to parse mysql version: %v", err) 114 } 115 mySQLParserVersion = convVersion 116 }) 117 } 118 } 119 120 // SetParserVersion sets the mysql parser version 121 func SetParserVersion(version string) { 122 mySQLParserVersion = version 123 } 124 125 // GetParserVersion returns the version of the mysql parser 126 func GetParserVersion() string { 127 return mySQLParserVersion 128 } 129 130 // convertMySQLVersionToCommentVersion converts the MySQL version into comment version format. 131 func convertMySQLVersionToCommentVersion(version string) (string, error) { 132 var res = make([]int, 3) 133 idx := 0 134 val := "" 135 for _, c := range version { 136 if c <= '9' && c >= '0' { 137 val += string(c) 138 } else if c == '.' { 139 v, err := strconv.Atoi(val) 140 if err != nil { 141 return "", err 142 } 143 val = "" 144 res[idx] = v 145 idx++ 146 if idx == 3 { 147 break 148 } 149 } else { 150 break 151 } 152 } 153 if val != "" { 154 v, err := strconv.Atoi(val) 155 if err != nil { 156 return "", err 157 } 158 res[idx] = v 159 idx++ 160 } 161 if idx == 0 { 162 return "", vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "MySQL version not correctly setup - %s.", version) 163 } 164 165 return fmt.Sprintf("%01d%02d%02d", res[0], res[1], res[2]), nil 166 } 167 168 // ParseExpr parses an expression and transforms it to an AST 169 func ParseExpr(sql string) (Expr, error) { 170 stmt, err := Parse("select " + sql) 171 if err != nil { 172 return nil, err 173 } 174 aliasedExpr := stmt.(*Select).SelectExprs[0].(*AliasedExpr) 175 return aliasedExpr.Expr, err 176 } 177 178 // Parse behaves like Parse2 but does not return a set of bind variables 179 func Parse(sql string) (Statement, error) { 180 stmt, _, err := Parse2(sql) 181 return stmt, err 182 } 183 184 // ParseStrictDDL is the same as Parse except it errors on 185 // partially parsed DDL statements. 186 func ParseStrictDDL(sql string) (Statement, error) { 187 tokenizer := NewStringTokenizer(sql) 188 if yyParsePooled(tokenizer) != 0 { 189 return nil, tokenizer.LastError 190 } 191 if tokenizer.ParseTree == nil { 192 return nil, ErrEmpty 193 } 194 return tokenizer.ParseTree, nil 195 } 196 197 // ParseTokenizer is a raw interface to parse from the given tokenizer. 198 // This does not used pooled parsers, and should not be used in general. 199 func ParseTokenizer(tokenizer *Tokenizer) int { 200 return yyParse(tokenizer) 201 } 202 203 // ParseNext parses a single SQL statement from the tokenizer 204 // returning a Statement which is the AST representation of the query. 205 // The tokenizer will always read up to the end of the statement, allowing for 206 // the next call to ParseNext to parse any subsequent SQL statements. When 207 // there are no more statements to parse, a error of io.EOF is returned. 208 func ParseNext(tokenizer *Tokenizer) (Statement, error) { 209 return parseNext(tokenizer, false) 210 } 211 212 // ParseNextStrictDDL is the same as ParseNext except it errors on 213 // partially parsed DDL statements. 214 func ParseNextStrictDDL(tokenizer *Tokenizer) (Statement, error) { 215 return parseNext(tokenizer, true) 216 } 217 218 func parseNext(tokenizer *Tokenizer, strict bool) (Statement, error) { 219 if tokenizer.cur() == ';' { 220 tokenizer.skip(1) 221 tokenizer.skipBlank() 222 } 223 if tokenizer.cur() == eofChar { 224 return nil, io.EOF 225 } 226 227 tokenizer.reset() 228 tokenizer.multi = true 229 if yyParsePooled(tokenizer) != 0 { 230 if tokenizer.partialDDL != nil && !strict { 231 tokenizer.ParseTree = tokenizer.partialDDL 232 return tokenizer.ParseTree, nil 233 } 234 return nil, tokenizer.LastError 235 } 236 _, isCommentOnly := tokenizer.ParseTree.(*CommentOnly) 237 if tokenizer.ParseTree == nil || isCommentOnly { 238 return ParseNext(tokenizer) 239 } 240 return tokenizer.ParseTree, nil 241 } 242 243 // ErrEmpty is a sentinel error returned when parsing empty statements. 244 var ErrEmpty = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.EmptyQuery, "Query was empty") 245 246 // SplitStatement returns the first sql statement up to either a ; or EOF 247 // and the remainder from the given buffer 248 func SplitStatement(blob string) (string, string, error) { 249 tokenizer := NewStringTokenizer(blob) 250 tkn := 0 251 for { 252 tkn, _ = tokenizer.Scan() 253 if tkn == 0 || tkn == ';' || tkn == eofChar { 254 break 255 } 256 } 257 if tokenizer.LastError != nil { 258 return "", "", tokenizer.LastError 259 } 260 if tkn == ';' { 261 return blob[:tokenizer.Pos-1], blob[tokenizer.Pos:], nil 262 } 263 return blob, "", nil 264 } 265 266 // SplitStatementToPieces split raw sql statement that may have multi sql pieces to sql pieces 267 // returns the sql pieces blob contains; or error if sql cannot be parsed 268 func SplitStatementToPieces(blob string) (pieces []string, err error) { 269 // fast path: the vast majority of SQL statements do not have semicolons in them 270 if blob == "" { 271 return nil, nil 272 } 273 switch strings.IndexByte(blob, ';') { 274 case -1: // if there is no semicolon, return blob as a whole 275 return []string{blob}, nil 276 case len(blob) - 1: // if there's a single semicolon and it's the last character, return blob without it 277 return []string{blob[:len(blob)-1]}, nil 278 } 279 280 pieces = make([]string, 0, 16) 281 tokenizer := NewStringTokenizer(blob) 282 283 tkn := 0 284 var stmt string 285 stmtBegin := 0 286 emptyStatement := true 287 loop: 288 for { 289 tkn, _ = tokenizer.Scan() 290 switch tkn { 291 case ';': 292 stmt = blob[stmtBegin : tokenizer.Pos-1] 293 if !emptyStatement { 294 pieces = append(pieces, stmt) 295 emptyStatement = true 296 } 297 stmtBegin = tokenizer.Pos 298 case 0, eofChar: 299 blobTail := tokenizer.Pos - 1 300 if stmtBegin < blobTail { 301 stmt = blob[stmtBegin : blobTail+1] 302 if !emptyStatement { 303 pieces = append(pieces, stmt) 304 } 305 } 306 break loop 307 default: 308 emptyStatement = false 309 } 310 } 311 312 err = tokenizer.LastError 313 return 314 } 315 316 func IsMySQL80AndAbove() bool { 317 return mySQLParserVersion >= "80000" 318 }