github.com/vedadiyan/sqlparser@v1.0.0/pkg/sqlparser/parser.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sqlparser 18 19 import ( 20 "fmt" 21 "io" 22 "strconv" 23 "strings" 24 "sync" 25 26 "github.com/vedadiyan/sqlparser/pkg/log" 27 "github.com/vedadiyan/sqlparser/pkg/vterrors" 28 29 vtrpcpb "github.com/vedadiyan/sqlparser/pkg/vtrpc" 30 ) 31 32 var versionFlagSync sync.Once 33 34 // parserPool is a pool for parser objects. 35 var parserPool = sync.Pool{ 36 New: func() any { 37 return &yyParserImpl{} 38 }, 39 } 40 41 // zeroParser is a zero-initialized parser to help reinitialize the parser for pooling. 42 var zeroParser yyParserImpl 43 44 // mySQLParserVersion is the version of MySQL that the parser would emulate 45 var mySQLParserVersion string 46 47 // yyParsePooled is a wrapper around yyParse that pools the parser objects. There isn't a 48 // particularly good reason to use yyParse directly, since it immediately discards its parser. 49 // 50 // N.B: Parser pooling means that you CANNOT take references directly to parse stack variables (e.g. 51 // $$ = &$4) in sql.y rules. You must instead add an intermediate reference like so: 52 // 53 // showCollationFilterOpt := $4 54 // $$ = &Show{Type: string($2), ShowCollationFilterOpt: &showCollationFilterOpt} 55 func yyParsePooled(yylex yyLexer) int { 56 parser := parserPool.Get().(*yyParserImpl) 57 defer func() { 58 *parser = zeroParser 59 parserPool.Put(parser) 60 }() 61 return parser.Parse(yylex) 62 } 63 64 // Instructions for creating new types: If a type 65 // needs to satisfy an interface, declare that function 66 // along with that interface. This will help users 67 // identify the list of types to which they can assert 68 // those interfaces. 69 // If the member of a type has a string with a predefined 70 // list of values, declare those values as const following 71 // the type. 72 // For interfaces that define dummy functions to consolidate 73 // a set of types, define the function as iTypeName. 74 // This will help avoid name collisions. 75 76 // Parse2 parses the SQL in full and returns a Statement, which 77 // is the AST representation of the query, and a set of BindVars, which are all the 78 // bind variables that were found in the original SQL query. If a DDL statement 79 // is partially parsed but still contains a syntax error, the 80 // error is ignored and the DDL is returned anyway. 81 func Parse2(sql string) (Statement, BindVars, error) { 82 tokenizer := NewStringTokenizer(sql) 83 if yyParsePooled(tokenizer) != 0 { 84 if tokenizer.partialDDL != nil { 85 if typ, val := tokenizer.Scan(); typ != 0 { 86 return nil, nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val)) 87 } 88 log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError) 89 switch x := tokenizer.partialDDL.(type) { 90 case DBDDLStatement: 91 x.SetFullyParsed(false) 92 case DDLStatement: 93 x.SetFullyParsed(false) 94 } 95 tokenizer.ParseTree = tokenizer.partialDDL 96 return tokenizer.ParseTree, tokenizer.BindVars, nil 97 } 98 return nil, nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error()) 99 } 100 if tokenizer.ParseTree == nil { 101 return nil, nil, ErrEmpty 102 } 103 return tokenizer.ParseTree, tokenizer.BindVars, nil 104 } 105 106 // func checkParserVersionFlag() { 107 // if flag.Parsed() { 108 // versionFlagSync.Do(func() { 109 // convVersion, err := convertMySQLVersionToCommentVersion(servenv.MySQLServerVersion()) 110 // if err != nil { 111 // log.Fatalf("unable to parse mysql version: %v", err) 112 // } 113 // mySQLParserVersion = convVersion 114 // }) 115 // } 116 // } 117 118 // SetParserVersion sets the mysql parser version 119 func SetParserVersion(version string) { 120 mySQLParserVersion = version 121 } 122 123 // GetParserVersion returns the version of the mysql parser 124 func GetParserVersion() string { 125 return mySQLParserVersion 126 } 127 128 // convertMySQLVersionToCommentVersion converts the MySQL version into comment version format. 129 func convertMySQLVersionToCommentVersion(version string) (string, error) { 130 var res = make([]int, 3) 131 idx := 0 132 val := "" 133 for _, c := range version { 134 if c <= '9' && c >= '0' { 135 val += string(c) 136 } else if c == '.' { 137 v, err := strconv.Atoi(val) 138 if err != nil { 139 return "", err 140 } 141 val = "" 142 res[idx] = v 143 idx++ 144 if idx == 3 { 145 break 146 } 147 } else { 148 break 149 } 150 } 151 if val != "" { 152 v, err := strconv.Atoi(val) 153 if err != nil { 154 return "", err 155 } 156 res[idx] = v 157 idx++ 158 } 159 if idx == 0 { 160 return "", vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "MySQL version not correctly setup - %s.", version) 161 } 162 163 return fmt.Sprintf("%01d%02d%02d", res[0], res[1], res[2]), nil 164 } 165 166 // ParseExpr parses an expression and transforms it to an AST 167 func ParseExpr(sql string) (Expr, error) { 168 stmt, err := Parse("select " + sql) 169 if err != nil { 170 return nil, err 171 } 172 aliasedExpr := stmt.(*Select).SelectExprs[0].(*AliasedExpr) 173 return aliasedExpr.Expr, err 174 } 175 176 // Parse behaves like Parse2 but does not return a set of bind variables 177 func Parse(sql string) (Statement, error) { 178 stmt, _, err := Parse2(sql) 179 return stmt, err 180 } 181 182 // ParseStrictDDL is the same as Parse except it errors on 183 // partially parsed DDL statements. 184 func ParseStrictDDL(sql string) (Statement, error) { 185 tokenizer := NewStringTokenizer(sql) 186 if yyParsePooled(tokenizer) != 0 { 187 return nil, tokenizer.LastError 188 } 189 if tokenizer.ParseTree == nil { 190 return nil, ErrEmpty 191 } 192 return tokenizer.ParseTree, nil 193 } 194 195 // ParseTokenizer is a raw interface to parse from the given tokenizer. 196 // This does not used pooled parsers, and should not be used in general. 197 func ParseTokenizer(tokenizer *Tokenizer) int { 198 return yyParse(tokenizer) 199 } 200 201 // ParseNext parses a single SQL statement from the tokenizer 202 // returning a Statement which is the AST representation of the query. 203 // The tokenizer will always read up to the end of the statement, allowing for 204 // the next call to ParseNext to parse any subsequent SQL statements. When 205 // there are no more statements to parse, a error of io.EOF is returned. 206 func ParseNext(tokenizer *Tokenizer) (Statement, error) { 207 return parseNext(tokenizer, false) 208 } 209 210 // ParseNextStrictDDL is the same as ParseNext except it errors on 211 // partially parsed DDL statements. 212 func ParseNextStrictDDL(tokenizer *Tokenizer) (Statement, error) { 213 return parseNext(tokenizer, true) 214 } 215 216 func parseNext(tokenizer *Tokenizer, strict bool) (Statement, error) { 217 if tokenizer.cur() == ';' { 218 tokenizer.skip(1) 219 tokenizer.skipBlank() 220 } 221 if tokenizer.cur() == eofChar { 222 return nil, io.EOF 223 } 224 225 tokenizer.reset() 226 tokenizer.multi = true 227 if yyParsePooled(tokenizer) != 0 { 228 if tokenizer.partialDDL != nil && !strict { 229 tokenizer.ParseTree = tokenizer.partialDDL 230 return tokenizer.ParseTree, nil 231 } 232 return nil, tokenizer.LastError 233 } 234 _, isCommentOnly := tokenizer.ParseTree.(*CommentOnly) 235 if tokenizer.ParseTree == nil || isCommentOnly { 236 return ParseNext(tokenizer) 237 } 238 return tokenizer.ParseTree, nil 239 } 240 241 // ErrEmpty is a sentinel error returned when parsing empty statements. 242 var ErrEmpty = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.EmptyQuery, "Query was empty") 243 244 // SplitStatement returns the first sql statement up to either a ; or EOF 245 // and the remainder from the given buffer 246 func SplitStatement(blob string) (string, string, error) { 247 tokenizer := NewStringTokenizer(blob) 248 tkn := 0 249 for { 250 tkn, _ = tokenizer.Scan() 251 if tkn == 0 || tkn == ';' || tkn == eofChar { 252 break 253 } 254 } 255 if tokenizer.LastError != nil { 256 return "", "", tokenizer.LastError 257 } 258 if tkn == ';' { 259 return blob[:tokenizer.Pos-1], blob[tokenizer.Pos:], nil 260 } 261 return blob, "", nil 262 } 263 264 // SplitStatementToPieces split raw sql statement that may have multi sql pieces to sql pieces 265 // returns the sql pieces blob contains; or error if sql cannot be parsed 266 func SplitStatementToPieces(blob string) (pieces []string, err error) { 267 // fast path: the vast majority of SQL statements do not have semicolons in them 268 if blob == "" { 269 return nil, nil 270 } 271 switch strings.IndexByte(blob, ';') { 272 case -1: // if there is no semicolon, return blob as a whole 273 return []string{blob}, nil 274 case len(blob) - 1: // if there's a single semicolon and it's the last character, return blob without it 275 return []string{blob[:len(blob)-1]}, nil 276 } 277 278 pieces = make([]string, 0, 16) 279 tokenizer := NewStringTokenizer(blob) 280 281 tkn := 0 282 var stmt string 283 stmtBegin := 0 284 emptyStatement := true 285 loop: 286 for { 287 tkn, _ = tokenizer.Scan() 288 switch tkn { 289 case ';': 290 stmt = blob[stmtBegin : tokenizer.Pos-1] 291 if !emptyStatement { 292 pieces = append(pieces, stmt) 293 emptyStatement = true 294 } 295 stmtBegin = tokenizer.Pos 296 case 0, eofChar: 297 blobTail := tokenizer.Pos - 1 298 if stmtBegin < blobTail { 299 stmt = blob[stmtBegin : blobTail+1] 300 if !emptyStatement { 301 pieces = append(pieces, stmt) 302 } 303 } 304 break loop 305 default: 306 emptyStatement = false 307 } 308 } 309 310 err = tokenizer.LastError 311 return 312 } 313 314 func IsMySQL80AndAbove() bool { 315 return mySQLParserVersion >= "80000" 316 }