github.com/vedadiyan/sqlparser@v1.0.0/pkg/sqlparser/parser.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sqlparser
    18  
    19  import (
    20  	"fmt"
    21  	"io"
    22  	"strconv"
    23  	"strings"
    24  	"sync"
    25  
    26  	"github.com/vedadiyan/sqlparser/pkg/log"
    27  	"github.com/vedadiyan/sqlparser/pkg/vterrors"
    28  
    29  	vtrpcpb "github.com/vedadiyan/sqlparser/pkg/vtrpc"
    30  )
    31  
// versionFlagSync is a sync.Once. In the visible part of this file it is only
// referenced from the commented-out checkParserVersionFlag function.
var versionFlagSync sync.Once

// parserPool is a pool for parser objects. Its New function supplies a fresh
// *yyParserImpl whenever the pool has no parser to hand out.
var parserPool = sync.Pool{
	New: func() any {
		return &yyParserImpl{}
	},
}

// zeroParser is a zero-initialized parser to help reinitialize the parser for pooling.
// yyParsePooled copies it over a used parser before putting that parser back in the pool.
var zeroParser yyParserImpl

// mySQLParserVersion is the version of MySQL that the parser would emulate.
// It is written by SetParserVersion and read by GetParserVersion and IsMySQL80AndAbove.
var mySQLParserVersion string
    46  
// yyParsePooled is a wrapper around yyParse that pools the parser objects. There isn't a
// particularly good reason to use yyParse directly, since it immediately discards its parser.
//
// It returns the result of the pooled parser's Parse call; callers in this file
// treat any non-zero result as a parse failure.
//
// N.B: Parser pooling means that you CANNOT take references directly to parse stack variables (e.g.
// $$ = &$4) in sql.y rules. You must instead add an intermediate reference like so:
//
//	showCollationFilterOpt := $4
//	$$ = &Show{Type: string($2), ShowCollationFilterOpt: &showCollationFilterOpt}
func yyParsePooled(yylex yyLexer) int {
	parser := parserPool.Get().(*yyParserImpl)
	// Deferred so the parser is wiped and handed back on every exit from this
	// function, not only on a normal return.
	defer func() {
		// Overwrite with the zero value before Put so that nothing from this
		// parse stays inside the pooled object.
		*parser = zeroParser
		parserPool.Put(parser)
	}()
	return parser.Parse(yylex)
}
    63  
    64  // Instructions for creating new types: If a type
    65  // needs to satisfy an interface, declare that function
    66  // along with that interface. This will help users
    67  // identify the list of types to which they can assert
    68  // those interfaces.
    69  // If the member of a type has a string with a predefined
    70  // list of values, declare those values as const following
    71  // the type.
    72  // For interfaces that define dummy functions to consolidate
    73  // a set of types, define the function as iTypeName.
    74  // This will help avoid name collisions.
    75  
    76  // Parse2 parses the SQL in full and returns a Statement, which
    77  // is the AST representation of the query, and a set of BindVars, which are all the
    78  // bind variables that were found in the original SQL query. If a DDL statement
    79  // is partially parsed but still contains a syntax error, the
    80  // error is ignored and the DDL is returned anyway.
    81  func Parse2(sql string) (Statement, BindVars, error) {
    82  	tokenizer := NewStringTokenizer(sql)
    83  	if yyParsePooled(tokenizer) != 0 {
    84  		if tokenizer.partialDDL != nil {
    85  			if typ, val := tokenizer.Scan(); typ != 0 {
    86  				return nil, nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val))
    87  			}
    88  			log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError)
    89  			switch x := tokenizer.partialDDL.(type) {
    90  			case DBDDLStatement:
    91  				x.SetFullyParsed(false)
    92  			case DDLStatement:
    93  				x.SetFullyParsed(false)
    94  			}
    95  			tokenizer.ParseTree = tokenizer.partialDDL
    96  			return tokenizer.ParseTree, tokenizer.BindVars, nil
    97  		}
    98  		return nil, nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error())
    99  	}
   100  	if tokenizer.ParseTree == nil {
   101  		return nil, nil, ErrEmpty
   102  	}
   103  	return tokenizer.ParseTree, tokenizer.BindVars, nil
   104  }
   105  
   106  // func checkParserVersionFlag() {
   107  // 	if flag.Parsed() {
   108  // 		versionFlagSync.Do(func() {
   109  // 			convVersion, err := convertMySQLVersionToCommentVersion(servenv.MySQLServerVersion())
   110  // 			if err != nil {
   111  // 				log.Fatalf("unable to parse mysql version: %v", err)
   112  // 			}
   113  // 			mySQLParserVersion = convVersion
   114  // 		})
   115  // 	}
   116  // }
   117  
// SetParserVersion sets the mysql parser version.
// The string is stored as given in the package-level mySQLParserVersion; no
// validation or conversion is applied here.
func SetParserVersion(version string) {
	mySQLParserVersion = version
}
   122  
// GetParserVersion returns the version of the mysql parser, i.e. the current
// value of the package-level mySQLParserVersion (empty if it was never set in
// the code visible here).
func GetParserVersion() string {
	return mySQLParserVersion
}
   127  
   128  // convertMySQLVersionToCommentVersion converts the MySQL version into comment version format.
   129  func convertMySQLVersionToCommentVersion(version string) (string, error) {
   130  	var res = make([]int, 3)
   131  	idx := 0
   132  	val := ""
   133  	for _, c := range version {
   134  		if c <= '9' && c >= '0' {
   135  			val += string(c)
   136  		} else if c == '.' {
   137  			v, err := strconv.Atoi(val)
   138  			if err != nil {
   139  				return "", err
   140  			}
   141  			val = ""
   142  			res[idx] = v
   143  			idx++
   144  			if idx == 3 {
   145  				break
   146  			}
   147  		} else {
   148  			break
   149  		}
   150  	}
   151  	if val != "" {
   152  		v, err := strconv.Atoi(val)
   153  		if err != nil {
   154  			return "", err
   155  		}
   156  		res[idx] = v
   157  		idx++
   158  	}
   159  	if idx == 0 {
   160  		return "", vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "MySQL version not correctly setup - %s.", version)
   161  	}
   162  
   163  	return fmt.Sprintf("%01d%02d%02d", res[0], res[1], res[2]), nil
   164  }
   165  
   166  // ParseExpr parses an expression and transforms it to an AST
   167  func ParseExpr(sql string) (Expr, error) {
   168  	stmt, err := Parse("select " + sql)
   169  	if err != nil {
   170  		return nil, err
   171  	}
   172  	aliasedExpr := stmt.(*Select).SelectExprs[0].(*AliasedExpr)
   173  	return aliasedExpr.Expr, err
   174  }
   175  
// Parse behaves like Parse2 but does not return a set of bind variables.
// The Statement and error are passed through from Parse2 unchanged.
func Parse(sql string) (Statement, error) {
	stmt, _, err := Parse2(sql)
	return stmt, err
}
   181  
   182  // ParseStrictDDL is the same as Parse except it errors on
   183  // partially parsed DDL statements.
   184  func ParseStrictDDL(sql string) (Statement, error) {
   185  	tokenizer := NewStringTokenizer(sql)
   186  	if yyParsePooled(tokenizer) != 0 {
   187  		return nil, tokenizer.LastError
   188  	}
   189  	if tokenizer.ParseTree == nil {
   190  		return nil, ErrEmpty
   191  	}
   192  	return tokenizer.ParseTree, nil
   193  }
   194  
// ParseTokenizer is a raw interface to parse from the given tokenizer.
// This does not use pooled parsers, and should not be used in general.
// It calls yyParse directly and returns its integer result unchanged.
func ParseTokenizer(tokenizer *Tokenizer) int {
	return yyParse(tokenizer)
}
   200  
// ParseNext parses a single SQL statement from the tokenizer
// returning a Statement which is the AST representation of the query.
// The tokenizer will always read up to the end of the statement, allowing for
// the next call to ParseNext to parse any subsequent SQL statements. When
// there are no more statements to parse, an error of io.EOF is returned.
//
// It delegates to parseNext with strict set to false, so a partially parsed
// DDL statement is returned rather than reported as an error.
func ParseNext(tokenizer *Tokenizer) (Statement, error) {
	return parseNext(tokenizer, false)
}
   209  
// ParseNextStrictDDL is the same as ParseNext except it errors on
// partially parsed DDL statements. It delegates to parseNext with strict set
// to true.
func ParseNextStrictDDL(tokenizer *Tokenizer) (Statement, error) {
	return parseNext(tokenizer, true)
}
   215  
   216  func parseNext(tokenizer *Tokenizer, strict bool) (Statement, error) {
   217  	if tokenizer.cur() == ';' {
   218  		tokenizer.skip(1)
   219  		tokenizer.skipBlank()
   220  	}
   221  	if tokenizer.cur() == eofChar {
   222  		return nil, io.EOF
   223  	}
   224  
   225  	tokenizer.reset()
   226  	tokenizer.multi = true
   227  	if yyParsePooled(tokenizer) != 0 {
   228  		if tokenizer.partialDDL != nil && !strict {
   229  			tokenizer.ParseTree = tokenizer.partialDDL
   230  			return tokenizer.ParseTree, nil
   231  		}
   232  		return nil, tokenizer.LastError
   233  	}
   234  	_, isCommentOnly := tokenizer.ParseTree.(*CommentOnly)
   235  	if tokenizer.ParseTree == nil || isCommentOnly {
   236  		return ParseNext(tokenizer)
   237  	}
   238  	return tokenizer.ParseTree, nil
   239  }
   240  
// ErrEmpty is a sentinel error returned when parsing empty statements.
// Parse2 and ParseStrictDDL return it when the parser succeeds but produces no
// parse tree.
var ErrEmpty = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.EmptyQuery, "Query was empty")
   243  
   244  // SplitStatement returns the first sql statement up to either a ; or EOF
   245  // and the remainder from the given buffer
   246  func SplitStatement(blob string) (string, string, error) {
   247  	tokenizer := NewStringTokenizer(blob)
   248  	tkn := 0
   249  	for {
   250  		tkn, _ = tokenizer.Scan()
   251  		if tkn == 0 || tkn == ';' || tkn == eofChar {
   252  			break
   253  		}
   254  	}
   255  	if tokenizer.LastError != nil {
   256  		return "", "", tokenizer.LastError
   257  	}
   258  	if tkn == ';' {
   259  		return blob[:tokenizer.Pos-1], blob[tokenizer.Pos:], nil
   260  	}
   261  	return blob, "", nil
   262  }
   263  
   264  // SplitStatementToPieces split raw sql statement that may have multi sql pieces to sql pieces
   265  // returns the sql pieces blob contains; or error if sql cannot be parsed
   266  func SplitStatementToPieces(blob string) (pieces []string, err error) {
   267  	// fast path: the vast majority of SQL statements do not have semicolons in them
   268  	if blob == "" {
   269  		return nil, nil
   270  	}
   271  	switch strings.IndexByte(blob, ';') {
   272  	case -1: // if there is no semicolon, return blob as a whole
   273  		return []string{blob}, nil
   274  	case len(blob) - 1: // if there's a single semicolon and it's the last character, return blob without it
   275  		return []string{blob[:len(blob)-1]}, nil
   276  	}
   277  
   278  	pieces = make([]string, 0, 16)
   279  	tokenizer := NewStringTokenizer(blob)
   280  
   281  	tkn := 0
   282  	var stmt string
   283  	stmtBegin := 0
   284  	emptyStatement := true
   285  loop:
   286  	for {
   287  		tkn, _ = tokenizer.Scan()
   288  		switch tkn {
   289  		case ';':
   290  			stmt = blob[stmtBegin : tokenizer.Pos-1]
   291  			if !emptyStatement {
   292  				pieces = append(pieces, stmt)
   293  				emptyStatement = true
   294  			}
   295  			stmtBegin = tokenizer.Pos
   296  		case 0, eofChar:
   297  			blobTail := tokenizer.Pos - 1
   298  			if stmtBegin < blobTail {
   299  				stmt = blob[stmtBegin : blobTail+1]
   300  				if !emptyStatement {
   301  					pieces = append(pieces, stmt)
   302  				}
   303  			}
   304  			break loop
   305  		default:
   306  			emptyStatement = false
   307  		}
   308  	}
   309  
   310  	err = tokenizer.LastError
   311  	return
   312  }
   313  
   314  func IsMySQL80AndAbove() bool {
   315  	return mySQLParserVersion >= "80000"
   316  }