vitess.io/vitess@v0.16.2/go/vt/sqlparser/parser.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sqlparser
    18  
    19  import (
    20  	"fmt"
    21  	"io"
    22  	"strconv"
    23  	"strings"
    24  	"sync"
    25  
    26  	"vitess.io/vitess/go/internal/flag"
    27  	"vitess.io/vitess/go/vt/log"
    28  	"vitess.io/vitess/go/vt/servenv"
    29  	"vitess.io/vitess/go/vt/vterrors"
    30  
    31  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    32  )
    33  
    34  var versionFlagSync sync.Once
    35  
    36  // parserPool is a pool for parser objects.
    37  var parserPool = sync.Pool{
    38  	New: func() any {
    39  		return &yyParserImpl{}
    40  	},
    41  }
    42  
    43  // zeroParser is a zero-initialized parser to help reinitialize the parser for pooling.
    44  var zeroParser yyParserImpl
    45  
    46  // mySQLParserVersion is the version of MySQL that the parser would emulate
    47  var mySQLParserVersion string
    48  
    49  // yyParsePooled is a wrapper around yyParse that pools the parser objects. There isn't a
    50  // particularly good reason to use yyParse directly, since it immediately discards its parser.
    51  //
    52  // N.B: Parser pooling means that you CANNOT take references directly to parse stack variables (e.g.
    53  // $$ = &$4) in sql.y rules. You must instead add an intermediate reference like so:
    54  //
    55  //	showCollationFilterOpt := $4
    56  //	$$ = &Show{Type: string($2), ShowCollationFilterOpt: &showCollationFilterOpt}
    57  func yyParsePooled(yylex yyLexer) int {
    58  	parser := parserPool.Get().(*yyParserImpl)
    59  	defer func() {
    60  		*parser = zeroParser
    61  		parserPool.Put(parser)
    62  	}()
    63  	return parser.Parse(yylex)
    64  }
    65  
    66  // Instructions for creating new types: If a type
    67  // needs to satisfy an interface, declare that function
    68  // along with that interface. This will help users
    69  // identify the list of types to which they can assert
    70  // those interfaces.
    71  // If the member of a type has a string with a predefined
    72  // list of values, declare those values as const following
    73  // the type.
    74  // For interfaces that define dummy functions to consolidate
    75  // a set of types, define the function as iTypeName.
    76  // This will help avoid name collisions.
    77  
    78  // Parse2 parses the SQL in full and returns a Statement, which
    79  // is the AST representation of the query, and a set of BindVars, which are all the
    80  // bind variables that were found in the original SQL query. If a DDL statement
    81  // is partially parsed but still contains a syntax error, the
    82  // error is ignored and the DDL is returned anyway.
    83  func Parse2(sql string) (Statement, BindVars, error) {
    84  	tokenizer := NewStringTokenizer(sql)
    85  	if yyParsePooled(tokenizer) != 0 {
    86  		if tokenizer.partialDDL != nil {
    87  			if typ, val := tokenizer.Scan(); typ != 0 {
    88  				return nil, nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val))
    89  			}
    90  			log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError)
    91  			switch x := tokenizer.partialDDL.(type) {
    92  			case DBDDLStatement:
    93  				x.SetFullyParsed(false)
    94  			case DDLStatement:
    95  				x.SetFullyParsed(false)
    96  			}
    97  			tokenizer.ParseTree = tokenizer.partialDDL
    98  			return tokenizer.ParseTree, tokenizer.BindVars, nil
    99  		}
   100  		return nil, nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error())
   101  	}
   102  	if tokenizer.ParseTree == nil {
   103  		return nil, nil, ErrEmpty
   104  	}
   105  	return tokenizer.ParseTree, tokenizer.BindVars, nil
   106  }
   107  
   108  func checkParserVersionFlag() {
   109  	if flag.Parsed() {
   110  		versionFlagSync.Do(func() {
   111  			convVersion, err := convertMySQLVersionToCommentVersion(servenv.MySQLServerVersion())
   112  			if err != nil {
   113  				log.Fatalf("unable to parse mysql version: %v", err)
   114  			}
   115  			mySQLParserVersion = convVersion
   116  		})
   117  	}
   118  }
   119  
// SetParserVersion sets the mysql parser version.
//
// The value is stored as-is in the package-level mySQLParserVersion; no
// validation or conversion is performed here, and the assignment is not
// guarded by a lock.
// NOTE(review): callers presumably pass the "comment version" form produced
// by convertMySQLVersionToCommentVersion (e.g. "80030") - confirm.
func SetParserVersion(version string) {
	mySQLParserVersion = version
}
   124  
// GetParserVersion returns the version of the mysql parser, i.e. the current
// value of the package-level mySQLParserVersion. The result is the empty
// string if nothing has assigned it yet.
func GetParserVersion() string {
	return mySQLParserVersion
}
   129  
   130  // convertMySQLVersionToCommentVersion converts the MySQL version into comment version format.
   131  func convertMySQLVersionToCommentVersion(version string) (string, error) {
   132  	var res = make([]int, 3)
   133  	idx := 0
   134  	val := ""
   135  	for _, c := range version {
   136  		if c <= '9' && c >= '0' {
   137  			val += string(c)
   138  		} else if c == '.' {
   139  			v, err := strconv.Atoi(val)
   140  			if err != nil {
   141  				return "", err
   142  			}
   143  			val = ""
   144  			res[idx] = v
   145  			idx++
   146  			if idx == 3 {
   147  				break
   148  			}
   149  		} else {
   150  			break
   151  		}
   152  	}
   153  	if val != "" {
   154  		v, err := strconv.Atoi(val)
   155  		if err != nil {
   156  			return "", err
   157  		}
   158  		res[idx] = v
   159  		idx++
   160  	}
   161  	if idx == 0 {
   162  		return "", vterrors.Errorf(vtrpcpb.Code_INVALID_ARGUMENT, "MySQL version not correctly setup - %s.", version)
   163  	}
   164  
   165  	return fmt.Sprintf("%01d%02d%02d", res[0], res[1], res[2]), nil
   166  }
   167  
   168  // ParseExpr parses an expression and transforms it to an AST
   169  func ParseExpr(sql string) (Expr, error) {
   170  	stmt, err := Parse("select " + sql)
   171  	if err != nil {
   172  		return nil, err
   173  	}
   174  	aliasedExpr := stmt.(*Select).SelectExprs[0].(*AliasedExpr)
   175  	return aliasedExpr.Expr, err
   176  }
   177  
   178  // Parse behaves like Parse2 but does not return a set of bind variables
   179  func Parse(sql string) (Statement, error) {
   180  	stmt, _, err := Parse2(sql)
   181  	return stmt, err
   182  }
   183  
   184  // ParseStrictDDL is the same as Parse except it errors on
   185  // partially parsed DDL statements.
   186  func ParseStrictDDL(sql string) (Statement, error) {
   187  	tokenizer := NewStringTokenizer(sql)
   188  	if yyParsePooled(tokenizer) != 0 {
   189  		return nil, tokenizer.LastError
   190  	}
   191  	if tokenizer.ParseTree == nil {
   192  		return nil, ErrEmpty
   193  	}
   194  	return tokenizer.ParseTree, nil
   195  }
   196  
// ParseTokenizer is a raw interface to parse from the given tokenizer.
// This does not use pooled parsers, and should not be used in general.
// It returns the status code of yyParse; elsewhere in this file a non-zero
// parser status is treated as a parse failure.
func ParseTokenizer(tokenizer *Tokenizer) int {
	return yyParse(tokenizer)
}
   202  
   203  // ParseNext parses a single SQL statement from the tokenizer
   204  // returning a Statement which is the AST representation of the query.
   205  // The tokenizer will always read up to the end of the statement, allowing for
   206  // the next call to ParseNext to parse any subsequent SQL statements. When
   207  // there are no more statements to parse, a error of io.EOF is returned.
   208  func ParseNext(tokenizer *Tokenizer) (Statement, error) {
   209  	return parseNext(tokenizer, false)
   210  }
   211  
   212  // ParseNextStrictDDL is the same as ParseNext except it errors on
   213  // partially parsed DDL statements.
   214  func ParseNextStrictDDL(tokenizer *Tokenizer) (Statement, error) {
   215  	return parseNext(tokenizer, true)
   216  }
   217  
   218  func parseNext(tokenizer *Tokenizer, strict bool) (Statement, error) {
   219  	if tokenizer.cur() == ';' {
   220  		tokenizer.skip(1)
   221  		tokenizer.skipBlank()
   222  	}
   223  	if tokenizer.cur() == eofChar {
   224  		return nil, io.EOF
   225  	}
   226  
   227  	tokenizer.reset()
   228  	tokenizer.multi = true
   229  	if yyParsePooled(tokenizer) != 0 {
   230  		if tokenizer.partialDDL != nil && !strict {
   231  			tokenizer.ParseTree = tokenizer.partialDDL
   232  			return tokenizer.ParseTree, nil
   233  		}
   234  		return nil, tokenizer.LastError
   235  	}
   236  	_, isCommentOnly := tokenizer.ParseTree.(*CommentOnly)
   237  	if tokenizer.ParseTree == nil || isCommentOnly {
   238  		return ParseNext(tokenizer)
   239  	}
   240  	return tokenizer.ParseTree, nil
   241  }
   242  
// ErrEmpty is a sentinel error returned when parsing empty statements.
// Parse2 and ParseStrictDDL return it when the parser succeeds but produces
// no parse tree.
var ErrEmpty = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.EmptyQuery, "Query was empty")
   245  
   246  // SplitStatement returns the first sql statement up to either a ; or EOF
   247  // and the remainder from the given buffer
   248  func SplitStatement(blob string) (string, string, error) {
   249  	tokenizer := NewStringTokenizer(blob)
   250  	tkn := 0
   251  	for {
   252  		tkn, _ = tokenizer.Scan()
   253  		if tkn == 0 || tkn == ';' || tkn == eofChar {
   254  			break
   255  		}
   256  	}
   257  	if tokenizer.LastError != nil {
   258  		return "", "", tokenizer.LastError
   259  	}
   260  	if tkn == ';' {
   261  		return blob[:tokenizer.Pos-1], blob[tokenizer.Pos:], nil
   262  	}
   263  	return blob, "", nil
   264  }
   265  
   266  // SplitStatementToPieces split raw sql statement that may have multi sql pieces to sql pieces
   267  // returns the sql pieces blob contains; or error if sql cannot be parsed
   268  func SplitStatementToPieces(blob string) (pieces []string, err error) {
   269  	// fast path: the vast majority of SQL statements do not have semicolons in them
   270  	if blob == "" {
   271  		return nil, nil
   272  	}
   273  	switch strings.IndexByte(blob, ';') {
   274  	case -1: // if there is no semicolon, return blob as a whole
   275  		return []string{blob}, nil
   276  	case len(blob) - 1: // if there's a single semicolon and it's the last character, return blob without it
   277  		return []string{blob[:len(blob)-1]}, nil
   278  	}
   279  
   280  	pieces = make([]string, 0, 16)
   281  	tokenizer := NewStringTokenizer(blob)
   282  
   283  	tkn := 0
   284  	var stmt string
   285  	stmtBegin := 0
   286  	emptyStatement := true
   287  loop:
   288  	for {
   289  		tkn, _ = tokenizer.Scan()
   290  		switch tkn {
   291  		case ';':
   292  			stmt = blob[stmtBegin : tokenizer.Pos-1]
   293  			if !emptyStatement {
   294  				pieces = append(pieces, stmt)
   295  				emptyStatement = true
   296  			}
   297  			stmtBegin = tokenizer.Pos
   298  		case 0, eofChar:
   299  			blobTail := tokenizer.Pos - 1
   300  			if stmtBegin < blobTail {
   301  				stmt = blob[stmtBegin : blobTail+1]
   302  				if !emptyStatement {
   303  					pieces = append(pieces, stmt)
   304  				}
   305  			}
   306  			break loop
   307  		default:
   308  			emptyStatement = false
   309  		}
   310  	}
   311  
   312  	err = tokenizer.LastError
   313  	return
   314  }
   315  
   316  func IsMySQL80AndAbove() bool {
   317  	return mySQLParserVersion >= "80000"
   318  }