vitess.io/vitess@v0.16.2/go/vt/sqlparser/token.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sqlparser
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  
    24  	"vitess.io/vitess/go/sqltypes"
    25  )
    26  
const (
	// eofChar is the end-of-input sentinel returned by cur/peek.
	// It is deliberately outside the valid byte range (0x00-0xFF) so it
	// can never collide with a real input character.
	eofChar = 0x100
)
    30  
// Tokenizer is the struct used to generate SQL
// tokens for the parser.
type Tokenizer struct {
	// AllowComments makes Lex hand COMMENT tokens to the parser instead
	// of silently discarding them.
	AllowComments bool
	// SkipSpecialComments makes Scan treat /*! ... */ pragmas as ordinary
	// comments instead of tokenizing their contents.
	SkipSpecialComments bool
	// SkipToEnd makes the next Lex call discard tokens until the end of
	// the current statement (see skipStatement).
	SkipToEnd bool
	// LastError records the most recent parse error (set by Error).
	LastError error
	// ParseTree receives the parsed statement; populated by the parser,
	// not by this file.
	ParseTree Statement
	// BindVars collects bind variable names; populated outside this file
	// (presumably by the parser) — initialized in NewStringTokenizer.
	BindVars map[string]struct{}

	lastToken      string     // value of the most recent token returned by Lex; used in error messages
	posVarIndex    int        // counter used to rewrite '?' placeholders as :v1, :v2, ...
	partialDDL     Statement  // partially parsed DDL; cleared by Lex on statement end or lex error
	nesting        int        // not referenced in this file; presumably maintained by the parser — reset clears it
	multi          bool       // when true, Scan treats ';' as end-of-input so statements can be parsed one at a time
	specialComment *Tokenizer // nested tokenizer over the contents of a /*! ... */ comment, when active

	Pos int    // current scan offset into buf
	buf string // the SQL text being tokenized
}
    51  
// NewStringTokenizer creates a new Tokenizer for the
// sql string.
func NewStringTokenizer(sql string) *Tokenizer {
	// Ensure the parser-version flag has been processed before any
	// tokenization happens (helper defined elsewhere in the package).
	checkParserVersionFlag()

	return &Tokenizer{
		buf:      sql,
		BindVars: make(map[string]struct{}),
	}
}
    62  
// Lex returns the next token from the Tokenizer.
// This function is used by go yacc.
func (tkn *Tokenizer) Lex(lval *yySymType) int {
	if tkn.SkipToEnd {
		// A re-sync was requested: discard everything up to the end of
		// the current statement and return the terminating token.
		return tkn.skipStatement()
	}

	typ, val := tkn.Scan()
	// Drop comments unless the caller explicitly asked to keep them.
	for typ == COMMENT {
		if tkn.AllowComments {
			break
		}
		typ, val = tkn.Scan()
	}
	if typ == 0 || typ == ';' || typ == LEX_ERROR {
		// If encounter end of statement or invalid token,
		// we should not accept partially parsed DDLs. They
		// should instead result in parser errors. See the
		// Parse function to see how this is handled.
		tkn.partialDDL = nil
	}
	lval.str = val
	tkn.lastToken = val
	return typ
}
    88  
    89  // PositionedErr holds context related to parser errors
    90  type PositionedErr struct {
    91  	Err  string
    92  	Pos  int
    93  	Near string
    94  }
    95  
    96  func (p PositionedErr) Error() string {
    97  	if p.Near != "" {
    98  		return fmt.Sprintf("%s at position %v near '%s'", p.Err, p.Pos, p.Near)
    99  	}
   100  	return fmt.Sprintf("%s at position %v", p.Err, p.Pos)
   101  }
   102  
// Error is called by go yacc if there's a parsing error.
// It records a PositionedErr (with a 1-based position) in LastError and
// then skips ahead to the next statement so that parsing can resume.
func (tkn *Tokenizer) Error(err string) {
	tkn.LastError = PositionedErr{Err: err, Pos: tkn.Pos + 1, Near: tkn.lastToken}

	// Try and re-sync to the next statement
	tkn.skipStatement()
}
   110  
// Scan scans the tokenizer for the next token and returns
// the token type and an optional value.
//
// Token types are either single characters (returned as their own code
// point) or named parser token constants (AT_ID, STRING, COMMENT, ...).
// A return of 0 signals end of input (or end of the current statement in
// multi mode); LEX_ERROR signals an invalid token.
func (tkn *Tokenizer) Scan() (int, string) {
	if tkn.specialComment != nil {
		// Enter specialComment scan mode.
		// for scanning such kind of comment: /*! MySQL-specific code */
		specialComment := tkn.specialComment
		tok, val := specialComment.Scan()
		if tok != 0 {
			// return the specialComment scan result as the result
			return tok, val
		}
		// leave specialComment scan mode after all stream consumed.
		tkn.specialComment = nil
	}

	tkn.skipBlank()
	switch ch := tkn.cur(); {
	case ch == '@':
		// @ident / @@ident: user-defined or system variable.
		tokenID := AT_ID
		tkn.skip(1)
		if tkn.cur() == '@' {
			tokenID = AT_AT_ID
			tkn.skip(1)
		}
		var tID int
		var tBytes string
		if tkn.cur() == '`' {
			// Backtick-quoted variable name, e.g. @`odd name`.
			tkn.skip(1)
			tID, tBytes = tkn.scanLiteralIdentifier()
		} else if tkn.cur() == eofChar {
			return LEX_ERROR, ""
		} else {
			tID, tBytes = tkn.scanIdentifier(true)
		}
		if tID == LEX_ERROR {
			return tID, ""
		}
		return tokenID, tBytes
	case isLetter(ch):
		// x'...' is a hexadecimal literal.
		if ch == 'X' || ch == 'x' {
			if tkn.peek(1) == '\'' {
				tkn.skip(2)
				return tkn.scanHex()
			}
		}
		// b'...' is a bit literal.
		if ch == 'B' || ch == 'b' {
			if tkn.peek(1) == '\'' {
				tkn.skip(2)
				return tkn.scanBitLiteral()
			}
		}
		// N\'literal' is used to create a string in the national character set
		if ch == 'N' || ch == 'n' {
			nxt := tkn.peek(1)
			if nxt == '\'' || nxt == '"' {
				tkn.skip(2)
				return tkn.scanString(nxt, NCHAR_STRING)
			}
		}
		return tkn.scanIdentifier(false)
	case isDigit(ch):
		return tkn.scanNumber()
	case ch == ':':
		return tkn.scanBindVar()
	case ch == ';':
		if tkn.multi {
			// In multi mode, ';' is treated as EOF. So, we don't advance.
			// Repeated calls to Scan will keep returning 0 until ParseNext
			// forces the advance.
			return 0, ""
		}
		tkn.skip(1)
		return ';', ""
	case ch == eofChar:
		return 0, ""
	default:
		// A decimal literal may start with '.', e.g. ".5".
		if ch == '.' && isDigit(tkn.peek(1)) {
			return tkn.scanNumber()
		}

		tkn.skip(1)
		switch ch {
		case '=', ',', '(', ')', '+', '*', '%', '^', '~':
			// Single-character operators are returned as themselves.
			return int(ch), ""
		case '&':
			if tkn.cur() == '&' {
				tkn.skip(1)
				return AND, ""
			}
			return int(ch), ""
		case '|':
			if tkn.cur() == '|' {
				tkn.skip(1)
				return OR, ""
			}
			return int(ch), ""
		case '?':
			// Positional placeholder: rewritten into a named bind var :vN.
			tkn.posVarIndex++
			buf := make([]byte, 0, 8)
			buf = append(buf, ":v"...)
			buf = strconv.AppendInt(buf, int64(tkn.posVarIndex), 10)
			return VALUE_ARG, string(buf)
		case '.':
			return int(ch), ""
		case '/':
			switch tkn.cur() {
			case '/':
				// '//' line comment.
				tkn.skip(1)
				return tkn.scanCommentType1(2)
			case '*':
				tkn.skip(1)
				if tkn.cur() == '!' && !tkn.SkipSpecialComments {
					// '/*!' MySQL-specific comment: tokenize its contents.
					tkn.skip(1)
					return tkn.scanMySQLSpecificComment()
				}
				return tkn.scanCommentType2()
			default:
				return int(ch), ""
			}
		case '#':
			return tkn.scanCommentType1(1)
		case '-':
			switch tkn.cur() {
			case '-':
				// '--' only starts a comment when followed by whitespace or EOF.
				nextChar := tkn.peek(1)
				if nextChar == ' ' || nextChar == '\n' || nextChar == '\t' || nextChar == '\r' || nextChar == eofChar {
					tkn.skip(1)
					return tkn.scanCommentType1(2)
				}
			case '>':
				// JSON extraction operators '->' and '->>'.
				tkn.skip(1)
				if tkn.cur() == '>' {
					tkn.skip(1)
					return JSON_UNQUOTE_EXTRACT_OP, ""
				}
				return JSON_EXTRACT_OP, ""
			}
			return int(ch), ""
		case '<':
			switch tkn.cur() {
			case '>':
				tkn.skip(1)
				return NE, ""
			case '<':
				tkn.skip(1)
				return SHIFT_LEFT, ""
			case '=':
				tkn.skip(1)
				switch tkn.cur() {
				case '>':
					// '<=>' NULL-safe equality operator.
					tkn.skip(1)
					return NULL_SAFE_EQUAL, ""
				default:
					return LE, ""
				}
			default:
				return int(ch), ""
			}
		case '>':
			switch tkn.cur() {
			case '=':
				tkn.skip(1)
				return GE, ""
			case '>':
				tkn.skip(1)
				return SHIFT_RIGHT, ""
			default:
				return int(ch), ""
			}
		case '!':
			if tkn.cur() == '=' {
				tkn.skip(1)
				return NE, ""
			}
			return int(ch), ""
		case '\'', '"':
			return tkn.scanString(ch, STRING)
		case '`':
			return tkn.scanLiteralIdentifier()
		default:
			return LEX_ERROR, string(byte(ch))
		}
	}
}
   296  
   297  // skipStatement scans until end of statement.
   298  func (tkn *Tokenizer) skipStatement() int {
   299  	tkn.SkipToEnd = false
   300  	for {
   301  		typ, _ := tkn.Scan()
   302  		if typ == 0 || typ == ';' || typ == LEX_ERROR {
   303  			return typ
   304  		}
   305  	}
   306  }
   307  
   308  // skipBlank skips the cursor while it finds whitespace
   309  func (tkn *Tokenizer) skipBlank() {
   310  	ch := tkn.cur()
   311  	for ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' {
   312  		tkn.skip(1)
   313  		ch = tkn.cur()
   314  	}
   315  }
   316  
   317  // scanIdentifier scans a language keyword or @-encased variable
   318  func (tkn *Tokenizer) scanIdentifier(isVariable bool) (int, string) {
   319  	start := tkn.Pos
   320  	tkn.skip(1)
   321  
   322  	for {
   323  		ch := tkn.cur()
   324  		if !isLetter(ch) && !isDigit(ch) && !(isVariable && isCarat(ch)) {
   325  			break
   326  		}
   327  		tkn.skip(1)
   328  	}
   329  	keywordName := tkn.buf[start:tkn.Pos]
   330  	if keywordID, found := keywordLookupTable.LookupString(keywordName); found {
   331  		return keywordID, keywordName
   332  	}
   333  	// dual must always be case-insensitive
   334  	if keywordASCIIMatch(keywordName, "dual") {
   335  		return ID, "dual"
   336  	}
   337  	return ID, keywordName
   338  }
   339  
   340  // scanHex scans a hex numeral; assumes x' or X' has already been scanned
   341  func (tkn *Tokenizer) scanHex() (int, string) {
   342  	start := tkn.Pos
   343  	tkn.scanMantissa(16)
   344  	hex := tkn.buf[start:tkn.Pos]
   345  	if tkn.cur() != '\'' {
   346  		return LEX_ERROR, hex
   347  	}
   348  	tkn.skip(1)
   349  	if len(hex)%2 != 0 {
   350  		return LEX_ERROR, hex
   351  	}
   352  	return HEX, hex
   353  }
   354  
   355  // scanBitLiteral scans a binary numeric literal; assumes b' or B' has already been scanned
   356  func (tkn *Tokenizer) scanBitLiteral() (int, string) {
   357  	start := tkn.Pos
   358  	tkn.scanMantissa(2)
   359  	bit := tkn.buf[start:tkn.Pos]
   360  	if tkn.cur() != '\'' {
   361  		return LEX_ERROR, bit
   362  	}
   363  	tkn.skip(1)
   364  	return BIT_LITERAL, bit
   365  }
   366  
// scanLiteralIdentifierSlow scans an identifier surrounded by backticks which may
// contain escape sequences instead of it. This method is only called from
// scanLiteralIdentifier once the first escape sequence is found in the identifier.
// The provided `buf` contains the contents of the identifier that have been scanned
// so far.
func (tkn *Tokenizer) scanLiteralIdentifierSlow(buf *strings.Builder) (int, string) {
	// backTickSeen tracks whether the previous character was a backtick:
	// a backtick followed by another backtick is an escaped literal backtick,
	// while a backtick followed by anything else closes the identifier.
	backTickSeen := true
	for {
		if backTickSeen {
			if tkn.cur() != '`' {
				// The previous backtick was a closing delimiter.
				break
			}
			// Doubled backtick: emit a single literal backtick.
			backTickSeen = false
			buf.WriteByte('`')
			tkn.skip(1)
			continue
		}
		// The previous char was not a backtick.
		switch tkn.cur() {
		case '`':
			backTickSeen = true
		case eofChar:
			// Premature EOF.
			return LEX_ERROR, buf.String()
		default:
			buf.WriteByte(byte(tkn.cur()))
			// keep scanning
		}
		tkn.skip(1)
	}
	return ID, buf.String()
}
   399  
// scanLiteralIdentifier scans an identifier enclosed by backticks. If the identifier
// is a simple literal, it'll be returned as a slice of the input buffer. If the identifier
// contains escape sequences, this function will fall back to scanLiteralIdentifierSlow
func (tkn *Tokenizer) scanLiteralIdentifier() (int, string) {
	start := tkn.Pos
	for {
		switch tkn.cur() {
		case '`':
			if tkn.peek(1) != '`' {
				// Single closing backtick; an empty identifier (``) is invalid.
				if tkn.Pos == start {
					return LEX_ERROR, ""
				}
				// Fast path: return a slice of the buffer, excluding the
				// closing backtick.
				tkn.skip(1)
				return ID, tkn.buf[start : tkn.Pos-1]
			}

			// Doubled backtick is an escape sequence: switch to the slow
			// path, which unescapes into a fresh buffer.
			var buf strings.Builder
			buf.WriteString(tkn.buf[start:tkn.Pos])
			tkn.skip(1)
			return tkn.scanLiteralIdentifierSlow(&buf)
		case eofChar:
			// Premature EOF.
			return LEX_ERROR, tkn.buf[start:tkn.Pos]
		default:
			tkn.skip(1)
		}
	}
}
   428  
// scanBindVar scans a bind variable; assumes a ':' has been scanned right before
func (tkn *Tokenizer) scanBindVar() (int, string) {
	start := tkn.Pos
	token := VALUE_ARG

	tkn.skip(1)
	// If : is followed by a digit, then it is an offset value arg. Example - :1, :10
	if isDigit(tkn.cur()) {
		tkn.scanMantissa(10)
		// Note: the leading ':' is excluded from the returned value (start+1).
		return OFFSET_ARG, tkn.buf[start+1 : tkn.Pos]
	}
	// If : is followed by another : it is a list arg. Example ::v1, ::list
	if tkn.cur() == ':' {
		token = LIST_ARG
		tkn.skip(1)
	}
	if !isLetter(tkn.cur()) {
		// The name must start with a letter; anything else is invalid.
		return LEX_ERROR, tkn.buf[start:tkn.Pos]
	}
	// If : is followed by a letter, it is a bindvariable. Example :v1, :v2
	// Names may continue with letters, digits and '.'.
	for {
		ch := tkn.cur()
		if !isLetter(ch) && !isDigit(ch) && ch != '.' {
			break
		}
		tkn.skip(1)
	}
	return token, tkn.buf[start:tkn.Pos]
}
   458  
   459  // scanMantissa scans a sequence of numeric characters with the same base.
   460  // This is a helper function only called from the numeric scanners
   461  func (tkn *Tokenizer) scanMantissa(base int) {
   462  	for digitVal(tkn.cur()) < base {
   463  		tkn.skip(1)
   464  	}
   465  }
   466  
// scanNumber scans any SQL numeric literal, either floating point or integer
func (tkn *Tokenizer) scanNumber() (int, string) {
	start := tkn.Pos
	token := INTEGRAL

	// Leading '.' means a decimal with no integer part, e.g. ".5".
	if tkn.cur() == '.' {
		token = DECIMAL
		tkn.skip(1)
		tkn.scanMantissa(10)
		goto exponent
	}

	// 0x construct.
	if tkn.cur() == '0' {
		tkn.skip(1)
		if tkn.cur() == 'x' || tkn.cur() == 'X' {
			token = HEXNUM
			tkn.skip(1)
			tkn.scanMantissa(16)
			goto exit
		}
		// 0b construct: binary numeric literal.
		if tkn.cur() == 'b' || tkn.cur() == 'B' {
			token = BITNUM
			tkn.skip(1)
			tkn.scanMantissa(2)
			goto exit
		}
	}

	tkn.scanMantissa(10)

	// Fractional part turns the literal into a DECIMAL.
	if tkn.cur() == '.' {
		token = DECIMAL
		tkn.skip(1)
		tkn.scanMantissa(10)
	}

exponent:
	// An exponent part (with optional sign) turns the literal into a FLOAT.
	if tkn.cur() == 'e' || tkn.cur() == 'E' {
		token = FLOAT
		tkn.skip(1)
		if tkn.cur() == '+' || tkn.cur() == '-' {
			tkn.skip(1)
		}
		tkn.scanMantissa(10)
	}

exit:
	if isLetter(tkn.cur()) {
		// A letter cannot immediately follow a float number.
		if token == FLOAT || token == DECIMAL {
			return LEX_ERROR, tkn.buf[start:tkn.Pos]
		}
		// A letter seen after a few numbers means that we should parse this
		// as an identifier and not a number.
		for {
			ch := tkn.cur()
			if !isLetter(ch) && !isDigit(ch) {
				break
			}
			tkn.skip(1)
		}
		return ID, tkn.buf[start:tkn.Pos]
	}

	return token, tkn.buf[start:tkn.Pos]
}
   534  
// scanString scans a string surrounded by the given `delim`, which can be
// either single or double quotes. Assumes that the given delimiter has just
// been scanned. If the string contains any escape sequences, this function
// will fall back to scanStringSlow
func (tkn *Tokenizer) scanString(delim uint16, typ int) (int, string) {
	start := tkn.Pos

	for {
		switch tkn.cur() {
		case delim:
			if tkn.peek(1) != delim {
				// Single closing delimiter: fast path succeeded, return a
				// slice of the input buffer (excluding the delimiter).
				tkn.skip(1)
				return typ, tkn.buf[start : tkn.Pos-1]
			}
			// A doubled delimiter ('' or "") is an escape sequence: fall
			// through to the slow path, same as for backslash escapes.
			fallthrough

		case '\\':
			var buffer strings.Builder
			buffer.WriteString(tkn.buf[start:tkn.Pos])
			return tkn.scanStringSlow(&buffer, delim, typ)

		case eofChar:
			// Unterminated string.
			return LEX_ERROR, tkn.buf[start:tkn.Pos]
		}

		tkn.skip(1)
	}
}
   563  
// scanStringSlow scans a string surrounded by the given `delim` and containing
// escape sequences. The given `buffer` contains the contents of the string
// that have been scanned so far.
func (tkn *Tokenizer) scanStringSlow(buffer *strings.Builder, delim uint16, typ int) (int, string) {
	for {
		ch := tkn.cur()
		if ch == eofChar {
			// Unterminated string.
			return LEX_ERROR, buffer.String()
		}

		if ch != delim && ch != '\\' {
			// Scan ahead to the next interesting character.
			start := tkn.Pos
			for ; tkn.Pos < len(tkn.buf); tkn.Pos++ {
				ch = uint16(tkn.buf[tkn.Pos])
				if ch == delim || ch == '\\' {
					break
				}
			}

			buffer.WriteString(tkn.buf[start:tkn.Pos])
			if tkn.Pos >= len(tkn.buf) {
				// Reached the end of the buffer without finding a delim or
				// escape character.
				tkn.skip(1)
				continue
			}
		}
		tkn.skip(1) // Read one past the delim or escape character.

		if ch == '\\' {
			if tkn.cur() == eofChar {
				// String terminates mid escape character.
				return LEX_ERROR, buffer.String()
			}
			// Preserve escaping of % and _
			if tkn.cur() == '%' || tkn.cur() == '_' {
				buffer.WriteByte('\\')
				ch = tkn.cur()
			} else if decodedChar := sqltypes.SQLDecodeMap[byte(tkn.cur())]; decodedChar == sqltypes.DontEscape {
				// Not a recognized escape sequence: keep the character as-is.
				ch = tkn.cur()
			} else {
				ch = uint16(decodedChar)
			}
		} else if ch == delim && tkn.cur() != delim {
			// Correctly terminated string, which is not a double delim.
			break
		}

		buffer.WriteByte(byte(ch))
		tkn.skip(1)
	}

	return typ, buffer.String()
}
   620  
   621  // scanCommentType1 scans a SQL line-comment, which is applied until the end
   622  // of the line. The given prefix length varies based on whether the comment
   623  // is started with '//', '--' or '#'.
   624  func (tkn *Tokenizer) scanCommentType1(prefixLen int) (int, string) {
   625  	start := tkn.Pos - prefixLen
   626  	for tkn.cur() != eofChar {
   627  		if tkn.cur() == '\n' {
   628  			tkn.skip(1)
   629  			break
   630  		}
   631  		tkn.skip(1)
   632  	}
   633  	return COMMENT, tkn.buf[start:tkn.Pos]
   634  }
   635  
   636  // scanCommentType2 scans a '/*' delimited comment; assumes the opening
   637  // prefix has already been scanned
   638  func (tkn *Tokenizer) scanCommentType2() (int, string) {
   639  	start := tkn.Pos - 2
   640  	for {
   641  		if tkn.cur() == '*' {
   642  			tkn.skip(1)
   643  			if tkn.cur() == '/' {
   644  				tkn.skip(1)
   645  				break
   646  			}
   647  			continue
   648  		}
   649  		if tkn.cur() == eofChar {
   650  			return LEX_ERROR, tkn.buf[start:tkn.Pos]
   651  		}
   652  		tkn.skip(1)
   653  	}
   654  	return COMMENT, tkn.buf[start:tkn.Pos]
   655  }
   656  
// scanMySQLSpecificComment scans a MySQL comment pragma, which always starts
// with '/*!'; assumes the opening '/*!' has already been consumed.
func (tkn *Tokenizer) scanMySQLSpecificComment() (int, string) {
	start := tkn.Pos - 3 // include the already-consumed '/*!'
	for {
		if tkn.cur() == '*' {
			tkn.skip(1)
			if tkn.cur() == '/' {
				tkn.skip(1)
				break
			}
			continue
		}
		if tkn.cur() == eofChar {
			// Unterminated comment.
			return LEX_ERROR, tkn.buf[start:tkn.Pos]
		}
		tkn.skip(1)
	}

	// Split the pragma into its optional version prefix and the SQL payload
	// (helper defined elsewhere in the package).
	commentVersion, sql := ExtractMysqlComment(tkn.buf[start:tkn.Pos])

	if mySQLParserVersion >= commentVersion {
		// Only add the special comment to the tokenizer if the version of MySQL is higher or equal to the comment version
		tkn.specialComment = NewStringTokenizer(sql)
	}

	// Resume scanning: either inside the special comment (if installed
	// above) or past the comment in the outer buffer.
	return tkn.Scan()
}
   684  
// cur returns the character at the current scan position without advancing,
// or eofChar when the position is past the end of the buffer.
func (tkn *Tokenizer) cur() uint16 {
	return tkn.peek(0)
}
   688  
// skip advances the scan position by dist characters.
func (tkn *Tokenizer) skip(dist int) {
	tkn.Pos += dist
}
   692  
   693  func (tkn *Tokenizer) peek(dist int) uint16 {
   694  	if tkn.Pos+dist >= len(tkn.buf) {
   695  		return eofChar
   696  	}
   697  	return uint16(tkn.buf[tkn.Pos+dist])
   698  }
   699  
// reset clears any internal state.
func (tkn *Tokenizer) reset() {
	tkn.ParseTree = nil
	tkn.partialDDL = nil
	tkn.specialComment = nil
	tkn.posVarIndex = 0
	tkn.nesting = 0
	tkn.SkipToEnd = false
	// NOTE(review): LastError, lastToken, Pos and buf are left untouched —
	// presumably callers manage those separately; confirm before relying on
	// reset for full tokenizer reuse.
}
   709  
   710  func isLetter(ch uint16) bool {
   711  	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch == '$'
   712  }
   713  
   714  func isCarat(ch uint16) bool {
   715  	return ch == '.' || ch == '\'' || ch == '"' || ch == '`'
   716  }
   717  
   718  func digitVal(ch uint16) int {
   719  	switch {
   720  	case '0' <= ch && ch <= '9':
   721  		return int(ch) - '0'
   722  	case 'a' <= ch && ch <= 'f':
   723  		return int(ch) - 'a' + 10
   724  	case 'A' <= ch && ch <= 'F':
   725  		return int(ch) - 'A' + 10
   726  	}
   727  	return 16 // larger than any legal digit val
   728  }
   729  
   730  func isDigit(ch uint16) bool {
   731  	return '0' <= ch && ch <= '9'
   732  }