// github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/sqlparse/sqlparser/token.go

/*
Copyright 2017 Google Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sqlparser

import (
	"bytes"
	"fmt"
	"strings"

	"github.com/bingoohuang/gg/pkg/sqlparse/bytes2"
	"github.com/bingoohuang/gg/pkg/sqlparse/sqltypes"
)

const eofChar = 0x100

// Tokenizer is the struct used to generate SQL
// tokens for the parser.
type Tokenizer struct {
	InStream      *strings.Reader
	AllowComments bool
	ForceEOF      bool
	lastChar      uint16
	Position      int
	lastToken     []byte
	LastError     string
	posVarIndex   int
	ParseTree     Statement
	nesting       int
}

// NewStringTokenizer creates a new Tokenizer for the
// sql string.
func NewStringTokenizer(sql string) *Tokenizer {
	return &Tokenizer{InStream: strings.NewReader(sql)}
}
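
// exampleScanAll is an illustrative sketch, not part of the original file:
// it shows how a caller can drain a Tokenizer by hand. Scan returns the
// token type (a constant generated from sql.y, e.g. SELECT or ID) and an
// optional value, and returns 0 once the input is exhausted.
func exampleScanAll() {
	tkn := NewStringTokenizer("select a from t where id = 1")
	for {
		typ, val := tkn.Scan()
		if typ == 0 {
			break // Scan returns 0 at end of input
		}
		fmt.Printf("token %d: %q\n", typ, val)
	}
}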

// keywords is a map of mysql keywords that fall into two categories:
// 1) keywords considered reserved by MySQL
// 2) keywords for us to handle specially in sql.y
//
// Those marked as UNUSED are likely reserved keywords. We add them here so that,
// when rewriting queries, we can properly backtick-quote them so they don't cause issues.
//
// NOTE: If you add new keywords, add them also to the reserved_keywords or
// non_reserved_keywords grammar in sql.y -- this will allow the keyword to be used
// in identifiers. See the docs for each grammar to determine which one to put it into.
var keywords = map[string]int{
	"accessible": UNUSED,
	"add": UNUSED,
	"against": AGAINST,
	"all": ALL,
	"alter": ALTER,
	"analyze": ANALYZE,
	"and": AND,
	"as": AS,
	"asc": ASC,
	"asensitive": UNUSED,
	"before": UNUSED,
	"between": BETWEEN,
	"bigint": UNUSED,
	"binary": BINARY,
	"blob": UNUSED,
	"boolean": BOOLEAN,
	"both": UNUSED,
	"by": BY,
	"call": UNUSED,
	"cascade": UNUSED,
	"case": CASE,
	"cast": CAST,
	"change": UNUSED,
	"character": CHARACTER,
	"check": UNUSED,
	"collate": COLLATE,
	"column": UNUSED,
	"condition": UNUSED,
	"constraint": UNUSED,
	"continue": UNUSED,
	"convert": CONVERT,
	"create": CREATE,
	"cross": CROSS,
	"current_date": CURRENT_DATE,
	"current_time": CURRENT_TIME,
	"current_timestamp": CURRENT_TIMESTAMP,
	"current_user": UNUSED,
	"cursor": UNUSED,
	"database": DATABASE,
	"databases": DATABASES,
	"day_hour": UNUSED,
	"day_microsecond": UNUSED,
	"day_minute": UNUSED,
	"day_second": UNUSED,
	"date": DATE,
	"dec": UNUSED,
	"declare": UNUSED,
	"default": DEFAULT,
	"delayed": UNUSED,
	"delete": DELETE,
	"desc": DESC,
	"describe": DESCRIBE,
	"deterministic": UNUSED,
	"distinct": DISTINCT,
	"distinctrow": UNUSED,
	"div": DIV,
	"double": UNUSED,
	"drop": DROP,
	"duplicate": DUPLICATE,
	"each": UNUSED,
	"else": ELSE,
	"elseif": UNUSED,
	"enclosed": UNUSED,
	"end": END,
	"escape": ESCAPE,
	"escaped": UNUSED,
	"exists": EXISTS,
	"exit": UNUSED,
	"explain": EXPLAIN,
	"expansion": EXPANSION,
	"false": FALSE,
	"fetch": UNUSED,
	"float": UNUSED,
	"float4": UNUSED,
	"float8": UNUSED,
	"for": FOR,
	"force": FORCE,
	"foreign": UNUSED,
	"from": FROM,
	"fulltext": UNUSED,
	"generated": UNUSED,
	"get": UNUSED,
	"grant": UNUSED,
	"group": GROUP,
	"group_concat": GROUP_CONCAT,
	"having": HAVING,
	"high_priority": UNUSED,
	"hour_microsecond": UNUSED,
	"hour_minute": UNUSED,
	"hour_second": UNUSED,
	"if": IF,
	"ignore": IGNORE,
	"in": IN,
	"index": INDEX,
	"infile": UNUSED,
	"inout": UNUSED,
	"inner": INNER,
	"insensitive": UNUSED,
	"insert": INSERT,
	"int": UNUSED,
	"int1": UNUSED,
	"int2": UNUSED,
	"int3": UNUSED,
	"int4": UNUSED,
	"int8": UNUSED,
	"integer": INTEGER,
	"interval": INTERVAL,
	"into": INTO,
	"io_after_gtids": UNUSED,
	"is": IS,
	"iterate": UNUSED,
	"join": JOIN,
	"key": KEY,
	"keys": UNUSED,
	"kill": UNUSED,
	"language": LANGUAGE,
	"last_insert_id": LAST_INSERT_ID,
	"leading": UNUSED,
	"leave": UNUSED,
	"left": LEFT,
	"like": LIKE,
	"limit": LIMIT,
	"linear": UNUSED,
	"lines": UNUSED,
	"load": UNUSED,
	"localtime": LOCALTIME,
	"localtimestamp": LOCALTIMESTAMP,
	"lock": LOCK,
	"long": UNUSED,
	"longblob": UNUSED,
	"longtext": UNUSED,
	"loop": UNUSED,
	"low_priority": UNUSED,
	"master_bind": UNUSED,
	"match": MATCH,
	"maxvalue": UNUSED,
	"mediumblob": UNUSED,
	"mediumint": UNUSED,
	"mediumtext": UNUSED,
	"middleint": UNUSED,
	"minute_microsecond": UNUSED,
	"minute_second": UNUSED,
	"mod": MOD,
	"mode": MODE,
	"modifies": UNUSED,
	"natural": NATURAL,
	"next": NEXT,
	"not": NOT,
	"no_write_to_binlog": UNUSED,
	"null": NULL,
	"numeric": UNUSED,
	"offset": OFFSET,
	"on": ON,
	"optimize": OPTIMIZE,
	"optimizer_costs": UNUSED,
	"option": UNUSED,
"optionally": UNUSED, 219 "or": OR, 220 "order": ORDER, 221 "out": UNUSED, 222 "outer": OUTER, 223 "outfile": UNUSED, 224 "partition": UNUSED, 225 "precision": UNUSED, 226 "primary": UNUSED, 227 "procedure": UNUSED, 228 "query": QUERY, 229 "range": UNUSED, 230 "read": UNUSED, 231 "reads": UNUSED, 232 "read_write": UNUSED, 233 "real": UNUSED, 234 "references": UNUSED, 235 "regexp": REGEXP, 236 "release": UNUSED, 237 "rename": RENAME, 238 "repair": REPAIR, 239 "repeat": UNUSED, 240 "replace": REPLACE, 241 "require": UNUSED, 242 "resignal": UNUSED, 243 "restrict": UNUSED, 244 "return": UNUSED, 245 "revoke": UNUSED, 246 "right": RIGHT, 247 "rlike": REGEXP, 248 "schema": UNUSED, 249 "schemas": UNUSED, 250 "second_microsecond": UNUSED, 251 "select": SELECT, 252 "sensitive": UNUSED, 253 "separator": SEPARATOR, 254 "set": SET, 255 "share": SHARE, 256 "show": SHOW, 257 "signal": UNUSED, 258 "smallint": UNUSED, 259 "spatial": UNUSED, 260 "specific": UNUSED, 261 "sql": UNUSED, 262 "sqlexception": UNUSED, 263 "sqlstate": UNUSED, 264 "sqlwarning": UNUSED, 265 "sql_big_result": UNUSED, 266 "sql_cache": SQL_CACHE, 267 "sql_calc_found_rows": UNUSED, 268 "sql_no_cache": SQL_NO_CACHE, 269 "sql_small_result": UNUSED, 270 "ssl": UNUSED, 271 "starting": UNUSED, 272 "stored": UNUSED, 273 "straight_join": STRAIGHT_JOIN, 274 "table": TABLE, 275 "tables": TABLES, 276 "terminated": UNUSED, 277 "then": THEN, 278 "tinyblob": UNUSED, 279 "tinyint": UNUSED, 280 "tinytext": UNUSED, 281 "to": TO, 282 "trailing": UNUSED, 283 "trigger": UNUSED, 284 "true": TRUE, 285 "truncate": TRUNCATE, 286 "undo": UNUSED, 287 "union": UNION, 288 "unique": UNIQUE, 289 "unlock": UNUSED, 290 "update": UPDATE, 291 "usage": UNUSED, 292 "use": USE, 293 "using": USING, 294 "utc_date": UTC_DATE, 295 "utc_time": UTC_TIME, 296 "utc_timestamp": UTC_TIMESTAMP, 297 "values": VALUES, 298 "varbinary": UNUSED, 299 "varchar": UNUSED, 300 "varcharacter": UNUSED, 301 "varying": UNUSED, 302 "virtual": UNUSED, 303 "view": VIEW, 304 "vitess_keyspaces": VITESS_KEYSPACES, 305 "vitess_shards": VITESS_SHARDS, 306 "vschema_tables": VSCHEMA_TABLES, 307 "when": WHEN, 308 "where": WHERE, 309 "while": UNUSED, 310 "with": WITH, 311 "write": UNUSED, 312 "xor": UNUSED, 313 "year_month": UNUSED, 314 "zerofill": UNUSED, 315 } 316 317 // Lex returns the next token form the Tokenizer. 318 // This function is used by go yacc. 319 func (tkn *Tokenizer) Lex(lval *yySymType) int { 320 typ, val := tkn.Scan() 321 for typ == COMMENT { 322 if tkn.AllowComments { 323 break 324 } 325 typ, val = tkn.Scan() 326 } 327 lval.bytes = val 328 tkn.lastToken = val 329 return typ 330 } 331 332 // Error is called by go yacc if there's a parsing error. 333 func (tkn *Tokenizer) Error(err string) { 334 buf := &bytes2.Buffer{} 335 if tkn.lastToken != nil { 336 fmt.Fprintf(buf, "%s at position %v near '%s'", err, tkn.Position, tkn.lastToken) 337 } else { 338 fmt.Fprintf(buf, "%s at position %v", err, tkn.Position) 339 } 340 tkn.LastError = buf.String() 341 } 342 343 // Scan scans the tokenizer for the next token and returns 344 // the token type and an optional value. 

// Scan scans the tokenizer for the next token and returns
// the token type and an optional value.
func (tkn *Tokenizer) Scan() (int, []byte) {
	if tkn.ForceEOF {
		return 0, nil
	}

	if tkn.lastChar == 0 {
		tkn.next()
	}
	tkn.skipBlank()
	switch ch := tkn.lastChar; {
	case isLetter(ch):
		tkn.next()
		if ch == 'X' || ch == 'x' {
			if tkn.lastChar == '\'' {
				tkn.next()
				return tkn.scanHex()
			}
		}
		return tkn.scanIdentifier(byte(ch))
	case isDigit(ch):
		return tkn.scanNumber(false)
	case ch == ':':
		return tkn.scanBindVar()
	default:
		tkn.next()
		switch ch {
		case eofChar:
			return 0, nil
		case '=', ',', ';', '(', ')', '+', '*', '%', '^', '~':
			return int(ch), nil
		case '&':
			if tkn.lastChar == '&' {
				tkn.next()
				return AND, nil
			}
			return int(ch), nil
		case '|':
			if tkn.lastChar == '|' {
				tkn.next()
				return OR, nil
			}
			return int(ch), nil
		case '?':
			tkn.posVarIndex++
			// buf := new(bytes2.Buffer)
			// fmt.Fprintf(buf, ":v%d", tkn.posVarIndex)
			return VALUE_ARG, []byte("?")
		case '.':
			if isDigit(tkn.lastChar) {
				return tkn.scanNumber(true)
			}
			return int(ch), nil
		case '/':
			switch tkn.lastChar {
			case '/':
				tkn.next()
				return tkn.scanCommentType1("//")
			case '*':
				tkn.next()
				return tkn.scanCommentType2()
			default:
				return int(ch), nil
			}
		case '#':
			tkn.next()
			return tkn.scanCommentType1("#")
		case '-':
			switch tkn.lastChar {
			case '-':
				tkn.next()
				return tkn.scanCommentType1("--")
			case '>':
				tkn.next()
				if tkn.lastChar == '>' {
					tkn.next()
					return JSON_UNQUOTE_EXTRACT_OP, nil
				}
				return JSON_EXTRACT_OP, nil
			}
			return int(ch), nil
		case '<':
			switch tkn.lastChar {
			case '>':
				tkn.next()
				return NE, nil
			case '<':
				tkn.next()
				return SHIFT_LEFT, nil
			case '=':
				tkn.next()
				switch tkn.lastChar {
				case '>':
					tkn.next()
					return NULL_SAFE_EQUAL, nil
				default:
					return LE, nil
				}
			default:
				return int(ch), nil
			}
		case '>':
			switch tkn.lastChar {
			case '=':
				tkn.next()
				return GE, nil
			case '>':
				tkn.next()
				return SHIFT_RIGHT, nil
			default:
				return int(ch), nil
			}
		case '!':
			if tkn.lastChar == '=' {
				tkn.next()
				return NE, nil
			}
			return int(ch), nil
		case '\'', '"':
			return tkn.scanString(ch, STRING)
		case '`':
			return tkn.scanLiteralIdentifier()
		default:
			return LEX_ERROR, []byte{byte(ch)}
		}
	}
}

// skipBlank advances past whitespace (space, tab, CR, LF).
func (tkn *Tokenizer) skipBlank() {
	ch := tkn.lastChar
	for ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' {
		tkn.next()
		ch = tkn.lastChar
	}
}

// scanIdentifier reads an identifier or keyword; keywords are matched
// case-insensitively against the keywords map.
func (tkn *Tokenizer) scanIdentifier(firstByte byte) (int, []byte) {
	buffer := &bytes2.Buffer{}
	buffer.WriteByte(firstByte)
	for isLetter(tkn.lastChar) || isDigit(tkn.lastChar) {
		buffer.WriteByte(byte(tkn.lastChar))
		tkn.next()
	}
	lowered := bytes.ToLower(buffer.Bytes())
	loweredStr := string(lowered)
	if keywordID, found := keywords[loweredStr]; found {
		return keywordID, lowered
	}
	// dual must always be case-insensitive
	if loweredStr == "dual" {
		return ID, lowered
	}
	return ID, buffer.Bytes()
}
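
// exampleKeywordFolding is an illustrative sketch, not part of the original
// file. Because scanIdentifier lower-cases the candidate before the keywords
// lookup, "SELECT", "Select" and "select" all yield the SELECT token, while
// an unknown name such as "MyTable" comes back as an ID with its case intact.
func exampleKeywordFolding() {
	for _, sql := range []string{"SELECT", "Select", "MyTable"} {
		typ, val := NewStringTokenizer(sql).Scan()
		fmt.Printf("%q -> type %d, value %q\n", sql, typ, val)
	}
}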

// scanHex reads the digits of an x'...' hex literal; the digit count must be
// even so the literal decodes to whole bytes.
func (tkn *Tokenizer) scanHex() (int, []byte) {
	buffer := &bytes2.Buffer{}
	tkn.scanMantissa(16, buffer)
	if tkn.lastChar != '\'' {
		return LEX_ERROR, buffer.Bytes()
	}
	tkn.next()
	if buffer.Len()%2 != 0 {
		return LEX_ERROR, buffer.Bytes()
	}
	return HEX, buffer.Bytes()
}

// scanLiteralIdentifier reads a backtick-quoted identifier; a doubled
// backtick escapes a literal backtick.
func (tkn *Tokenizer) scanLiteralIdentifier() (int, []byte) {
	buffer := &bytes2.Buffer{}
	backTickSeen := false
	for {
		if backTickSeen {
			if tkn.lastChar != '`' {
				break
			}
			backTickSeen = false
			buffer.WriteByte('`')
			tkn.next()
			continue
		}
		// The previous char was not a backtick.
		switch tkn.lastChar {
		case '`':
			backTickSeen = true
		case eofChar:
			// Premature EOF.
			return LEX_ERROR, buffer.Bytes()
		default:
			buffer.WriteByte(byte(tkn.lastChar))
		}
		tkn.next()
	}
	if buffer.Len() == 0 {
		return LEX_ERROR, buffer.Bytes()
	}
	return ID, buffer.Bytes()
}

// scanBindVar reads a :name bind variable or ::name list bind variable.
func (tkn *Tokenizer) scanBindVar() (int, []byte) {
	buffer := &bytes2.Buffer{}
	buffer.WriteByte(byte(tkn.lastChar))
	token := VALUE_ARG
	tkn.next()
	if tkn.lastChar == ':' {
		token = LIST_ARG
		buffer.WriteByte(byte(tkn.lastChar))
		tkn.next()
	}
	//if !isLetter(tkn.lastChar) {
	//	return LEX_ERROR, buffer.Bytes()
	//}
	for isLetter(tkn.lastChar) || isDigit(tkn.lastChar) || tkn.lastChar == '?' || tkn.lastChar == '.' {
		buffer.WriteByte(byte(tkn.lastChar))
		tkn.next()
	}
	return token, buffer.Bytes()
}

// scanMantissa consumes consecutive digits of the given base into buffer.
func (tkn *Tokenizer) scanMantissa(base int, buffer *bytes2.Buffer) {
	for digitVal(tkn.lastChar) < base {
		tkn.consumeNext(buffer)
	}
}

// scanNumber reads integer (INTEGRAL), decimal/exponent float (FLOAT),
// and 0x-prefixed hex (HEXNUM) literals.
func (tkn *Tokenizer) scanNumber(seenDecimalPoint bool) (int, []byte) {
	token := INTEGRAL
	buffer := &bytes2.Buffer{}
	if seenDecimalPoint {
		token = FLOAT
		buffer.WriteByte('.')
		tkn.scanMantissa(10, buffer)
		goto exponent
	}

	// 0x construct.
	if tkn.lastChar == '0' {
		tkn.consumeNext(buffer)
		if tkn.lastChar == 'x' || tkn.lastChar == 'X' {
			token = HEXNUM
			tkn.consumeNext(buffer)
			tkn.scanMantissa(16, buffer)
			goto exit
		}
	}

	tkn.scanMantissa(10, buffer)

	if tkn.lastChar == '.' {
		token = FLOAT
		tkn.consumeNext(buffer)
		tkn.scanMantissa(10, buffer)
	}

exponent:
	if tkn.lastChar == 'e' || tkn.lastChar == 'E' {
		token = FLOAT
		tkn.consumeNext(buffer)
		if tkn.lastChar == '+' || tkn.lastChar == '-' {
			tkn.consumeNext(buffer)
		}
		tkn.scanMantissa(10, buffer)
	}

exit:
	// A letter cannot immediately follow a number.
	if isLetter(tkn.lastChar) {
		return LEX_ERROR, buffer.Bytes()
	}

	return token, buffer.Bytes()
}
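
// exampleNumberForms is an illustrative sketch, not part of the original
// file. It exercises the shapes scanNumber recognizes: plain integers
// (INTEGRAL), decimals and exponents (FLOAT), and 0x-prefixed hex (HEXNUM).
// A trailing letter, as in "12a", is a LEX_ERROR because a letter may not
// immediately follow a number.
func exampleNumberForms() {
	for _, sql := range []string{"42", "3.14", "1e10", "0x1F", "12a"} {
		typ, val := NewStringTokenizer(sql).Scan()
		fmt.Printf("%q -> type %d, value %q\n", sql, typ, val)
	}
}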

// scanString reads a quoted string; the delimiter may be escaped by doubling
// it or with a backslash escape decoded through sqltypes.SQLDecodeMap.
func (tkn *Tokenizer) scanString(delim uint16, typ int) (int, []byte) {
	buffer := &bytes2.Buffer{}
	for {
		ch := tkn.lastChar
		tkn.next()
		if ch == delim {
			if tkn.lastChar == delim {
				tkn.next()
			} else {
				break
			}
		} else if ch == '\\' {
			if tkn.lastChar == eofChar {
				return LEX_ERROR, buffer.Bytes()
			}
			if decodedChar := sqltypes.SQLDecodeMap[byte(tkn.lastChar)]; decodedChar == sqltypes.DontEscape {
				ch = tkn.lastChar
			} else {
				ch = uint16(decodedChar)
			}
			tkn.next()
		}
		if ch == eofChar {
			return LEX_ERROR, buffer.Bytes()
		}
		buffer.WriteByte(byte(ch))
	}
	return typ, buffer.Bytes()
}

// scanCommentType1 reads a single-line comment through the end of the line.
func (tkn *Tokenizer) scanCommentType1(prefix string) (int, []byte) {
	buffer := &bytes2.Buffer{}
	buffer.WriteString(prefix)
	for tkn.lastChar != eofChar {
		if tkn.lastChar == '\n' {
			tkn.consumeNext(buffer)
			break
		}
		tkn.consumeNext(buffer)
	}
	return COMMENT, buffer.Bytes()
}

// scanCommentType2 reads a /* ... */ comment through the closing marker.
func (tkn *Tokenizer) scanCommentType2() (int, []byte) {
	buffer := &bytes2.Buffer{}
	buffer.WriteString("/*")
	for {
		if tkn.lastChar == '*' {
			tkn.consumeNext(buffer)
			if tkn.lastChar == '/' {
				tkn.consumeNext(buffer)
				break
			}
			continue
		}
		if tkn.lastChar == eofChar {
			return LEX_ERROR, buffer.Bytes()
		}
		tkn.consumeNext(buffer)
	}
	return COMMENT, buffer.Bytes()
}

// consumeNext copies the current character into buffer and advances.
func (tkn *Tokenizer) consumeNext(buffer *bytes2.Buffer) {
	if tkn.lastChar == eofChar {
		// This should never happen.
		panic("unexpected EOF")
	}
	buffer.WriteByte(byte(tkn.lastChar))
	tkn.next()
}

// next advances to the next input byte, recording eofChar at end of input.
func (tkn *Tokenizer) next() {
	if ch, err := tkn.InStream.ReadByte(); err != nil {
		// Only EOF is possible.
		tkn.lastChar = eofChar
	} else {
		tkn.lastChar = uint16(ch)
	}
	tkn.Position++
}

func isLetter(ch uint16) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch == '@'
}

func digitVal(ch uint16) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch) - '0'
	case 'a' <= ch && ch <= 'f':
		return int(ch) - 'a' + 10
	case 'A' <= ch && ch <= 'F':
		return int(ch) - 'A' + 10
	}
	return 16 // larger than any legal digit val
}

func isDigit(ch uint16) bool {
	return '0' <= ch && ch <= '9'
}
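
// exampleStringEscapes is an illustrative sketch, not part of the original
// file. It shows the two escape styles scanString accepts inside quoted
// strings: a doubled delimiter ('it''s') and a backslash escape ('a\nb'),
// the latter decoded through sqltypes.SQLDecodeMap (assumed to map 'n' to a
// newline, as in the vitess escape tables this code derives from).
func exampleStringEscapes() {
	for _, sql := range []string{`'it''s'`, `'a\nb'`} {
		typ, val := NewStringTokenizer(sql).Scan()
		fmt.Printf("type %d, value %q\n", typ, val)
	}
}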