github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/parser/tokens.go (about)

     1  // Copyright 2016 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package parser
    15  
    16  func isLetter(ch byte) bool {
    17  	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
    18  }
    19  
    20  func isDigit(ch byte) bool {
    21  	return ch >= '0' && ch <= '9'
    22  }
    23  
    24  func isIdentChar(ch byte) bool {
    25  	return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || isIdentExtend(ch)
    26  }
    27  
    28  func isIdentExtend(ch byte) bool {
    29  	return ch >= 0x80
    30  }
    31  
    32  func isUserVarChar(ch byte) bool {
    33  	return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || ch == '.' || isIdentExtend(ch)
    34  }
    35  
    36  type trieNode struct {
    37  	childs [256]*trieNode
    38  	token  int
    39  	fn     func(s *Lexer) (int, Pos, string)
    40  }
    41  
    42  var ruleTable trieNode
    43  
    44  func initTokenByte(c byte, tok int) {
    45  	if ruleTable.childs[c] == nil {
    46  		ruleTable.childs[c] = &trieNode{}
    47  	}
    48  	ruleTable.childs[c].token = tok
    49  }
    50  
    51  func initTokenString(str string, tok int) {
    52  	node := &ruleTable
    53  	for _, c := range str {
    54  		if node.childs[c] == nil {
    55  			node.childs[c] = &trieNode{}
    56  		}
    57  		node = node.childs[c]
    58  	}
    59  	node.token = tok
    60  }
    61  
    62  func initTokenFunc(str string, fn func(s *Lexer) (int, Pos, string)) {
    63  	for i := 0; i < len(str); i++ {
    64  		c := str[i]
    65  		if ruleTable.childs[c] == nil {
    66  			ruleTable.childs[c] = &trieNode{}
    67  		}
    68  		ruleTable.childs[c].fn = fn
    69  	}
    70  }
    71  
    72  func init() {
    73  	// invalid is a special token defined in parser.y, when parser meet
    74  	// this token, it will throw an error.
    75  	// set root trie node's token to invalid, so when input match nothing
    76  	// in the trie, invalid will be the default return token.
    77  	ruleTable.token = invalid
    78  	initTokenByte('+', int('+'))
    79  	initTokenByte('-', int('-'))
    80  	initTokenByte('>', int('>'))
    81  	initTokenByte('<', int('<'))
    82  	initTokenByte('(', int('('))
    83  	initTokenByte(')', int(')'))
    84  	initTokenByte('[', int('['))
    85  	initTokenByte(']', int(']'))
    86  	initTokenByte(';', int(';'))
    87  	initTokenByte(',', int(','))
    88  	initTokenByte('&', int('&'))
    89  	initTokenByte('%', int('%'))
    90  	initTokenByte(':', int(':'))
    91  	initTokenByte('|', int('|'))
    92  	initTokenByte('!', int('!'))
    93  	initTokenByte('^', int('^'))
    94  	initTokenByte('~', int('~'))
    95  	initTokenByte('\\', int('\\'))
    96  	initTokenByte('?', paramMarker)
    97  	initTokenByte('=', eq)
    98  	initTokenByte('{', int('{'))
    99  	initTokenByte('}', int('}'))
   100  
   101  	initTokenString("||", pipes)
   102  	initTokenString("&&", andand)
   103  	initTokenString("&^", andnot)
   104  	initTokenString(":=", assignmentEq)
   105  	initTokenString("<=>", nulleq)
   106  	initTokenString(">=", ge)
   107  	initTokenString("<=", le)
   108  	initTokenString("!=", neq)
   109  	initTokenString("<>", neqSynonym)
   110  	initTokenString(".*", allProp)
   111  	initTokenString("\\N", null)
   112  	initTokenString("<-", leftArrow)
   113  	initTokenString("->", rightArrow)
   114  	initTokenString("-[", edgeOutgoingLeft)
   115  	initTokenString("]->", edgeOutgoingRight)
   116  	initTokenString("<-[", edgeIncomingLeft)
   117  	initTokenString("]-", edgeIncomingRight)
   118  	initTokenString("-/", reachOutgoingLeft)
   119  	initTokenString("<-/", reachIncomingLeft)
   120  
   121  	initTokenFunc("/", startWithSlash)
   122  	initTokenFunc("@", startWithAt)
   123  	initTokenFunc("*", startWithStar)
   124  	initTokenFunc("#", startWithSharp)
   125  	initTokenFunc(".", startWithDot)
   126  	initTokenFunc("_$ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", scanIdentifier)
   127  	initTokenFunc("`", scanQuotedIdent)
   128  	initTokenFunc("0123456789", startWithNumber)
   129  	initTokenFunc("'\"", startString)
   130  }
   131  
   132  // tokenMap is a map of known identifiers to the parser token ID.
   133  // Please try to keep the map in alphabetical order.
   134  var tokenMap = map[string]int{
   135  	"ABS":              abs,
   136  	"ALL":              all,
   137  	"ALL_DIFFERENT":    allDifferent,
   138  	"AND":              and,
   139  	"ANY":              any,
   140  	"ARRAY_AGG":        arrayAgg,
   141  	"AS":               as,
   142  	"ASC":              asc,
   143  	"AVG":              avg,
   144  	"BEGIN":            begin,
   145  	"BETWEEN":          between,
   146  	"BOOLEAN":          booleanType,
   147  	"BY":               by,
   148  	"CASE":             caseKwd,
   149  	"CAST":             cast,
   150  	"CEIL":             ceil,
   151  	"CEILING":          ceiling,
   152  	"CHEAPEST":         cheapest,
   153  	"COMMENT":          comment,
   154  	"COMMIT":           commit,
   155  	"COST":             cost,
   156  	"COUNT":            count,
   157  	"CREATE":           create,
   158  	"DECIMAL":          decimalType,
   159  	"DATE":             dateType,
   160  	"DAY":              day,
   161  	"DEFAULT":          defaultKwd,
   162  	"DELETE":           deleteKwd,
   163  	"DESC":             desc,
   164  	"DISTINCT":         distinct,
   165  	"DISTINCTROW":      distinct,
   166  	"DIV":              div,
   167  	"DOUBLE":           doubleType,
   168  	"DROP":             drop,
   169  	"EDGE":             edge,
   170  	"ELEMENT_NUMBER":   elementNumber,
   171  	"ELSE":             elseKwd,
   172  	"END":              end,
   173  	"EXISTS":           exists,
   174  	"EXPLAIN":          explain,
   175  	"EXTRACT":          extract,
   176  	"FALSE":            falseKwd,
   177  	"FLOAT":            floatType,
   178  	"FLOOR":            floor,
   179  	"FROM":             from,
   180  	"GRAPH":            graph,
   181  	"GRAPHS":           graphs,
   182  	"GROUP":            group,
   183  	"HAVING":           having,
   184  	"HAS_LABEL":        hasLabel,
   185  	"HOUR":             hour,
   186  	"IF":               ifKwd,
   187  	"IN":               in,
   188  	"IN_DEGREE":        inDegree,
   189  	"INDEX":            index,
   190  	"INSERT":           insert,
   191  	"INTEGER":          integerType,
   192  	"INTERVAL":         interval,
   193  	"INTO":             into,
   194  	"ID":               id,
   195  	"IS":               is,
   196  	"JAVA_REGEXP_LIKE": javaRegexpLike,
   197  	"LABEL":            label,
   198  	"LABELS":           labels,
   199  	"LIMIT":            limit,
   200  	"LISTAGG":          listagg,
   201  	"LOWER":            lower,
   202  	"MATCH":            match,
   203  	"MATCH_NUMBER":     matchNumber,
   204  	"MAX":              max,
   205  	"MIN":              min,
   206  	"MINUTE":           minute,
   207  	"MOD":              mod,
   208  	"MONTH":            month,
   209  	"NOT":              not,
   210  	"NULL":             null,
   211  	"OFFSET":           offset,
   212  	"ON":               on,
   213  	"OR":               or,
   214  	"ORDER":            order,
   215  	"OUT_DEGREE":       outDegree,
   216  	"PATH":             path,
   217  	"PROPERTIES":       properties,
   218  	"ROLLBACK":         rollback,
   219  	"SECOND":           second,
   220  	"SELECT":           selectKwd,
   221  	"SET":              set,
   222  	"SHORTEST":         shortest,
   223  	"SHOW":             show,
   224  	"STRING":           stringKwd,
   225  	"SUBSTR":           substring,
   226  	"SUBSTRING":        substring,
   227  	"SUM":              sum,
   228  	"THEN":             then,
   229  	"TIME":             timeType,
   230  	"TIMESTAMP":        timestampType,
   231  	"TOP":              top,
   232  	"TRUE":             trueKwd,
   233  	"UNIQUE":           unique,
   234  	"UPDATE":           update,
   235  	"UPPER":            uppper,
   236  	"USE":              use,
   237  	"VERTEX":           vertex,
   238  	"WHEN":             when,
   239  	"WHERE":            where,
   240  	"WITH":             with,
   241  	"XOR":              xor,
   242  	"YEAR":             yearType,
   243  }
   244  
   245  var btFuncTokenMap = map[string]int{}
   246  
   247  func (l *Lexer) isTokenIdentifier(lit string, offset int) int {
   248  	// An identifier before or after '.' means it is part of a qualified identifier.
   249  	// We do not parse it as keyword.
   250  	if l.r.peek() == '.' {
   251  		return 0
   252  	}
   253  	if offset > 0 && l.r.s[offset-1] == '.' {
   254  		return 0
   255  	}
   256  	buf := &l.buf
   257  	buf.Reset()
   258  	buf.Grow(len(lit))
   259  	data := buf.Bytes()[:len(lit)]
   260  	for i := 0; i < len(lit); i++ {
   261  		if lit[i] >= 'a' && lit[i] <= 'z' {
   262  			data[i] = lit[i] + 'A' - 'a'
   263  		} else {
   264  			data[i] = lit[i]
   265  		}
   266  	}
   267  
   268  	checkBtFuncToken := false
   269  	if l.r.peek() == '(' {
   270  		checkBtFuncToken = true
   271  	}
   272  	if checkBtFuncToken {
   273  		if tok := btFuncTokenMap[string(data)]; tok != 0 {
   274  			return tok
   275  		}
   276  	}
   277  	tok := tokenMap[string(data)]
   278  	return tok
   279  }