github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/parser/tokens.go (about) 1 // Copyright 2016 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package parser 15 16 func isLetter(ch byte) bool { 17 return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') 18 } 19 20 func isDigit(ch byte) bool { 21 return ch >= '0' && ch <= '9' 22 } 23 24 func isIdentChar(ch byte) bool { 25 return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || isIdentExtend(ch) 26 } 27 28 func isIdentExtend(ch byte) bool { 29 return ch >= 0x80 30 } 31 32 func isUserVarChar(ch byte) bool { 33 return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || ch == '.' || isIdentExtend(ch) 34 } 35 36 type trieNode struct { 37 childs [256]*trieNode 38 token int 39 fn func(s *Lexer) (int, Pos, string) 40 } 41 42 var ruleTable trieNode 43 44 func initTokenByte(c byte, tok int) { 45 if ruleTable.childs[c] == nil { 46 ruleTable.childs[c] = &trieNode{} 47 } 48 ruleTable.childs[c].token = tok 49 } 50 51 func initTokenString(str string, tok int) { 52 node := &ruleTable 53 for _, c := range str { 54 if node.childs[c] == nil { 55 node.childs[c] = &trieNode{} 56 } 57 node = node.childs[c] 58 } 59 node.token = tok 60 } 61 62 func initTokenFunc(str string, fn func(s *Lexer) (int, Pos, string)) { 63 for i := 0; i < len(str); i++ { 64 c := str[i] 65 if ruleTable.childs[c] == nil { 66 ruleTable.childs[c] = &trieNode{} 67 } 68 ruleTable.childs[c].fn = fn 69 } 70 } 71 72 func init() { 73 // invalid is a special token defined in parser.y, when parser meet 74 // this token, it will throw an error. 75 // set root trie node's token to invalid, so when input match nothing 76 // in the trie, invalid will be the default return token. 77 ruleTable.token = invalid 78 initTokenByte('+', int('+')) 79 initTokenByte('-', int('-')) 80 initTokenByte('>', int('>')) 81 initTokenByte('<', int('<')) 82 initTokenByte('(', int('(')) 83 initTokenByte(')', int(')')) 84 initTokenByte('[', int('[')) 85 initTokenByte(']', int(']')) 86 initTokenByte(';', int(';')) 87 initTokenByte(',', int(',')) 88 initTokenByte('&', int('&')) 89 initTokenByte('%', int('%')) 90 initTokenByte(':', int(':')) 91 initTokenByte('|', int('|')) 92 initTokenByte('!', int('!')) 93 initTokenByte('^', int('^')) 94 initTokenByte('~', int('~')) 95 initTokenByte('\\', int('\\')) 96 initTokenByte('?', paramMarker) 97 initTokenByte('=', eq) 98 initTokenByte('{', int('{')) 99 initTokenByte('}', int('}')) 100 101 initTokenString("||", pipes) 102 initTokenString("&&", andand) 103 initTokenString("&^", andnot) 104 initTokenString(":=", assignmentEq) 105 initTokenString("<=>", nulleq) 106 initTokenString(">=", ge) 107 initTokenString("<=", le) 108 initTokenString("!=", neq) 109 initTokenString("<>", neqSynonym) 110 initTokenString(".*", allProp) 111 initTokenString("\\N", null) 112 initTokenString("<-", leftArrow) 113 initTokenString("->", rightArrow) 114 initTokenString("-[", edgeOutgoingLeft) 115 initTokenString("]->", edgeOutgoingRight) 116 initTokenString("<-[", edgeIncomingLeft) 117 initTokenString("]-", edgeIncomingRight) 118 initTokenString("-/", reachOutgoingLeft) 119 initTokenString("<-/", reachIncomingLeft) 120 121 initTokenFunc("/", startWithSlash) 122 initTokenFunc("@", startWithAt) 123 initTokenFunc("*", startWithStar) 124 initTokenFunc("#", startWithSharp) 125 initTokenFunc(".", startWithDot) 126 initTokenFunc("_$ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", scanIdentifier) 127 initTokenFunc("`", scanQuotedIdent) 128 initTokenFunc("0123456789", startWithNumber) 129 initTokenFunc("'\"", startString) 130 } 131 132 // tokenMap is a map of known identifiers to the parser token ID. 133 // Please try to keep the map in alphabetical order. 134 var tokenMap = map[string]int{ 135 "ABS": abs, 136 "ALL": all, 137 "ALL_DIFFERENT": allDifferent, 138 "AND": and, 139 "ANY": any, 140 "ARRAY_AGG": arrayAgg, 141 "AS": as, 142 "ASC": asc, 143 "AVG": avg, 144 "BEGIN": begin, 145 "BETWEEN": between, 146 "BOOLEAN": booleanType, 147 "BY": by, 148 "CASE": caseKwd, 149 "CAST": cast, 150 "CEIL": ceil, 151 "CEILING": ceiling, 152 "CHEAPEST": cheapest, 153 "COMMENT": comment, 154 "COMMIT": commit, 155 "COST": cost, 156 "COUNT": count, 157 "CREATE": create, 158 "DECIMAL": decimalType, 159 "DATE": dateType, 160 "DAY": day, 161 "DEFAULT": defaultKwd, 162 "DELETE": deleteKwd, 163 "DESC": desc, 164 "DISTINCT": distinct, 165 "DISTINCTROW": distinct, 166 "DIV": div, 167 "DOUBLE": doubleType, 168 "DROP": drop, 169 "EDGE": edge, 170 "ELEMENT_NUMBER": elementNumber, 171 "ELSE": elseKwd, 172 "END": end, 173 "EXISTS": exists, 174 "EXPLAIN": explain, 175 "EXTRACT": extract, 176 "FALSE": falseKwd, 177 "FLOAT": floatType, 178 "FLOOR": floor, 179 "FROM": from, 180 "GRAPH": graph, 181 "GRAPHS": graphs, 182 "GROUP": group, 183 "HAVING": having, 184 "HAS_LABEL": hasLabel, 185 "HOUR": hour, 186 "IF": ifKwd, 187 "IN": in, 188 "IN_DEGREE": inDegree, 189 "INDEX": index, 190 "INSERT": insert, 191 "INTEGER": integerType, 192 "INTERVAL": interval, 193 "INTO": into, 194 "ID": id, 195 "IS": is, 196 "JAVA_REGEXP_LIKE": javaRegexpLike, 197 "LABEL": label, 198 "LABELS": labels, 199 "LIMIT": limit, 200 "LISTAGG": listagg, 201 "LOWER": lower, 202 "MATCH": match, 203 "MATCH_NUMBER": matchNumber, 204 "MAX": max, 205 "MIN": min, 206 "MINUTE": minute, 207 "MOD": mod, 208 "MONTH": month, 209 "NOT": not, 210 "NULL": null, 211 "OFFSET": offset, 212 "ON": on, 213 "OR": or, 214 "ORDER": order, 215 "OUT_DEGREE": outDegree, 216 "PATH": path, 217 "PROPERTIES": properties, 218 "ROLLBACK": rollback, 219 "SECOND": second, 220 "SELECT": selectKwd, 221 "SET": set, 222 "SHORTEST": shortest, 223 "SHOW": show, 224 "STRING": stringKwd, 225 "SUBSTR": substring, 226 "SUBSTRING": substring, 227 "SUM": sum, 228 "THEN": then, 229 "TIME": timeType, 230 "TIMESTAMP": timestampType, 231 "TOP": top, 232 "TRUE": trueKwd, 233 "UNIQUE": unique, 234 "UPDATE": update, 235 "UPPER": uppper, 236 "USE": use, 237 "VERTEX": vertex, 238 "WHEN": when, 239 "WHERE": where, 240 "WITH": with, 241 "XOR": xor, 242 "YEAR": yearType, 243 } 244 245 var btFuncTokenMap = map[string]int{} 246 247 func (l *Lexer) isTokenIdentifier(lit string, offset int) int { 248 // An identifier before or after '.' means it is part of a qualified identifier. 249 // We do not parse it as keyword. 250 if l.r.peek() == '.' { 251 return 0 252 } 253 if offset > 0 && l.r.s[offset-1] == '.' { 254 return 0 255 } 256 buf := &l.buf 257 buf.Reset() 258 buf.Grow(len(lit)) 259 data := buf.Bytes()[:len(lit)] 260 for i := 0; i < len(lit); i++ { 261 if lit[i] >= 'a' && lit[i] <= 'z' { 262 data[i] = lit[i] + 'A' - 'a' 263 } else { 264 data[i] = lit[i] 265 } 266 } 267 268 checkBtFuncToken := false 269 if l.r.peek() == '(' { 270 checkBtFuncToken = true 271 } 272 if checkBtFuncToken { 273 if tok := btFuncTokenMap[string(data)]; tok != 0 { 274 return tok 275 } 276 } 277 tok := tokenMap[string(data)] 278 return tok 279 }