github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/sqlparse/tidbparser/parser/misc.go (about) 1 // Copyright 2016 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package parser 15 16 import ( 17 "strings" 18 19 "github.com/bingoohuang/gg/pkg/sqlparse/tidbparser/dependency/util/charset" 20 "github.com/bingoohuang/gg/pkg/sqlparse/tidbparser/dependency/util/hack" 21 ) 22 23 func isLetter(ch rune) bool { 24 return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') 25 } 26 27 func isDigit(ch rune) bool { 28 return ch >= '0' && ch <= '9' 29 } 30 31 func isIdentChar(ch rune) bool { 32 return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || isIdentExtend(ch) 33 } 34 35 func isIdentExtend(ch rune) bool { 36 return ch >= 0x80 && ch <= '\uffff' 37 } 38 39 func isIdentFirstChar(ch rune) bool { 40 return isLetter(ch) || ch == '_' 41 } 42 43 type trieNode struct { 44 childs [256]*trieNode 45 token int 46 fn func(s *Scanner) (int, Pos, string) 47 } 48 49 var ruleTable trieNode 50 51 func initTokenByte(c byte, tok int) { 52 if ruleTable.childs[c] == nil { 53 ruleTable.childs[c] = &trieNode{} 54 } 55 ruleTable.childs[c].token = tok 56 } 57 58 func initTokenString(str string, tok int) { 59 node := &ruleTable 60 for _, c := range str { 61 if node.childs[c] == nil { 62 node.childs[c] = &trieNode{} 63 } 64 node = node.childs[c] 65 } 66 node.token = tok 67 } 68 69 func initTokenFunc(str string, fn func(s *Scanner) (int, Pos, string)) { 70 for i := 0; i < len(str); i++ { 71 c := str[i] 72 if ruleTable.childs[c] == nil { 73 ruleTable.childs[c] = &trieNode{} 74 } 75 ruleTable.childs[c].fn = fn 76 } 77 return 78 } 79 80 func init() { 81 // invalid is a special token defined in parser.y, when parser meet 82 // this token, it will throw an error. 83 // set root trie node's token to invalid, so when input match nothing 84 // in the trie, invalid will be the default return token. 85 ruleTable.token = invalid 86 initTokenByte('*', int('*')) 87 initTokenByte('/', int('/')) 88 initTokenByte('+', int('+')) 89 initTokenByte('>', int('>')) 90 initTokenByte('<', int('<')) 91 initTokenByte('(', int('(')) 92 initTokenByte(')', int(')')) 93 initTokenByte(';', int(';')) 94 initTokenByte(',', int(',')) 95 initTokenByte('&', int('&')) 96 initTokenByte('%', int('%')) 97 initTokenByte(':', int(':')) 98 initTokenByte('|', int('|')) 99 initTokenByte('!', int('!')) 100 initTokenByte('^', int('^')) 101 initTokenByte('~', int('~')) 102 initTokenByte('\\', int('\\')) 103 initTokenByte('?', paramMarker) 104 initTokenByte('=', eq) 105 initTokenByte('{', int('{')) 106 initTokenByte('}', int('}')) 107 108 initTokenString("||", pipes) 109 initTokenString("&&", andand) 110 initTokenString("&^", andnot) 111 initTokenString(":=", assignmentEq) 112 initTokenString("<=>", nulleq) 113 initTokenString(">=", ge) 114 initTokenString("<=", le) 115 initTokenString("!=", neq) 116 initTokenString("<>", neqSynonym) 117 initTokenString("<<", lsh) 118 initTokenString(">>", rsh) 119 initTokenString("\\N", null) 120 121 initTokenFunc("@", startWithAt) 122 initTokenFunc("/", startWithSlash) 123 initTokenFunc("-", startWithDash) 124 initTokenFunc("#", startWithSharp) 125 initTokenFunc("Xx", startWithXx) 126 initTokenFunc("Nn", startWithNn) 127 initTokenFunc("Bb", startWithBb) 128 initTokenFunc(".", startWithDot) 129 initTokenFunc("_$ACDEFGHIJKLMOPQRSTUVWYZacdefghijklmopqrstuvwyz", scanIdentifier) 130 initTokenFunc("`", scanQuotedIdent) 131 initTokenFunc("0123456789", startWithNumber) 132 initTokenFunc("'\"", startString) 133 } 134 135 var tokenMap = map[string]int{ 136 "ACTION": action, 137 "ADD": add, 138 "ADDDATE": addDate, 139 "ADMIN": admin, 140 "AFTER": after, 141 "ALL": all, 142 "ALGORITHM": algorithm, 143 "ALTER": alter, 144 "ALWAYS": always, 145 "ANALYZE": analyze, 146 "AND": and, 147 "ANY": any, 148 "AS": as, 149 "ASC": asc, 150 "ASCII": ascii, 151 "AUTO_INCREMENT": autoIncrement, 152 "AVG": avg, 153 "AVG_ROW_LENGTH": avgRowLength, 154 "BEGIN": begin, 155 "BETWEEN": between, 156 "BIGINT": bigIntType, 157 "BINARY": binaryType, 158 "BINLOG": binlog, 159 "BIT": bitType, 160 "BIT_AND": bitAnd, 161 "BIT_OR": bitOr, 162 "BIT_XOR": bitXor, 163 "BLOB": blobType, 164 "BOOL": boolType, 165 "BOOLEAN": booleanType, 166 "BOTH": both, 167 "BTREE": btree, 168 "BY": by, 169 "BYTE": byteType, 170 "CANCEL": cancel, 171 "CASCADE": cascade, 172 "CASCADED": cascaded, 173 "CASE": caseKwd, 174 "CAST": cast, 175 "CHANGE": change, 176 "CHAR": charType, 177 "CHARACTER": character, 178 "CHARSET": charsetKwd, 179 "CHECK": check, 180 "CHECKSUM": checksum, 181 "CLIENT": client, 182 "COALESCE": coalesce, 183 "COLLATE": collate, 184 "COLLATION": collation, 185 "COLUMN": column, 186 "COLUMNS": columns, 187 "COMMENT": comment, 188 "COMMIT": commit, 189 "COMMITTED": committed, 190 "COMPACT": compact, 191 "COMPRESSED": compressed, 192 "COMPRESSION": compression, 193 "CONNECTION": connection, 194 "CONSISTENT": consistent, 195 "CONSTRAINT": constraint, 196 "CONVERT": convert, 197 "COUNT": count, 198 "CREATE": create, 199 "CROSS": cross, 200 "CURRENT_DATE": currentDate, 201 "CURRENT_TIME": currentTime, 202 "CURRENT_TIMESTAMP": currentTs, 203 "CURRENT_USER": currentUser, 204 "CURTIME": curTime, 205 "DATA": data, 206 "DATABASE": database, 207 "DATABASES": databases, 208 "DATE": dateType, 209 "DATE_ADD": dateAdd, 210 "DATE_SUB": dateSub, 211 "DATETIME": datetimeType, 212 "DAY": day, 213 "DAY_HOUR": dayHour, 214 "DAY_MICROSECOND": dayMicrosecond, 215 "DAY_MINUTE": dayMinute, 216 "DAY_SECOND": daySecond, 217 "DDL": ddl, 218 "DEALLOCATE": deallocate, 219 "DEC": decimalType, 220 "DECIMAL": decimalType, 221 "DEFAULT": defaultKwd, 222 "DEFINER": definer, 223 "DELAY_KEY_WRITE": delayKeyWrite, 224 "DELAYED": delayed, 225 "DELETE": deleteKwd, 226 "DESC": desc, 227 "DESCRIBE": describe, 228 "DISABLE": disable, 229 "DISTINCT": distinct, 230 "DISTINCTROW": distinct, 231 "DIV": div, 232 "DO": do, 233 "DOUBLE": doubleType, 234 "DROP": drop, 235 "DUAL": dual, 236 "DUPLICATE": duplicate, 237 "DYNAMIC": dynamic, 238 "ELSE": elseKwd, 239 "ENABLE": enable, 240 "ENCLOSED": enclosed, 241 "END": end, 242 "ENGINE": engine, 243 "ENGINES": engines, 244 "ENUM": enum, 245 "ESCAPE": escape, 246 "ESCAPED": escaped, 247 "EVENT": event, 248 "EVENTS": events, 249 "EXCLUSIVE": exclusive, 250 "EXECUTE": execute, 251 "EXISTS": exists, 252 "EXPLAIN": explain, 253 "EXTRACT": extract, 254 "FALSE": falseKwd, 255 "FIELDS": fields, 256 "FIRST": first, 257 "FIXED": fixed, 258 "FLOAT": floatType, 259 "FLUSH": flush, 260 "FOR": forKwd, 261 "FORCE": force, 262 "FOREIGN": foreign, 263 "FORMAT": format, 264 "FROM": from, 265 "FULL": full, 266 "FULLTEXT": fulltext, 267 "FUNCTION": function, 268 "GENERATED": generated, 269 "GET_FORMAT": getFormat, 270 "GLOBAL": global, 271 "GRANT": grant, 272 "GRANTS": grants, 273 "GROUP": group, 274 "GROUP_CONCAT": groupConcat, 275 "HASH": hash, 276 "HAVING": having, 277 "HIGH_PRIORITY": highPriority, 278 "HOUR": hour, 279 "HOUR_MICROSECOND": hourMicrosecond, 280 "HOUR_MINUTE": hourMinute, 281 "HOUR_SECOND": hourSecond, 282 "IDENTIFIED": identified, 283 "IF": ifKwd, 284 "IGNORE": ignore, 285 "IN": in, 286 "INDEX": index, 287 "INDEXES": indexes, 288 "INFILE": infile, 289 "INNER": inner, 290 "INSERT": insert, 291 "INT": intType, 292 "INT1": int1Type, 293 "INT2": int2Type, 294 "INT3": int3Type, 295 "INT4": int4Type, 296 "INT8": int8Type, 297 "INTEGER": integerType, 298 "INTERVAL": interval, 299 "INTO": into, 300 "INVOKER": invoker, 301 "IS": is, 302 "ISOLATION": isolation, 303 "JOBS": jobs, 304 "JOIN": join, 305 "JSON": jsonType, 306 "KEY": key, 307 "KEY_BLOCK_SIZE": keyBlockSize, 308 "KEYS": keys, 309 "KILL": kill, 310 "LEADING": leading, 311 "LEFT": left, 312 "LESS": less, 313 "LEVEL": level, 314 "LIKE": like, 315 "LIMIT": limit, 316 "LINES": lines, 317 "LOAD": load, 318 "LOCAL": local, 319 "LOCALTIME": localTime, 320 "LOCALTIMESTAMP": localTs, 321 "LOCK": lock, 322 "LONG": long, 323 "LONGBLOB": longblobType, 324 "LONGTEXT": longtextType, 325 "LOW_PRIORITY": lowPriority, 326 "MAX": max, 327 "MAX_CONNECTIONS_PER_HOUR": maxConnectionsPerHour, 328 "MAX_QUERIES_PER_HOUR": maxQueriesPerHour, 329 "MAX_ROWS": maxRows, 330 "MAX_UPDATES_PER_HOUR": maxUpdatesPerHour, 331 "MAX_USER_CONNECTIONS": maxUserConnections, 332 "MAXVALUE": maxValue, 333 "MEDIUMBLOB": mediumblobType, 334 "MEDIUMINT": mediumIntType, 335 "MEDIUMTEXT": mediumtextType, 336 "MERGE": merge, 337 "MICROSECOND": microsecond, 338 "MIN": min, 339 "MIN_ROWS": minRows, 340 "MINUTE": minute, 341 "MINUTE_MICROSECOND": minuteMicrosecond, 342 "MINUTE_SECOND": minuteSecond, 343 "MOD": mod, 344 "MODE": mode, 345 "MODIFY": modify, 346 "MONTH": month, 347 "NAMES": names, 348 "NATIONAL": national, 349 "NATURAL": natural, 350 "NO": no, 351 "NO_WRITE_TO_BINLOG": noWriteToBinLog, 352 "NONE": none, 353 "NOT": not, 354 "NOW": now, 355 "NULL": null, 356 "NUMERIC": numericType, 357 "NVARCHAR": nvarcharType, 358 "OFFSET": offset, 359 "ON": on, 360 "ONLY": only, 361 "OPTION": option, 362 "OR": or, 363 "ORDER": order, 364 "OUTER": outer, 365 "PACK_KEYS": packKeys, 366 "PARTITION": partition, 367 "PARTITIONS": partitions, 368 "PASSWORD": password, 369 "PLUGINS": plugins, 370 "POSITION": position, 371 "PRECISION": precisionType, 372 "PREPARE": prepare, 373 "PRIMARY": primary, 374 "PRIVILEGES": privileges, 375 "PROCEDURE": procedure, 376 "PROCESS": process, 377 "PROCESSLIST": processlist, 378 "PROFILES": profiles, 379 "QUARTER": quarter, 380 "QUERY": query, 381 "QUICK": quick, 382 "SHARD_ROW_ID_BITS": shardRowIDBits, 383 "RANGE": rangeKwd, 384 "READ": read, 385 "REAL": realType, 386 "REDUNDANT": redundant, 387 "REFERENCES": references, 388 "REGEXP": regexpKwd, 389 "RELOAD": reload, 390 "RENAME": rename, 391 "REPEAT": repeat, 392 "REPEATABLE": repeatable, 393 "REPLACE": replace, 394 "REPLICATION": replication, 395 "RESTRICT": restrict, 396 "REVERSE": reverse, 397 "REVOKE": revoke, 398 "RIGHT": right, 399 "RLIKE": rlike, 400 "ROLLBACK": rollback, 401 "ROUTINE": routine, 402 "ROW": row, 403 "ROW_COUNT": rowCount, 404 "ROW_FORMAT": rowFormat, 405 "SCHEMA": database, 406 "SCHEMAS": databases, 407 "SECOND": second, 408 "SECOND_MICROSECOND": secondMicrosecond, 409 "SECURITY": security, 410 "SELECT": selectKwd, 411 "SERIALIZABLE": serializable, 412 "SESSION": session, 413 "SET": set, 414 "SEPARATOR": separator, 415 "SHARE": share, 416 "SHARED": shared, 417 "SHOW": show, 418 "SIGNED": signed, 419 "SLAVE": slave, 420 "SMALLINT": smallIntType, 421 "SNAPSHOT": snapshot, 422 "SOME": some, 423 "SQL": sql, 424 "SQL_CACHE": sqlCache, 425 "SQL_CALC_FOUND_ROWS": sqlCalcFoundRows, 426 "SQL_NO_CACHE": sqlNoCache, 427 "START": start, 428 "STARTING": starting, 429 "STATS": stats, 430 "STATS_BUCKETS": statsBuckets, 431 "STATS_HISTOGRAMS": statsHistograms, 432 "STATS_HEALTHY": statsHealthy, 433 "STATS_META": statsMeta, 434 "STATS_PERSISTENT": statsPersistent, 435 "STATUS": status, 436 "STORED": stored, 437 "STRAIGHT_JOIN": straightJoin, 438 "SUBDATE": subDate, 439 "SUBSTR": substring, 440 "SUBSTRING": substring, 441 "SUM": sum, 442 "SUPER": super, 443 "TABLE": tableKwd, 444 "TABLES": tables, 445 "TEMPORARY": temporary, 446 "TEMPTABLE": temptable, 447 "TERMINATED": terminated, 448 "TEXT": textType, 449 "THAN": than, 450 "THEN": then, 451 "TIDB": tidb, 452 "TIDB_HJ": tidbHJ, 453 "TIDB_INLJ": tidbINLJ, 454 "TIDB_SMJ": tidbSMJ, 455 "TIME": timeType, 456 "TIMESTAMP": timestampType, 457 "TIMESTAMPADD": timestampAdd, 458 "TIMESTAMPDIFF": timestampDiff, 459 "TINYBLOB": tinyblobType, 460 "TINYINT": tinyIntType, 461 "TINYTEXT": tinytextType, 462 "TO": to, 463 "TRAILING": trailing, 464 "TRANSACTION": transaction, 465 "TRIGGER": trigger, 466 "TRIGGERS": triggers, 467 "TRIM": trim, 468 "TRUE": trueKwd, 469 "TRUNCATE": truncate, 470 "UNCOMMITTED": uncommitted, 471 "UNDEFINED": undefined, 472 "UNION": union, 473 "UNIQUE": unique, 474 "UNKNOWN": unknown, 475 "UNLOCK": unlock, 476 "UNSIGNED": unsigned, 477 "UPDATE": update, 478 "USAGE": usage, 479 "USE": use, 480 "USER": user, 481 "USING": using, 482 "UTC_DATE": utcDate, 483 "UTC_TIME": utcTime, 484 "UTC_TIMESTAMP": utcTimestamp, 485 "VALUE": value, 486 "VALUES": values, 487 "VARBINARY": varbinaryType, 488 "VARCHAR": varcharType, 489 "VARIABLES": variables, 490 "VIEW": view, 491 "VIRTUAL": virtual, 492 "WARNINGS": warnings, 493 "WEEK": week, 494 "WHEN": when, 495 "WHERE": where, 496 "WITH": with, 497 "WRITE": write, 498 "XOR": xor, 499 "YEAR": yearType, 500 "YEAR_MONTH": yearMonth, 501 "ZEROFILL": zerofill, 502 } 503 504 // See https://dev.mysql.com/doc/refman/5.7/en/function-resolution.html for details 505 var btFuncTokenMap = map[string]int{ 506 "ADDDATE": builtinAddDate, 507 "BIT_AND": builtinBitAnd, 508 "BIT_OR": builtinBitOr, 509 "BIT_XOR": builtinBitXor, 510 "CAST": builtinCast, 511 "COUNT": builtinCount, 512 "CURDATE": builtinCurDate, 513 "CURTIME": builtinCurTime, 514 "DATE_ADD": builtinDateAdd, 515 "DATE_SUB": builtinDateSub, 516 "EXTRACT": builtinExtract, 517 "GROUP_CONCAT": builtinGroupConcat, 518 "MAX": builtinMax, 519 "MID": builtinSubstring, 520 "MIN": builtinMin, 521 "NOW": builtinNow, 522 "POSITION": builtinPosition, 523 "SESSION_USER": builtinUser, 524 "STD": builtinStddevPop, 525 "STDDEV": builtinStddevPop, 526 "STDDEV_POP": builtinStddevPop, 527 "STDDEV_SAMP": builtinVarSamp, 528 "SUBDATE": builtinSubDate, 529 "SUBSTR": builtinSubstring, 530 "SUBSTRING": builtinSubstring, 531 "SUM": builtinSum, 532 "SYSDATE": builtinSysDate, 533 "SYSTEM_USER": builtinUser, 534 "TRIM": builtinTrim, 535 "VARIANCE": builtinVarPop, 536 "VAR_POP": builtinVarPop, 537 "VAR_SAMP": builtinVarSamp, 538 } 539 540 // aliases are strings directly map to another string and use the same token. 541 var aliases = map[string]string{ 542 "SCHEMA": "DATABASE", 543 "SCHEMAS": "DATABASES", 544 "DEC": "DECIMAL", 545 "SUBSTR": "SUBSTRING", 546 } 547 548 func (s *Scanner) isTokenIdentifier(lit string, offset int) int { 549 // An identifier before or after '.' means it is part of a qualified identifier. 550 // We do not parse it as keyword. 551 if s.r.peek() == '.' { 552 return 0 553 } 554 if offset > 0 && s.r.s[offset-1] == '.' { 555 return 0 556 } 557 buf := &s.buf 558 buf.Reset() 559 buf.Grow(len(lit)) 560 data := buf.Bytes()[:len(lit)] 561 for i := 0; i < len(lit); i++ { 562 if lit[i] >= 'a' && lit[i] <= 'z' { 563 data[i] = lit[i] + 'A' - 'a' 564 } else { 565 data[i] = lit[i] 566 } 567 } 568 569 checkBtFuncToken, tokenStr := false, hack.String(data) 570 if s.r.peek() == '(' { 571 checkBtFuncToken = true 572 } else if s.sqlMode.HasIgnoreSpaceMode() { 573 s.skipWhitespace() 574 if s.r.peek() == '(' { 575 checkBtFuncToken = true 576 } 577 } 578 if checkBtFuncToken { 579 if tok := btFuncTokenMap[tokenStr]; tok != 0 { 580 return tok 581 } 582 } 583 tok := tokenMap[tokenStr] 584 return tok 585 } 586 587 func handleIdent(lval *yySymType) int { 588 s := lval.ident 589 // A character string literal may have an optional character set introducer and COLLATE clause: 590 // [_charset_name]'string' [COLLATE collation_name] 591 // See https://dev.mysql.com/doc/refman/5.7/en/charset-literal.html 592 if !strings.HasPrefix(s, "_") { 593 return identifier 594 } 595 cs, _, err := charset.GetCharsetInfo(s[1:]) 596 if err != nil { 597 return identifier 598 } 599 lval.ident = cs 600 return underscoreCS 601 }