github.com/XiaoMi/Gaea@v1.2.5/parser/misc.go (about) 1 // Copyright 2016 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package parser 15 16 import ( 17 "strings" 18 19 "github.com/XiaoMi/Gaea/mysql" 20 ) 21 22 func isLetter(ch rune) bool { 23 return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') 24 } 25 26 func isDigit(ch rune) bool { 27 return ch >= '0' && ch <= '9' 28 } 29 30 func isIdentChar(ch rune) bool { 31 return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || isIdentExtend(ch) 32 } 33 34 func isIdentExtend(ch rune) bool { 35 return ch >= 0x80 && ch <= '\uffff' 36 } 37 38 func isUserVarChar(ch rune) bool { 39 return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || ch == '.' || isIdentExtend(ch) 40 } 41 42 type trieNode struct { 43 childs [256]*trieNode 44 token int 45 fn func(s *Scanner) (int, Pos, string) 46 } 47 48 var ruleTable trieNode 49 50 func initTokenByte(c byte, tok int) { 51 if ruleTable.childs[c] == nil { 52 ruleTable.childs[c] = &trieNode{} 53 } 54 ruleTable.childs[c].token = tok 55 } 56 57 func initTokenString(str string, tok int) { 58 node := &ruleTable 59 for _, c := range str { 60 if node.childs[c] == nil { 61 node.childs[c] = &trieNode{} 62 } 63 node = node.childs[c] 64 } 65 node.token = tok 66 } 67 68 func initTokenFunc(str string, fn func(s *Scanner) (int, Pos, string)) { 69 for i := 0; i < len(str); i++ { 70 c := str[i] 71 if ruleTable.childs[c] == nil { 72 ruleTable.childs[c] = &trieNode{} 73 } 74 ruleTable.childs[c].fn = fn 75 } 76 return 77 } 78 79 func init() { 80 // invalid is a special token defined in parser.y, when parser meet 81 // this token, it will throw an error. 82 // set root trie node's token to invalid, so when input match nothing 83 // in the trie, invalid will be the default return token. 84 ruleTable.token = invalid 85 initTokenByte('*', int('*')) 86 initTokenByte('/', int('/')) 87 initTokenByte('+', int('+')) 88 initTokenByte('>', int('>')) 89 initTokenByte('<', int('<')) 90 initTokenByte('(', int('(')) 91 initTokenByte(')', int(')')) 92 initTokenByte(';', int(';')) 93 initTokenByte(',', int(',')) 94 initTokenByte('&', int('&')) 95 initTokenByte('%', int('%')) 96 initTokenByte(':', int(':')) 97 initTokenByte('|', int('|')) 98 initTokenByte('!', int('!')) 99 initTokenByte('^', int('^')) 100 initTokenByte('~', int('~')) 101 initTokenByte('\\', int('\\')) 102 initTokenByte('?', paramMarker) 103 initTokenByte('=', eq) 104 initTokenByte('{', int('{')) 105 initTokenByte('}', int('}')) 106 107 initTokenString("||", pipes) 108 initTokenString("&&", andand) 109 initTokenString("&^", andnot) 110 initTokenString(":=", assignmentEq) 111 initTokenString("<=>", nulleq) 112 initTokenString(">=", ge) 113 initTokenString("<=", le) 114 initTokenString("!=", neq) 115 initTokenString("<>", neqSynonym) 116 initTokenString("<<", lsh) 117 initTokenString(">>", rsh) 118 initTokenString("\\N", null) 119 120 initTokenFunc("@", startWithAt) 121 initTokenFunc("/", startWithSlash) 122 initTokenFunc("-", startWithDash) 123 initTokenFunc("#", startWithSharp) 124 initTokenFunc("Xx", startWithXx) 125 initTokenFunc("Nn", startWithNn) 126 initTokenFunc("Bb", startWithBb) 127 initTokenFunc(".", startWithDot) 128 initTokenFunc("_$ACDEFGHIJKLMOPQRSTUVWYZacdefghijklmopqrstuvwyz", scanIdentifier) 129 initTokenFunc("`", scanQuotedIdent) 130 initTokenFunc("0123456789", startWithNumber) 131 initTokenFunc("'\"", startString) 132 } 133 134 var tokenMap = map[string]int{ 135 "ACTION": action, 136 "ADD": add, 137 "ADDDATE": addDate, 138 "ADMIN": admin, 139 "AFTER": after, 140 "ALL": all, 141 "ALGORITHM": algorithm, 142 "ALTER": alter, 143 "ALWAYS": always, 144 "ANALYZE": analyze, 145 "AND": and, 146 "ANY": any, 147 "AS": as, 148 "ASC": asc, 149 "ASCII": ascii, 150 "AUTO_INCREMENT": autoIncrement, 151 "AVG": avg, 152 "AVG_ROW_LENGTH": avgRowLength, 153 "BEGIN": begin, 154 "BETWEEN": between, 155 "BIGINT": bigIntType, 156 "BINARY": binaryType, 157 "BINLOG": binlog, 158 "BIT": bitType, 159 "BIT_AND": bitAnd, 160 "BIT_OR": bitOr, 161 "BIT_XOR": bitXor, 162 "BLOB": blobType, 163 "BOOL": boolType, 164 "BOOLEAN": booleanType, 165 "BOTH": both, 166 "BTREE": btree, 167 "BUCKETS": buckets, 168 "BY": by, 169 "BYTE": byteType, 170 "CANCEL": cancel, 171 "CASCADE": cascade, 172 "CASCADED": cascaded, 173 "CASE": caseKwd, 174 "CAST": cast, 175 "CHANGE": change, 176 "CHAR": charType, 177 "CHARACTER": character, 178 "CHARSET": charsetKwd, 179 "CHECK": check, 180 "CHECKSUM": checksum, 181 "CLEANUP": cleanup, 182 "CLIENT": client, 183 "COALESCE": coalesce, 184 "COLLATE": collate, 185 "COLLATION": collation, 186 "COLUMN": column, 187 "COLUMNS": columns, 188 "COMMENT": comment, 189 "COMMIT": commit, 190 "COMMITTED": committed, 191 "COMPACT": compact, 192 "COMPRESSED": compressed, 193 "COMPRESSION": compression, 194 "CONNECTION": connection, 195 "CONSISTENT": consistent, 196 "CONSTRAINT": constraint, 197 "CONVERT": convert, 198 "COPY": copyKwd, 199 "COUNT": count, 200 "CREATE": create, 201 "CROSS": cross, 202 "CURRENT": current, 203 "CURRENT_DATE": currentDate, 204 "CURRENT_TIME": currentTime, 205 "CURRENT_TIMESTAMP": currentTs, 206 "CURRENT_USER": currentUser, 207 "CURTIME": curTime, 208 "DATA": data, 209 "DATABASE": database, 210 "DATABASES": databases, 211 "DATE": dateType, 212 "DATE_ADD": dateAdd, 213 "DATE_SUB": dateSub, 214 "DATETIME": datetimeType, 215 "DAY": day, 216 "DAY_HOUR": dayHour, 217 "DAY_MICROSECOND": dayMicrosecond, 218 "DAY_MINUTE": dayMinute, 219 "DAY_SECOND": daySecond, 220 "DDL": ddl, 221 "DEALLOCATE": deallocate, 222 "DEC": decimalType, 223 "DECIMAL": decimalType, 224 "DEFAULT": defaultKwd, 225 "DEFINER": definer, 226 "DELAY_KEY_WRITE": delayKeyWrite, 227 "DELAYED": delayed, 228 "DELETE": deleteKwd, 229 "DESC": desc, 230 "DESCRIBE": describe, 231 "DISABLE": disable, 232 "DISTINCT": distinct, 233 "DISTINCTROW": distinct, 234 "DIV": div, 235 "DO": do, 236 "DOUBLE": doubleType, 237 "DRAINER": drainer, 238 "DROP": drop, 239 "DUAL": dual, 240 "DUPLICATE": duplicate, 241 "DYNAMIC": dynamic, 242 "ELSE": elseKwd, 243 "ENABLE": enable, 244 "ENCLOSED": enclosed, 245 "END": end, 246 "ENGINE": engine, 247 "ENGINES": engines, 248 "ENUM": enum, 249 "ESCAPE": escape, 250 "ESCAPED": escaped, 251 "EVENT": event, 252 "EVENTS": events, 253 "EXCLUSIVE": exclusive, 254 "EXCEPT": except, 255 "EXECUTE": execute, 256 "EXISTS": exists, 257 "EXPLAIN": explain, 258 "EXTRACT": extract, 259 "FALSE": falseKwd, 260 "FIELDS": fields, 261 "FIRST": first, 262 "FIXED": fixed, 263 "FLOAT": floatType, 264 "FLUSH": flush, 265 "FOLLOWING": following, 266 "FOR": forKwd, 267 "FORCE": force, 268 "FOREIGN": foreign, 269 "FORMAT": format, 270 "FROM": from, 271 "FULL": full, 272 "FULLTEXT": fulltext, 273 "FUNCTION": function, 274 "GENERATED": generated, 275 "GET_FORMAT": getFormat, 276 "GLOBAL": global, 277 "GRANT": grant, 278 "GRANTS": grants, 279 "GROUP": group, 280 "GROUP_CONCAT": groupConcat, 281 "HASH": hash, 282 "HAVING": having, 283 "HIGH_PRIORITY": highPriority, 284 "HOUR": hour, 285 "HOUR_MICROSECOND": hourMicrosecond, 286 "HOUR_MINUTE": hourMinute, 287 "HOUR_SECOND": hourSecond, 288 "IDENTIFIED": identified, 289 "IF": ifKwd, 290 "IGNORE": ignore, 291 "IN": in, 292 "INDEX": index, 293 "INDEXES": indexes, 294 "INFILE": infile, 295 "INNER": inner, 296 "INPLACE": inplace, 297 "INSTANT": instant, 298 "INSERT": insert, 299 "INT": intType, 300 "INT1": int1Type, 301 "INT2": int2Type, 302 "INT3": int3Type, 303 "INT4": int4Type, 304 "INT8": int8Type, 305 "INTEGER": integerType, 306 "INTERVAL": interval, 307 "INTERNAL": internal, 308 "INTO": into, 309 "INVOKER": invoker, 310 "IS": is, 311 "ISOLATION": isolation, 312 "JOBS": jobs, 313 "JOB": job, 314 "JOIN": join, 315 "JSON": jsonType, 316 "KEY": key, 317 "KEY_BLOCK_SIZE": keyBlockSize, 318 "KEYS": keys, 319 "KILL": kill, 320 "LAST": last, 321 "LEADING": leading, 322 "LEFT": left, 323 "LESS": less, 324 "LEVEL": level, 325 "LIKE": like, 326 "LIMIT": limit, 327 "LINES": lines, 328 "LOAD": load, 329 "LOCAL": local, 330 "LOCALTIME": localTime, 331 "LOCALTIMESTAMP": localTs, 332 "LOCK": lock, 333 "LONG": long, 334 "LONGBLOB": longblobType, 335 "LONGTEXT": longtextType, 336 "LOW_PRIORITY": lowPriority, 337 "MASTER": master, 338 "MAX": max, 339 "MAX_CONNECTIONS_PER_HOUR": maxConnectionsPerHour, 340 "MAX_EXECUTION_TIME": maxExecutionTime, 341 "MAX_QUERIES_PER_HOUR": maxQueriesPerHour, 342 "MAX_ROWS": maxRows, 343 "MAX_UPDATES_PER_HOUR": maxUpdatesPerHour, 344 "MAX_USER_CONNECTIONS": maxUserConnections, 345 "MAXVALUE": maxValue, 346 "MEDIUMBLOB": mediumblobType, 347 "MEDIUMINT": mediumIntType, 348 "MEDIUMTEXT": mediumtextType, 349 "MERGE": merge, 350 "MICROSECOND": microsecond, 351 "MIN": min, 352 "MIN_ROWS": minRows, 353 "MINUTE": minute, 354 "MINUTE_MICROSECOND": minuteMicrosecond, 355 "MINUTE_SECOND": minuteSecond, 356 "MOD": mod, 357 "MODE": mode, 358 "MODIFY": modify, 359 "MONTH": month, 360 "NAMES": names, 361 "NATIONAL": national, 362 "NATURAL": natural, 363 "NEXT_ROW_ID": next_row_id, 364 "NO": no, 365 "NO_WRITE_TO_BINLOG": noWriteToBinLog, 366 "NONE": none, 367 "NOT": not, 368 "NOW": now, 369 "NULL": null, 370 "NULLS": nulls, 371 "NUMERIC": numericType, 372 "NVARCHAR": nvarcharType, 373 "OFFSET": offset, 374 "ON": on, 375 "ONLY": only, 376 "OPTION": option, 377 "OPTIONALLY": optionally, 378 "OR": or, 379 "ORDER": order, 380 "OUTER": outer, 381 "PACK_KEYS": packKeys, 382 "PARTITION": partition, 383 "PARTITIONS": partitions, 384 "PASSWORD": password, 385 "PLUGINS": plugins, 386 "POSITION": position, 387 "PRECEDING": preceding, 388 "PRECISION": precisionType, 389 "PREPARE": prepare, 390 "PRIMARY": primary, 391 "PRIVILEGES": privileges, 392 "PROCEDURE": procedure, 393 "PROCESS": process, 394 "PROCESSLIST": processlist, 395 "PROFILES": profiles, 396 "PUMP": pump, 397 "QUARTER": quarter, 398 "QUERY": query, 399 "QUERIES": queries, 400 "QUICK": quick, 401 "SHARD_ROW_ID_BITS": shardRowIDBits, 402 "RANGE": rangeKwd, 403 "RECOVER": recover, 404 "READ": read, 405 "REAL": realType, 406 "RECENT": recent, 407 "REDUNDANT": redundant, 408 "REFERENCES": references, 409 "REGEXP": regexpKwd, 410 "RELOAD": reload, 411 "RENAME": rename, 412 "REPEAT": repeat, 413 "REPEATABLE": repeatable, 414 "REPLACE": replace, 415 "RESPECT": respect, 416 "REPLICATION": replication, 417 "RESTRICT": restrict, 418 "RESTORE": restore, 419 "REVERSE": reverse, 420 "REVOKE": revoke, 421 "RIGHT": right, 422 "RLIKE": rlike, 423 "ROLE": role, 424 "ROLLBACK": rollback, 425 "ROUTINE": routine, 426 "ROW": row, 427 "ROW_COUNT": rowCount, 428 "ROW_FORMAT": rowFormat, 429 "SAVEPOINT": savepoint, 430 "SCHEMA": database, 431 "SCHEMAS": databases, 432 "SECOND": second, 433 "SECOND_MICROSECOND": secondMicrosecond, 434 "SECURITY": security, 435 "SELECT": selectKwd, 436 "SERIALIZABLE": serializable, 437 "SESSION": session, 438 "SET": set, 439 "SEPARATOR": separator, 440 "SHARE": share, 441 "SHARED": shared, 442 "SHOW": show, 443 "SIGNED": signed, 444 "SLAVE": slave, 445 "SLOW": slow, 446 "SMALLINT": smallIntType, 447 "SNAPSHOT": snapshot, 448 "SOME": some, 449 "SQL": sql, 450 "SQL_CACHE": sqlCache, 451 "SQL_CALC_FOUND_ROWS": sqlCalcFoundRows, 452 "SQL_NO_CACHE": sqlNoCache, 453 "START": start, 454 "STARTING": starting, 455 "STATS": stats, 456 "STATS_BUCKETS": statsBuckets, 457 "STATS_HISTOGRAMS": statsHistograms, 458 "STATS_HEALTHY": statsHealthy, 459 "STATS_META": statsMeta, 460 "STATS_PERSISTENT": statsPersistent, 461 "STATUS": status, 462 "STD": stddevPop, 463 "STDDEV": stddevPop, 464 "STDDEV_POP": stddevPop, 465 "STDDEV_SAMP": stddevSamp, 466 "STORED": stored, 467 "STRAIGHT_JOIN": straightJoin, 468 "SUBDATE": subDate, 469 "SUBPARTITION": subpartition, 470 "SUBPARTITIONS": subpartitions, 471 "SUBSTR": substring, 472 "SUBSTRING": substring, 473 "SUM": sum, 474 "SUPER": super, 475 "TABLE": tableKwd, 476 "TABLES": tables, 477 "TABLESPACE": tablespace, 478 "TEMPORARY": temporary, 479 "TEMPTABLE": temptable, 480 "TERMINATED": terminated, 481 "TEXT": textType, 482 "THAN": than, 483 "THEN": then, 484 "TIDB": tidb, 485 "TIDB_HJ": tidbHJ, 486 "TIDB_INLJ": tidbINLJ, 487 "TIDB_SMJ": tidbSMJ, 488 "TIME": timeType, 489 "TIMESTAMP": timestampType, 490 "TIMESTAMPADD": timestampAdd, 491 "TIMESTAMPDIFF": timestampDiff, 492 "TINYBLOB": tinyblobType, 493 "TINYINT": tinyIntType, 494 "TINYTEXT": tinytextType, 495 "TO": to, 496 "TOP": top, 497 "TRACE": trace, 498 "TRAILING": trailing, 499 "TRANSACTION": transaction, 500 "TRIGGER": trigger, 501 "TRIGGERS": triggers, 502 "TRIM": trim, 503 "TRUE": trueKwd, 504 "TRUNCATE": truncate, 505 "UNBOUNDED": unbounded, 506 "UNCOMMITTED": uncommitted, 507 "UNDEFINED": undefined, 508 "UNION": union, 509 "UNIQUE": unique, 510 "UNKNOWN": unknown, 511 "UNLOCK": unlock, 512 "UNSIGNED": unsigned, 513 "UPDATE": update, 514 "USAGE": usage, 515 "USE": use, 516 "USER": user, 517 "USING": using, 518 "UTC_DATE": utcDate, 519 "UTC_TIME": utcTime, 520 "UTC_TIMESTAMP": utcTimestamp, 521 "VALUE": value, 522 "VALUES": values, 523 "VARBINARY": varbinaryType, 524 "VARCHAR": varcharType, 525 "VARIABLES": variables, 526 "VARIANCE": varPop, 527 "VAR_POP": varPop, 528 "VAR_SAMP": varSamp, 529 "VIEW": view, 530 "VIRTUAL": virtual, 531 "WARNINGS": warnings, 532 "ERRORS": identSQLErrors, 533 "WEEK": week, 534 "WHEN": when, 535 "WHERE": where, 536 "WITH": with, 537 "WRITE": write, 538 "XOR": xor, 539 "YEAR": yearType, 540 "YEAR_MONTH": yearMonth, 541 "ZEROFILL": zerofill, 542 "BINDING": binding, 543 "BINDINGS": bindings, 544 "WORK": work, 545 "CHAIN": chain, 546 "RELEASE": release, 547 } 548 549 // See https://dev.mysql.com/doc/refman/5.7/en/function-resolution.html for details 550 var btFuncTokenMap = map[string]int{ 551 "ADDDATE": builtinAddDate, 552 "BIT_AND": builtinBitAnd, 553 "BIT_OR": builtinBitOr, 554 "BIT_XOR": builtinBitXor, 555 "CAST": builtinCast, 556 "COUNT": builtinCount, 557 "CURDATE": builtinCurDate, 558 "CURTIME": builtinCurTime, 559 "DATE_ADD": builtinDateAdd, 560 "DATE_SUB": builtinDateSub, 561 "EXTRACT": builtinExtract, 562 "GROUP_CONCAT": builtinGroupConcat, 563 "MAX": builtinMax, 564 "MID": builtinSubstring, 565 "MIN": builtinMin, 566 "NOW": builtinNow, 567 "POSITION": builtinPosition, 568 "SESSION_USER": builtinUser, 569 "STD": builtinStddevPop, 570 "STDDEV": builtinStddevPop, 571 "STDDEV_POP": builtinStddevPop, 572 "STDDEV_SAMP": builtinStddevSamp, 573 "SUBDATE": builtinSubDate, 574 "SUBSTR": builtinSubstring, 575 "SUBSTRING": builtinSubstring, 576 "SUM": builtinSum, 577 "SYSDATE": builtinSysDate, 578 "SYSTEM_USER": builtinUser, 579 "TRIM": builtinTrim, 580 "VARIANCE": builtinVarPop, 581 "VAR_POP": builtinVarPop, 582 "VAR_SAMP": builtinVarSamp, 583 } 584 585 var windowFuncTokenMap = map[string]int{ 586 "CUME_DIST": cumeDist, 587 "DENSE_RANK": denseRank, 588 "FIRST_VALUE": firstValue, 589 "GROUPS": groups, 590 "LAG": lag, 591 "LAST_VALUE": lastValue, 592 "LEAD": lead, 593 "NTH_VALUE": nthValue, 594 "NTILE": ntile, 595 "OVER": over, 596 "PERCENT_RANK": percentRank, 597 "RANK": rank, 598 "ROWS": rows, 599 "ROW_NUMBER": rowNumber, 600 "WINDOW": window, 601 } 602 603 // aliases are strings directly map to another string and use the same token. 604 var aliases = map[string]string{ 605 "SCHEMA": "DATABASE", 606 "SCHEMAS": "DATABASES", 607 "DEC": "DECIMAL", 608 "SUBSTR": "SUBSTRING", 609 } 610 611 func (s *Scanner) isTokenIdentifier(lit string, offset int) int { 612 // An identifier before or after '.' means it is part of a qualified identifier. 613 // We do not parse it as keyword. 614 if s.r.peek() == '.' { 615 return 0 616 } 617 if offset > 0 && s.r.s[offset-1] == '.' { 618 return 0 619 } 620 buf := &s.buf 621 buf.Reset() 622 buf.Grow(len(lit)) 623 data := buf.Bytes()[:len(lit)] 624 for i := 0; i < len(lit); i++ { 625 if lit[i] >= 'a' && lit[i] <= 'z' { 626 data[i] = lit[i] + 'A' - 'a' 627 } else { 628 data[i] = lit[i] 629 } 630 } 631 632 checkBtFuncToken, tokenStr := false, string(data) 633 if s.r.peek() == '(' { 634 checkBtFuncToken = true 635 } else if s.sqlMode.HasIgnoreSpaceMode() { 636 s.skipWhitespace() 637 if s.r.peek() == '(' { 638 checkBtFuncToken = true 639 } 640 } 641 if checkBtFuncToken { 642 if tok := btFuncTokenMap[tokenStr]; tok != 0 { 643 return tok 644 } 645 } 646 tok, ok := tokenMap[string(data)] 647 if !ok && s.supportWindowFunc { 648 tok = windowFuncTokenMap[string(data)] 649 } 650 return tok 651 } 652 653 func handleIdent(lval *yySymType) int { 654 s := lval.ident 655 // A character string literal may have an optional character set introducer and COLLATE clause: 656 // [_charset_name]'string' [COLLATE collation_name] 657 // See https://dev.mysql.com/doc/refman/5.7/en/charset-literal.html 658 if !strings.HasPrefix(s, "_") { 659 return identifier 660 } 661 cs, _, err := mysql.GetCharsetInfo(s[1:]) 662 if err != nil { 663 return identifier 664 } 665 lval.ident = cs 666 return underscoreCS 667 }