github.com/walf443/mgr@v0.0.0-20150203144449-6f7a3a548462/sqlparser/mysql/lexer.go (about) 1 package mysql 2 3 //go:generate go tool yacc -o parser.go -v parser.output parser.go.y 4 import ( 5 "errors" 6 "fmt" 7 "strings" 8 ) 9 10 const ( 11 EOF = -1 12 UNKNOWN = 0 13 ) 14 15 var keywords = map[string]int{ 16 "ADD": ADD, 17 "DROP": DROP, 18 "CREATE": CREATE, 19 "ALTER": ALTER, 20 "COLUMN": COLUMN, 21 "TABLE": TABLE, 22 "INDEX": INDEX, 23 "KEY": KEY, 24 "DATABASE": DATABASE, 25 "NULL": NULL, 26 "NOT": NOT, 27 "AUTO_INCREMENT": AUTO_INCREMENT, 28 "DEFAULT": DEFAULT, 29 "CURRENT_TIMESTAMP": CURRENT_TIMESTAMP, 30 "ON": ON, 31 "UPDATE": UPDATE, 32 "PRIMARY": PRIMARY, 33 "UNIQUE": UNIQUE, 34 "USING": USING, 35 "HASH": HASH, 36 "BTREE": BTREE, 37 "ENGINE": ENGINE, 38 "CHARSET": CHARSET, 39 "CHARACTER": CHARACTER, 40 "COLLATE": COLLATE, 41 "SET": SET, 42 "AVG_ROW_LENGTH": AVG_ROW_LENGTH, 43 "CHECKSUM": CHECKSUM, 44 "COMMENT": COMMENT, 45 "KEY_BLOCK_SIZE": KEY_BLOCK_SIZE, 46 "MAX_ROWS": MAX_ROWS, 47 "MIN_ROWS": MIN_ROWS, 48 "ROW_FORMAT": ROW_FORMAT, 49 50 // datatypes 51 "BIT": BIT, 52 "TINYINT": TINYINT, 53 "SMALLINT": SMALLINT, 54 "MEDIUMINT": MEDIUMINT, 55 "INT": INT, 56 "INTEGER": INTEGER, 57 "BIGINT": BIGINT, 58 "REAL": REAL, 59 "DOUBLE": DOUBLE, 60 "FLOAT": FLOAT, 61 "DECIMAL": DECIMAL, 62 "NUMERIC": NUMERIC, 63 "DATE": DATE, 64 "TIME": TIME, 65 "TIMESTAMP": TIMESTAMP, 66 "DATETIME": DATETIME, 67 "YEAR": YEAR, 68 "CHAR": CHAR, 69 "VARCHAR": VARCHAR, 70 "BINARY": BINARY, 71 "VARBINARY": VARBINARY, 72 "TINYBLOB": TINYBLOB, 73 "BLOB": BLOB, 74 "MEDIUMBLOB": MEDIUMBLOB, 75 "LONGBLOB": LONGBLOB, 76 "TINYTEXT": TINYTEXT, 77 "TEXT": TEXT, 78 "MEDIUMTEXT": MEDIUMTEXT, 79 "LONGTEXT": LONGTEXT, 80 81 // datatype options 82 "UNSIGNED": UNSIGNED, 83 "ZEROFILL": ZEROFILL, 84 } 85 86 type Position struct { 87 Line int 88 Column int 89 } 90 91 type Scanner struct { 92 src []rune 93 offset int 94 lineHead int 95 line int 96 markRawUntil []rune 97 nextLiteral string 98 } 99 100 func (s *Scanner) Init(src string) { 101 s.src = []rune(src) 102 } 103 104 func (s *Scanner) Scan() (tok int, lit string, pos Position) { 105 if s.nextLiteral != "" { 106 switch s.nextLiteral { 107 case "*/": 108 tok = COMMENT_FINISH 109 case "`": 110 tok = int('`') 111 case "'": 112 tok = int('\'') 113 case "\"": 114 tok = int('"') 115 } 116 pos = s.position() 117 for i := 0; i < len(s.nextLiteral); i++ { 118 s.next() 119 } 120 lit = s.nextLiteral 121 s.nextLiteral = "" 122 return 123 } 124 if len(s.markRawUntil) == 0 { 125 s.skipWhiteSpace() 126 pos = s.position() 127 switch ch := s.peek(); { 128 case ch == '/' && s.readAhead(1) == '*': 129 s.next() 130 s.next() 131 tok = COMMENT_START 132 lit = "/*" 133 s.markRawUntil = []rune{'*', '/'} 134 case isLetter(ch): 135 lit = s.scanIdentifier() 136 if keyword, ok := keywords[strings.ToUpper(lit)]; ok { 137 tok = keyword 138 } else { 139 tok = IDENT 140 } 141 case isNumber(ch): 142 lit = s.scanNumber() 143 tok = NUMBER 144 case ch == '`': 145 s.markRawUntil = []rune{'`'} 146 tok = int(ch) 147 lit = string(ch) 148 s.next() 149 case ch == '\'': 150 s.markRawUntil = []rune{'\''} 151 tok = int(ch) 152 lit = string(ch) 153 s.next() 154 case ch == '"': 155 s.markRawUntil = []rune{'"'} 156 tok = int(ch) 157 lit = string(ch) 158 s.next() 159 default: 160 switch ch { 161 case -1: 162 tok = EOF 163 case ';', ',', '`', '.', '(', ')', '=': 164 tok = int(ch) 165 lit = string(ch) 166 } 167 s.next() 168 } 169 } else { 170 var err error 171 lit, err = s.scanUntil(s.markRawUntil) 172 if err != nil { 173 panic(err) 174 } 175 tok = RAW 176 s.nextLiteral = string(s.markRawUntil) 177 s.markRawUntil = []rune{} 178 } 179 return 180 } 181 182 func (s *Scanner) peek() rune { 183 if !s.reachEOF(0) { 184 return s.src[s.offset] 185 } else { 186 return -1 187 } 188 } 189 190 func (s *Scanner) readAhead(offset int) rune { 191 if !s.reachEOF(offset) { 192 return s.src[s.offset+offset] 193 } else { 194 return -1 195 } 196 } 197 198 func (s *Scanner) next() { 199 if !s.reachEOF(0) { 200 if s.peek() == '\n' { 201 s.lineHead = s.offset + 1 202 s.line++ 203 } 204 s.offset++ 205 } 206 } 207 208 func (s *Scanner) CurrentLine() string { 209 cursor := s.lineHead 210 var bytes []rune 211 for { 212 ch := s.src[cursor] 213 214 if ch == '\n' { 215 break 216 } 217 bytes = append(bytes, ch) 218 cursor++ 219 if len(s.src) <= cursor { 220 break 221 } 222 } 223 return string(bytes) 224 } 225 226 func isLetter(ch rune) bool { 227 return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' 228 } 229 230 func isNumber(ch rune) bool { 231 return '0' <= ch && ch <= '9' 232 } 233 234 func isWhiteSpace(ch rune) bool { 235 return ch == ' ' || ch == '\t' || ch == '\n' 236 } 237 238 func (s *Scanner) reachEOF(offset int) bool { 239 return len(s.src) <= s.offset+offset 240 } 241 242 func (s *Scanner) position() Position { 243 return Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1} 244 } 245 246 func (s *Scanner) skipWhiteSpace() { 247 for isWhiteSpace(s.peek()) { 248 s.next() 249 } 250 } 251 252 func (s *Scanner) scanIdentifier() string { 253 var ret []rune 254 for isLetter(s.peek()) || isNumber(s.peek()) { 255 ret = append(ret, s.peek()) 256 s.next() 257 } 258 259 return string(ret) 260 } 261 262 func (s *Scanner) scanUntil(finish []rune) (string, error) { 263 cursor := 0 264 finish_pos := len(finish) - 1 265 var ret []rune 266 for { 267 ch := s.peek() 268 if ch == finish[cursor] { 269 for { 270 cursor++ 271 if cursor > finish_pos { 272 return string(ret), nil 273 } 274 ch2 := s.readAhead(cursor) 275 if ch2 != finish[cursor] { 276 cursor = 0 277 break 278 } 279 if ch2 == -1 { 280 return "", errors.New(fmt.Sprintf("unexpected EOF string. exptected \"%s\"", finish)) 281 } 282 } 283 } 284 285 ret = append(ret, s.peek()) 286 s.next() 287 } 288 289 return string(ret), nil 290 } 291 292 func (s *Scanner) scanNumber() string { 293 var ret []rune 294 for isNumber(s.peek()) { 295 ret = append(ret, s.peek()) 296 s.next() 297 } 298 return string(ret) 299 }