github.com/wfusion/gofusion@v1.1.14/common/utils/sqlparser/lexer.go (about) 1 package sqlparser 2 3 import ( 4 "bufio" 5 "bytes" 6 "io" 7 "strings" 8 "unicode" 9 ) 10 11 type Lexer struct { 12 r io.RuneReader 13 buf bytes.Buffer 14 15 ch rune 16 pos Pos 17 full bool 18 } 19 20 func NewLexer(r io.Reader) *Lexer { 21 return &Lexer{ 22 r: bufio.NewReader(r), 23 pos: Pos{Offset: -1, Line: 1}, 24 } 25 } 26 27 func (l *Lexer) Lex() (pos Pos, token Token, lit string) { 28 for { 29 if ch := l.peek(); ch == -1 { 30 return l.pos, EOF, "" 31 } else if unicode.IsSpace(ch) { 32 l.read() 33 continue 34 } else if isDigit(ch) || ch == '.' { 35 return l.lexNumber() 36 } else if ch == 'x' || ch == 'X' { 37 return l.lexBlob() 38 } else if isAlpha(ch) || ch == '_' { 39 return l.lexUnquotedIdent(l.pos, "") 40 } else if ch == '"' || ch == '`' { 41 return l.lexQuotedIdent(ch) 42 } else if ch == '\'' { 43 return l.lexString() 44 } else if ch == '?' || ch == ':' || ch == '@' || ch == '$' { 45 return l.lexBind() 46 } 47 48 switch ch, pos := l.read(); ch { 49 case ';': 50 return pos, SEMI, ";" 51 case '(': 52 return pos, LP, "(" 53 case ')': 54 return pos, RP, ")" 55 case ',': 56 return pos, COMMA, "," 57 case '!': 58 if l.peek() == '=' { 59 l.read() 60 return pos, NE, "!=" 61 } 62 return pos, BITNOT, "!" 63 case '=': 64 return pos, EQ, "=" 65 case '<': 66 if l.peek() == '=' { 67 l.read() 68 return pos, LE, "<=" 69 } else if l.peek() == '<' { 70 l.read() 71 return pos, LSHIFT, "<<" 72 } else if l.peek() == '>' { 73 l.read() 74 return pos, LG, "<>" 75 } 76 return pos, LT, "<" 77 case '>': 78 if l.peek() == '=' { 79 l.read() 80 return pos, GE, ">=" 81 } else if l.peek() == '>' { 82 l.read() 83 return pos, RSHIFT, ">>" 84 } 85 return pos, GT, ">" 86 case '&': 87 return pos, BITAND, "&" 88 case '|': 89 if l.peek() == '|' { 90 l.read() 91 return pos, CONCAT, "||" 92 } 93 return pos, BITOR, "|" 94 case '+': 95 return pos, PLUS, "+" 96 case '-': 97 return pos, MINUS, "-" 98 case '*': 99 return pos, STAR, "*" 100 case '/': 101 if l.peek() == '*' { 102 return l.lexMultilineComment() 103 } 104 return pos, SLASH, "/" 105 case '%': 106 return pos, REM, "%" 107 default: 108 return pos, ILLEGAL, string(ch) 109 } 110 } 111 } 112 113 func (l *Lexer) lexUnquotedIdent(pos Pos, prefix string) (Pos, Token, string) { 114 assert(isUnquotedIdent(l.peek())) 115 116 l.buf.Reset() 117 l.buf.WriteString(prefix) 118 for ch, _ := l.read(); isUnquotedIdent(ch); ch, _ = l.read() { 119 l.buf.WriteRune(ch) 120 } 121 l.unread() 122 123 lit := l.buf.String() 124 tok := Lookup(lit) 125 return pos, tok, lit 126 } 127 128 func (l *Lexer) lexQuotedIdent(char rune) (Pos, Token, string) { 129 ch, pos := l.read() 130 assert(ch == char) 131 132 l.buf.Reset() 133 l.buf.WriteRune(char) 134 for { 135 ch, _ := l.read() 136 if ch == -1 { 137 return pos, ILLEGAL, l.buf.String() 138 } else if ch == char { 139 if l.peek() == char { // escaped quote 140 l.read() 141 l.buf.WriteRune(char) 142 continue 143 } 144 l.buf.WriteRune(char) 145 return pos, QIDENT, l.buf.String() 146 } 147 l.buf.WriteRune(ch) 148 } 149 } 150 151 func (l *Lexer) lexString() (Pos, Token, string) { 152 ch, pos := l.read() 153 assert(ch == '\'') 154 155 l.buf.Reset() 156 for { 157 ch, _ := l.read() 158 if ch == -1 { 159 return pos, ILLEGAL, `'` + l.buf.String() 160 } else if ch == '\'' { 161 if l.peek() == '\'' { // escaped quote 162 l.read() 163 l.buf.WriteRune('\'') 164 continue 165 } 166 return pos, STRING, l.buf.String() 167 } 168 l.buf.WriteRune(ch) 169 } 170 } 171 172 func (l *Lexer) lexMultilineComment() (Pos, Token, string) { 173 ch, pos := l.read() 174 assert(ch == '*') 175 176 l.buf.Reset() 177 for { 178 ch, _ := l.read() 179 if ch == -1 { 180 return pos, ILLEGAL, `/*` + l.buf.String() 181 } else if ch == '*' { 182 if l.peek() == '/' { 183 l.read() 184 l.read() 185 return pos, MLCOMMENT, strings.Trim(l.buf.String(), " ") 186 } 187 } 188 l.buf.WriteRune(ch) 189 } 190 } 191 192 func (l *Lexer) lexBind() (Pos, Token, string) { 193 start, pos := l.read() 194 195 l.buf.Reset() 196 l.buf.WriteRune(start) 197 198 // Question mark starts a numeric bind. 199 if start == '?' { 200 for isDigit(l.peek()) { 201 ch, _ := l.read() 202 l.buf.WriteRune(ch) 203 } 204 return pos, BIND, l.buf.String() 205 } 206 207 // All other characters start an alphanumeric bind. 208 assert(start == ':' || start == '@' || start == '$') 209 for isUnquotedIdent(l.peek()) { 210 ch, _ := l.read() 211 l.buf.WriteRune(ch) 212 } 213 return pos, BIND, l.buf.String() 214 } 215 216 func (l *Lexer) lexBlob() (Pos, Token, string) { 217 start, pos := l.read() 218 assert(start == 'x' || start == 'X') 219 220 // If the next character is not a quote, it's an IDENT. 221 if isUnquotedIdent(l.peek()) { 222 return l.lexUnquotedIdent(pos, string(start)) 223 } else if l.peek() != '\'' { 224 return pos, IDENT, string(start) 225 } 226 ch, _ := l.read() 227 assert(ch == '\'') 228 229 l.buf.Reset() 230 for i := 0; ; i++ { 231 ch, _ := l.read() 232 if ch == '\'' { 233 return pos, BLOB, l.buf.String() 234 } else if ch == -1 { 235 return pos, ILLEGAL, string(start) + `'` + l.buf.String() 236 } else if !isHex(ch) { 237 return pos, ILLEGAL, string(start) + `'` + l.buf.String() + string(ch) 238 } 239 l.buf.WriteRune(ch) 240 } 241 } 242 243 func (l *Lexer) lexNumber() (Pos, Token, string) { 244 assert(isDigit(l.peek()) || l.peek() == '.') 245 pos := l.pos 246 tok := INTEGER 247 248 l.buf.Reset() 249 250 // Read whole number if starting with a digit. 251 if isDigit(l.peek()) { 252 for isDigit(l.peek()) { 253 ch, _ := l.read() 254 l.buf.WriteRune(ch) 255 } 256 } 257 258 // Read decimal and successive digitl. 259 if l.peek() == '.' { 260 tok = FLOAT 261 262 ch, _ := l.read() 263 l.buf.WriteRune(ch) 264 265 for isDigit(l.peek()) { 266 ch, _ := l.read() 267 l.buf.WriteRune(ch) 268 } 269 } 270 271 // Read exponent with optional +/- sign. 272 if ch := l.peek(); ch == 'e' || ch == 'E' { 273 tok = FLOAT 274 275 ch, _ := l.read() 276 l.buf.WriteRune(ch) 277 278 if l.peek() == '+' || l.peek() == '-' { 279 ch, _ := l.read() 280 l.buf.WriteRune(ch) 281 if !isDigit(l.peek()) { 282 return pos, ILLEGAL, l.buf.String() 283 } 284 for isDigit(l.peek()) { 285 ch, _ := l.read() 286 l.buf.WriteRune(ch) 287 } 288 } else if isDigit(l.peek()) { 289 for isDigit(l.peek()) { 290 ch, _ := l.read() 291 l.buf.WriteRune(ch) 292 } 293 } else { 294 return pos, ILLEGAL, l.buf.String() 295 } 296 } 297 298 lit := l.buf.String() 299 if lit == "." { 300 return pos, DOT, lit 301 } 302 return pos, tok, lit 303 } 304 305 func (l *Lexer) read() (rune, Pos) { 306 if l.full { 307 l.full = false 308 return l.ch, l.pos 309 } 310 311 var err error 312 l.ch, _, err = l.r.ReadRune() 313 if err != nil { 314 l.ch = -1 315 return l.ch, l.pos 316 } 317 318 l.pos.Offset++ 319 if l.ch == '\n' { 320 l.pos.Line++ 321 l.pos.Column = 0 322 } else { 323 l.pos.Column++ 324 } 325 return l.ch, l.pos 326 } 327 328 func (l *Lexer) peek() rune { 329 if !l.full { 330 l.read() 331 l.unread() 332 } 333 return l.ch 334 } 335 336 func (l *Lexer) unread() { 337 assert(!l.full) 338 l.full = true 339 } 340 341 func isDigit(ch rune) bool { 342 return ch >= '0' && ch <= '9' 343 } 344 345 func isAlpha(ch rune) bool { 346 return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') 347 } 348 349 func isHex(ch rune) bool { 350 return isDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') 351 } 352 353 func isUnquotedIdent(ch rune) bool { 354 return isAlpha(ch) || isDigit(ch) || ch == '_' 355 } 356 357 // IsInteger returns true if s only contains digits. 358 func IsInteger(s string) bool { 359 for _, ch := range s { 360 if !isDigit(ch) { 361 return false 362 } 363 } 364 return s != "" 365 }