github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/scanner/states.go (about) 1 package scanner 2 3 import ( 4 "github.com/arnodel/golua/token" 5 ) 6 7 func scanToken(l *Scanner) stateFn { 8 for { 9 switch c := l.next(); { 10 case c == '-': 11 if l.next() == '-' { 12 return scanComment 13 } 14 l.backup() 15 l.emit(token.SgMinus) 16 case c == '"' || c == '\'': 17 return scanShortString(c) 18 case isDec(c): 19 l.backup() 20 return scanNumber 21 case c == '[': 22 n := l.next() 23 if n == '[' || n == '=' { 24 l.backup() 25 return scanLongString 26 } 27 l.backup() 28 l.emit(token.SgOpenSquareBkt) 29 case isAlpha(c): 30 return scanIdent 31 case isSpace(c): 32 l.ignore() 33 default: 34 switch c { 35 case ';', '(', ')', ',', '|', '&', '+', '*', '%', '^', '#', ']', '{', '}': 36 case '=': 37 l.accept("=") 38 case ':': 39 l.accept(":") 40 case '.': 41 if accept(l, isDec, -1) > 0 { 42 return scanExp(l, isDec, "eE", token.NUMDEC) 43 } 44 if l.accept(".") { 45 l.accept(".") 46 } 47 case '<': 48 l.accept("=<") 49 case '>': 50 l.accept("=>") 51 case '~': 52 l.accept("=") 53 case '/': 54 l.accept("/") 55 case -1: 56 l.emit(token.EOF) 57 return nil 58 default: 59 return l.errorf(token.INVALID, "illegal character") 60 } 61 l.emit(sgType[string(l.lit())]) 62 } 63 return scanToken 64 } 65 } 66 67 func scanComment(l *Scanner) stateFn { 68 c := l.next() 69 if c == '[' { 70 return scanLongComment 71 } 72 l.backup() 73 return scanShortComment 74 } 75 76 func scanShortComment(l *Scanner) stateFn { 77 for { 78 switch c := l.next(); c { 79 case '\n': 80 l.acceptRune('\r') 81 l.ignore() 82 return scanToken 83 case -1: 84 l.ignore() 85 l.emit(token.EOF) 86 return nil 87 } 88 } 89 } 90 91 func scanLongComment(l *Scanner) stateFn { 92 return scanLong(true) 93 } 94 95 func scanLong(comment bool) stateFn { 96 return func(l *Scanner) stateFn { 97 level := 0 98 OpeningLoop: 99 for { 100 switch c := l.next(); c { 101 case '=': 102 level++ 103 case '[': 104 break OpeningLoop 105 default: 106 if comment { 107 l.ignore() 108 return scanShortComment 109 } 110 return l.errorf(token.INVALID, "expected opening long bracket") 111 } 112 } 113 closeLevel := -1 114 // -1 means we haven't starting closing a bracket 115 // 0 means we have processed the first ']' 116 // n > 0 means we have processed ']' + n*'=' 117 for { 118 switch c := l.next(); c { 119 case ']': 120 if closeLevel == level { 121 if comment { 122 l.ignore() 123 } else { 124 l.emit(token.LONGSTRING) 125 } 126 return scanToken 127 } 128 closeLevel = 0 129 case '=': 130 if closeLevel >= 0 { 131 closeLevel++ 132 } 133 case -1: 134 return l.errorf(token.UNFINISHED, "illegal <eof> in long bracket of level %d", level) 135 default: 136 closeLevel = -1 137 } 138 } 139 } 140 } 141 142 func scanShortString(q rune) stateFn { 143 return func(l *Scanner) stateFn { 144 for { 145 switch c := l.next(); c { 146 case q: 147 l.emit(token.STRING) 148 return scanToken 149 case '\\': 150 switch c := l.next(); { 151 case c == 'x': 152 if accept(l, isHex, 2) != 2 { 153 return l.errorf(token.INVALID, `\x must be followed by 2 hex digits`) 154 } 155 case isDec(c): 156 accept(l, isDec, 2) 157 case c == 'u': 158 if l.next() != '{' { 159 return l.errorf(token.INVALID, `\u must be followed by '{'`) 160 } 161 if accept(l, isHex, -1) == 0 { 162 return l.errorf(token.INVALID, "at least 1 hex digit required") 163 } 164 if l.next() != '}' { 165 return l.errorf(token.INVALID, "missing '}'") 166 } 167 case c == 'z': 168 accept(l, isSpace, -1) 169 default: 170 switch c { 171 case '\n': 172 // Nothing to do 173 case 'a', 'b', 'f', 'n', 'r', 't', 'v', 'z', '"', '\'', '\\': 174 break 175 default: 176 return l.errorf(token.INVALID, "illegal escaped character") 177 } 178 } 179 case '\n', '\r': 180 return l.errorf(token.INVALID, "illegal new line in string literal") 181 case -1: 182 return l.errorf(token.INVALID, "illegal <eof> in string literal") 183 } 184 } 185 } 186 } 187 188 // For scanning numbers e.g. in files 189 func scanNumberPrefix(l *Scanner) stateFn { 190 accept(l, isSpace, -1) 191 l.accept("+-") 192 return scanNumber 193 } 194 195 func scanNumber(l *Scanner) stateFn { 196 isDigit := isDec 197 exp := "eE" 198 tp := token.NUMDEC 199 leading0 := l.accept("0") 200 dcount := 0 201 if leading0 && l.accept("xX") { 202 isDigit = isHex 203 exp = "pP" 204 tp = token.NUMHEX 205 } else if leading0 { 206 dcount++ 207 } 208 dcount += accept(l, isDigit, -1) 209 if l.accept(".") { 210 dcount += accept(l, isDigit, -1) 211 } 212 if dcount == 0 { 213 return l.errorf(token.INVALID, "no digits in mantissa") 214 } 215 return scanExp(l, isDigit, exp, tp) 216 } 217 218 func scanExp(l *Scanner, isDigit func(rune) bool, exp string, tp token.Type) stateFn { 219 if l.accept(exp) { 220 l.accept("+-") 221 if accept(l, isDec, -1) == 0 { 222 return l.errorf(token.INVALID, "digit required after exponent") 223 } 224 } 225 l.emit(tp) 226 if isAlpha(l.peek()) { 227 l.next() 228 return l.errorf(token.INVALID, "illegal character following number") 229 } 230 return scanToken 231 } 232 233 func scanLongString(l *Scanner) stateFn { 234 return scanLong(false) 235 } 236 237 var kwType = map[string]token.Type{ 238 "break": token.KwBreak, 239 "goto": token.KwGoto, 240 "do": token.KwDo, 241 "while": token.KwWhile, 242 "end": token.KwEnd, 243 "repeat": token.KwRepeat, 244 "until": token.KwUntil, 245 "then": token.KwThen, 246 "else": token.KwElse, 247 "elseif": token.KwElseIf, 248 "if": token.KwIf, 249 "for": token.KwFor, 250 "in": token.KwIn, 251 "function": token.KwFunction, 252 "local": token.KwLocal, 253 "and": token.KwAnd, 254 "or": token.KwOr, 255 "not": token.KwNot, 256 "nil": token.KwNil, 257 "true": token.KwTrue, 258 "false": token.KwFalse, 259 "return": token.KwReturn, 260 } 261 262 var sgType = map[string]token.Type{ 263 "-": token.SgMinus, 264 "+": token.SgPlus, 265 "*": token.SgStar, 266 "/": token.SgSlash, 267 "//": token.SgSlashSlash, 268 "%": token.SgPct, 269 "|": token.SgPipe, 270 "&": token.SgAmpersand, 271 "^": token.SgHat, 272 ">>": token.SgShiftRight, 273 "<<": token.SgShiftLeft, 274 "..": token.SgConcat, 275 276 "==": token.SgEqual, 277 "~=": token.SgNotEqual, 278 "<": token.SgLess, 279 "<=": token.SgLessEqual, 280 ">": token.SgGreater, 281 ">=": token.SgGreaterEqual, 282 283 "...": token.SgEtc, 284 285 "[": token.SgOpenSquareBkt, 286 "]": token.SgCloseSquareBkt, 287 "(": token.SgOpenBkt, 288 ")": token.SgCloseBkt, 289 "{": token.SgOpenBrace, 290 "}": token.SgCloseBrace, 291 ";": token.SgSemicolon, 292 ",": token.SgComma, 293 ".": token.SgDot, 294 ":": token.SgColon, 295 "::": token.SgDoubleColon, 296 "=": token.SgAssign, 297 "#": token.SgHash, 298 "~": token.SgTilde, 299 } 300 301 func scanIdent(l *Scanner) stateFn { 302 accept(l, isAlnum, -1) 303 tp, ok := kwType[string(l.lit())] 304 if !ok { 305 tp = token.IDENT 306 } 307 l.emit(tp) 308 return scanToken 309 } 310 311 func isDec(x rune) bool { 312 return '0' <= x && x <= '9' 313 } 314 315 func isAlpha(x rune) bool { 316 return x >= 'a' && x <= 'z' || x >= 'A' && x <= 'Z' || x == '_' 317 } 318 319 func isAlnum(x rune) bool { 320 return isDec(x) || isAlpha(x) 321 } 322 323 func isHex(x rune) bool { 324 return isDec(x) || 'a' <= x && x <= 'f' || 'A' <= x && x <= 'F' 325 } 326 327 func isSpace(x rune) bool { 328 return x == ' ' || x == '\n' || x == '\r' || x == '\t' || x == '\v' || x == '\f' 329 } 330 331 type runePredicate func(rune) bool 332 333 func accept(l *Scanner, p runePredicate, max int) int { 334 for i := 0; i != max; i++ { 335 if !p(l.next()) { 336 l.backup() 337 return i 338 } 339 } 340 return max 341 }