github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/scanner/states.go (about)

     1  package scanner
     2  
     3  import (
     4  	"github.com/arnodel/golua/token"
     5  )
     6  
     7  func scanToken(l *Scanner) stateFn {
     8  	for {
     9  		switch c := l.next(); {
    10  		case c == '-':
    11  			if l.next() == '-' {
    12  				return scanComment
    13  			}
    14  			l.backup()
    15  			l.emit(token.SgMinus)
    16  		case c == '"' || c == '\'':
    17  			return scanShortString(c)
    18  		case isDec(c):
    19  			l.backup()
    20  			return scanNumber
    21  		case c == '[':
    22  			n := l.next()
    23  			if n == '[' || n == '=' {
    24  				l.backup()
    25  				return scanLongString
    26  			}
    27  			l.backup()
    28  			l.emit(token.SgOpenSquareBkt)
    29  		case isAlpha(c):
    30  			return scanIdent
    31  		case isSpace(c):
    32  			l.ignore()
    33  		default:
    34  			switch c {
    35  			case ';', '(', ')', ',', '|', '&', '+', '*', '%', '^', '#', ']', '{', '}':
    36  			case '=':
    37  				l.accept("=")
    38  			case ':':
    39  				l.accept(":")
    40  			case '.':
    41  				if accept(l, isDec, -1) > 0 {
    42  					return scanExp(l, isDec, "eE", token.NUMDEC)
    43  				}
    44  				if l.accept(".") {
    45  					l.accept(".")
    46  				}
    47  			case '<':
    48  				l.accept("=<")
    49  			case '>':
    50  				l.accept("=>")
    51  			case '~':
    52  				l.accept("=")
    53  			case '/':
    54  				l.accept("/")
    55  			case -1:
    56  				l.emit(token.EOF)
    57  				return nil
    58  			default:
    59  				return l.errorf(token.INVALID, "illegal character")
    60  			}
    61  			l.emit(sgType[string(l.lit())])
    62  		}
    63  		return scanToken
    64  	}
    65  }
    66  
    67  func scanComment(l *Scanner) stateFn {
    68  	c := l.next()
    69  	if c == '[' {
    70  		return scanLongComment
    71  	}
    72  	l.backup()
    73  	return scanShortComment
    74  }
    75  
    76  func scanShortComment(l *Scanner) stateFn {
    77  	for {
    78  		switch c := l.next(); c {
    79  		case '\n':
    80  			l.acceptRune('\r')
    81  			l.ignore()
    82  			return scanToken
    83  		case -1:
    84  			l.ignore()
    85  			l.emit(token.EOF)
    86  			return nil
    87  		}
    88  	}
    89  }
    90  
    91  func scanLongComment(l *Scanner) stateFn {
    92  	return scanLong(true)
    93  }
    94  
    95  func scanLong(comment bool) stateFn {
    96  	return func(l *Scanner) stateFn {
    97  		level := 0
    98  	OpeningLoop:
    99  		for {
   100  			switch c := l.next(); c {
   101  			case '=':
   102  				level++
   103  			case '[':
   104  				break OpeningLoop
   105  			default:
   106  				if comment {
   107  					l.ignore()
   108  					return scanShortComment
   109  				}
   110  				return l.errorf(token.INVALID, "expected opening long bracket")
   111  			}
   112  		}
   113  		closeLevel := -1
   114  		// -1 means we haven't starting closing a bracket
   115  		// 0 means we have processed the first ']'
   116  		// n > 0 means we have processed ']' + n*'='
   117  		for {
   118  			switch c := l.next(); c {
   119  			case ']':
   120  				if closeLevel == level {
   121  					if comment {
   122  						l.ignore()
   123  					} else {
   124  						l.emit(token.LONGSTRING)
   125  					}
   126  					return scanToken
   127  				}
   128  				closeLevel = 0
   129  			case '=':
   130  				if closeLevel >= 0 {
   131  					closeLevel++
   132  				}
   133  			case -1:
   134  				return l.errorf(token.UNFINISHED, "illegal <eof> in long bracket of level %d", level)
   135  			default:
   136  				closeLevel = -1
   137  			}
   138  		}
   139  	}
   140  }
   141  
   142  func scanShortString(q rune) stateFn {
   143  	return func(l *Scanner) stateFn {
   144  		for {
   145  			switch c := l.next(); c {
   146  			case q:
   147  				l.emit(token.STRING)
   148  				return scanToken
   149  			case '\\':
   150  				switch c := l.next(); {
   151  				case c == 'x':
   152  					if accept(l, isHex, 2) != 2 {
   153  						return l.errorf(token.INVALID, `\x must be followed by 2 hex digits`)
   154  					}
   155  				case isDec(c):
   156  					accept(l, isDec, 2)
   157  				case c == 'u':
   158  					if l.next() != '{' {
   159  						return l.errorf(token.INVALID, `\u must be followed by '{'`)
   160  					}
   161  					if accept(l, isHex, -1) == 0 {
   162  						return l.errorf(token.INVALID, "at least 1 hex digit required")
   163  					}
   164  					if l.next() != '}' {
   165  						return l.errorf(token.INVALID, "missing '}'")
   166  					}
   167  				case c == 'z':
   168  					accept(l, isSpace, -1)
   169  				default:
   170  					switch c {
   171  					case '\n':
   172  						// Nothing to do
   173  					case 'a', 'b', 'f', 'n', 'r', 't', 'v', 'z', '"', '\'', '\\':
   174  						break
   175  					default:
   176  						return l.errorf(token.INVALID, "illegal escaped character")
   177  					}
   178  				}
   179  			case '\n', '\r':
   180  				return l.errorf(token.INVALID, "illegal new line in string literal")
   181  			case -1:
   182  				return l.errorf(token.INVALID, "illegal <eof> in string literal")
   183  			}
   184  		}
   185  	}
   186  }
   187  
   188  // For scanning numbers e.g. in files
   189  func scanNumberPrefix(l *Scanner) stateFn {
   190  	accept(l, isSpace, -1)
   191  	l.accept("+-")
   192  	return scanNumber
   193  }
   194  
   195  func scanNumber(l *Scanner) stateFn {
   196  	isDigit := isDec
   197  	exp := "eE"
   198  	tp := token.NUMDEC
   199  	leading0 := l.accept("0")
   200  	dcount := 0
   201  	if leading0 && l.accept("xX") {
   202  		isDigit = isHex
   203  		exp = "pP"
   204  		tp = token.NUMHEX
   205  	} else if leading0 {
   206  		dcount++
   207  	}
   208  	dcount += accept(l, isDigit, -1)
   209  	if l.accept(".") {
   210  		dcount += accept(l, isDigit, -1)
   211  	}
   212  	if dcount == 0 {
   213  		return l.errorf(token.INVALID, "no digits in mantissa")
   214  	}
   215  	return scanExp(l, isDigit, exp, tp)
   216  }
   217  
   218  func scanExp(l *Scanner, isDigit func(rune) bool, exp string, tp token.Type) stateFn {
   219  	if l.accept(exp) {
   220  		l.accept("+-")
   221  		if accept(l, isDec, -1) == 0 {
   222  			return l.errorf(token.INVALID, "digit required after exponent")
   223  		}
   224  	}
   225  	l.emit(tp)
   226  	if isAlpha(l.peek()) {
   227  		l.next()
   228  		return l.errorf(token.INVALID, "illegal character following number")
   229  	}
   230  	return scanToken
   231  }
   232  
   233  func scanLongString(l *Scanner) stateFn {
   234  	return scanLong(false)
   235  }
   236  
   237  var kwType = map[string]token.Type{
   238  	"break":    token.KwBreak,
   239  	"goto":     token.KwGoto,
   240  	"do":       token.KwDo,
   241  	"while":    token.KwWhile,
   242  	"end":      token.KwEnd,
   243  	"repeat":   token.KwRepeat,
   244  	"until":    token.KwUntil,
   245  	"then":     token.KwThen,
   246  	"else":     token.KwElse,
   247  	"elseif":   token.KwElseIf,
   248  	"if":       token.KwIf,
   249  	"for":      token.KwFor,
   250  	"in":       token.KwIn,
   251  	"function": token.KwFunction,
   252  	"local":    token.KwLocal,
   253  	"and":      token.KwAnd,
   254  	"or":       token.KwOr,
   255  	"not":      token.KwNot,
   256  	"nil":      token.KwNil,
   257  	"true":     token.KwTrue,
   258  	"false":    token.KwFalse,
   259  	"return":   token.KwReturn,
   260  }
   261  
   262  var sgType = map[string]token.Type{
   263  	"-":  token.SgMinus,
   264  	"+":  token.SgPlus,
   265  	"*":  token.SgStar,
   266  	"/":  token.SgSlash,
   267  	"//": token.SgSlashSlash,
   268  	"%":  token.SgPct,
   269  	"|":  token.SgPipe,
   270  	"&":  token.SgAmpersand,
   271  	"^":  token.SgHat,
   272  	">>": token.SgShiftRight,
   273  	"<<": token.SgShiftLeft,
   274  	"..": token.SgConcat,
   275  
   276  	"==": token.SgEqual,
   277  	"~=": token.SgNotEqual,
   278  	"<":  token.SgLess,
   279  	"<=": token.SgLessEqual,
   280  	">":  token.SgGreater,
   281  	">=": token.SgGreaterEqual,
   282  
   283  	"...": token.SgEtc,
   284  
   285  	"[":  token.SgOpenSquareBkt,
   286  	"]":  token.SgCloseSquareBkt,
   287  	"(":  token.SgOpenBkt,
   288  	")":  token.SgCloseBkt,
   289  	"{":  token.SgOpenBrace,
   290  	"}":  token.SgCloseBrace,
   291  	";":  token.SgSemicolon,
   292  	",":  token.SgComma,
   293  	".":  token.SgDot,
   294  	":":  token.SgColon,
   295  	"::": token.SgDoubleColon,
   296  	"=":  token.SgAssign,
   297  	"#":  token.SgHash,
   298  	"~":  token.SgTilde,
   299  }
   300  
   301  func scanIdent(l *Scanner) stateFn {
   302  	accept(l, isAlnum, -1)
   303  	tp, ok := kwType[string(l.lit())]
   304  	if !ok {
   305  		tp = token.IDENT
   306  	}
   307  	l.emit(tp)
   308  	return scanToken
   309  }
   310  
   311  func isDec(x rune) bool {
   312  	return '0' <= x && x <= '9'
   313  }
   314  
   315  func isAlpha(x rune) bool {
   316  	return x >= 'a' && x <= 'z' || x >= 'A' && x <= 'Z' || x == '_'
   317  }
   318  
   319  func isAlnum(x rune) bool {
   320  	return isDec(x) || isAlpha(x)
   321  }
   322  
   323  func isHex(x rune) bool {
   324  	return isDec(x) || 'a' <= x && x <= 'f' || 'A' <= x && x <= 'F'
   325  }
   326  
   327  func isSpace(x rune) bool {
   328  	return x == ' ' || x == '\n' || x == '\r' || x == '\t' || x == '\v' || x == '\f'
   329  }
   330  
// runePredicate is a test on a single rune, such as isDec, isHex or isSpace.
// It is the kind of function accept uses to decide which runes to consume.
type runePredicate func(rune) bool
   332  
   333  func accept(l *Scanner, p runePredicate, max int) int {
   334  	for i := 0; i != max; i++ {
   335  		if !p(l.next()) {
   336  			l.backup()
   337  			return i
   338  		}
   339  	}
   340  	return max
   341  }