github.com/xrash/gopher-lua@v0.0.0-20160304065408-e5faab4db06a/parse/lexer.go (about) 1 package parse 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "github.com/yuin/gopher-lua/ast" 8 "io" 9 "reflect" 10 "strconv" 11 "strings" 12 ) 13 14 const EOF = -1 15 const whitespace1 = 1<<'\t' | 1<<'\r' | 1<<' ' 16 const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' 17 18 type Error struct { 19 Pos ast.Position 20 Message string 21 Token string 22 } 23 24 func (e *Error) Error() string { 25 pos := e.Pos 26 if pos.Line == EOF { 27 return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message) 28 } else { 29 return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message) 30 } 31 } 32 33 func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) } 34 35 func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' } 36 37 func isIdent(ch int, pos int) bool { 38 return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0 39 } 40 41 func isDigit(ch int) bool { 42 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' 43 } 44 45 type Scanner struct { 46 Pos ast.Position 47 reader *bufio.Reader 48 } 49 50 func NewScanner(reader io.Reader, source string) *Scanner { 51 return &Scanner{ 52 Pos: ast.Position{source, 1, 0}, 53 reader: bufio.NewReaderSize(reader, 4096), 54 } 55 } 56 57 func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} } 58 59 func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} } 60 61 func (sc *Scanner) readNext() int { 62 ch, err := sc.reader.ReadByte() 63 if err == io.EOF { 64 return EOF 65 } 66 return int(ch) 67 } 68 69 func (sc *Scanner) Newline(ch int) { 70 if ch < 0 { 71 return 72 } 73 sc.Pos.Line += 1 74 sc.Pos.Column = 0 75 next := sc.Peek() 76 if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' { 77 sc.reader.ReadByte() 78 } 79 } 80 81 func (sc *Scanner) Next() int { 82 ch := sc.readNext() 83 switch ch { 84 case '\n', '\r': 85 sc.Newline(ch) 86 ch = int('\n') 87 case EOF: 88 sc.Pos.Line = EOF 89 sc.Pos.Column = 0 90 default: 91 sc.Pos.Column++ 92 } 93 return ch 94 } 95 96 func (sc *Scanner) Peek() int { 97 ch := sc.readNext() 98 if ch != EOF { 99 sc.reader.UnreadByte() 100 } 101 return ch 102 } 103 104 func (sc *Scanner) skipWhiteSpace(whitespace int64) int { 105 ch := sc.Next() 106 for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() { 107 } 108 return ch 109 } 110 111 func (sc *Scanner) skipComments(ch int) error { 112 // multiline comment 113 if sc.Peek() == '[' { 114 ch = sc.Next() 115 if sc.Peek() == '[' || sc.Peek() == '=' { 116 var buf bytes.Buffer 117 if err := sc.scanMultilineString(sc.Next(), &buf); err != nil { 118 return sc.Error(buf.String(), "invalid multiline comment") 119 } 120 return nil 121 } 122 } 123 for { 124 if ch == '\n' || ch == '\r' || ch < 0 { 125 break 126 } 127 ch = sc.Next() 128 } 129 return nil 130 } 131 132 func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error { 133 writeChar(buf, ch) 134 for isIdent(sc.Peek(), 1) { 135 writeChar(buf, sc.Next()) 136 } 137 return nil 138 } 139 140 func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error { 141 writeChar(buf, ch) 142 for isDecimal(sc.Peek()) { 143 writeChar(buf, sc.Next()) 144 } 145 return nil 146 } 147 148 func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error { 149 if ch == '0' { // octal 150 if sc.Peek() == 'x' || sc.Peek() == 'X' { 151 writeChar(buf, ch) 152 writeChar(buf, sc.Next()) 153 hasvalue := false 154 for isDigit(sc.Peek()) { 155 writeChar(buf, sc.Next()) 156 hasvalue = true 157 } 158 if !hasvalue { 159 return sc.Error(buf.String(), "illegal hexadecimal number") 160 } 161 return nil 162 } else if sc.Peek() != '.' && isDecimal(sc.Peek()) { 163 ch = sc.Next() 164 } 165 } 166 sc.scanDecimal(ch, buf) 167 if sc.Peek() == '.' { 168 sc.scanDecimal(sc.Next(), buf) 169 } 170 if ch = sc.Peek(); ch == 'e' || ch == 'E' { 171 writeChar(buf, sc.Next()) 172 if ch = sc.Peek(); ch == '-' || ch == '+' { 173 writeChar(buf, sc.Next()) 174 } 175 sc.scanDecimal(sc.Next(), buf) 176 } 177 178 return nil 179 } 180 181 func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error { 182 ch := sc.Next() 183 for ch != quote { 184 if ch == '\n' || ch == '\r' || ch < 0 { 185 return sc.Error(buf.String(), "unterminated string") 186 } 187 if ch == '\\' { 188 if err := sc.scanEscape(ch, buf); err != nil { 189 return err 190 } 191 } else { 192 writeChar(buf, ch) 193 } 194 ch = sc.Next() 195 } 196 return nil 197 } 198 199 func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error { 200 ch = sc.Next() 201 switch ch { 202 case 'a': 203 buf.WriteByte('\a') 204 case 'b': 205 buf.WriteByte('\b') 206 case 'f': 207 buf.WriteByte('\f') 208 case 'n': 209 buf.WriteByte('\n') 210 case 'r': 211 buf.WriteByte('\r') 212 case 't': 213 buf.WriteByte('\t') 214 case 'v': 215 buf.WriteByte('\v') 216 case '\\': 217 buf.WriteByte('\\') 218 case '"': 219 buf.WriteByte('"') 220 case '\'': 221 buf.WriteByte('\'') 222 case '\n': 223 buf.WriteByte('\n') 224 case '\r': 225 buf.WriteByte('\n') 226 sc.Newline('\r') 227 default: 228 if '0' <= ch && ch <= '9' { 229 bytes := []byte{byte(ch)} 230 for i := 0; i < 2 && isDecimal(sc.Peek()); i++ { 231 bytes = append(bytes, byte(sc.Next())) 232 } 233 val, _ := strconv.ParseInt(string(bytes), 10, 32) 234 writeChar(buf, int(val)) 235 } else { 236 buf.WriteByte('\\') 237 writeChar(buf, ch) 238 return sc.Error(buf.String(), "Invalid escape sequence") 239 } 240 } 241 return nil 242 } 243 244 func (sc *Scanner) countSep(ch int) (int, int) { 245 count := 0 246 for ; ch == '='; count = count + 1 { 247 ch = sc.Next() 248 } 249 return count, ch 250 } 251 252 func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error { 253 var count1, count2 int 254 count1, ch = sc.countSep(ch) 255 if ch != '[' { 256 return sc.Error(string(ch), "invalid multiline string") 257 } 258 ch = sc.Next() 259 if ch == '\n' || ch == '\r' { 260 ch = sc.Next() 261 } 262 for { 263 if ch < 0 { 264 return sc.Error(buf.String(), "unterminated multiline string") 265 } else if ch == ']' { 266 count2, ch = sc.countSep(sc.Next()) 267 if count1 == count2 && ch == ']' { 268 goto finally 269 } 270 buf.WriteByte(']') 271 buf.WriteString(strings.Repeat("=", count2)) 272 continue 273 } 274 writeChar(buf, ch) 275 ch = sc.Next() 276 } 277 278 finally: 279 return nil 280 } 281 282 var reservedWords = map[string]int{ 283 "and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf, 284 "end": TEnd, "false": TFalse, "for": TFor, "function": TFunction, 285 "if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr, 286 "return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue, 287 "until": TUntil, "while": TWhile} 288 289 func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) { 290 redo: 291 var err error 292 tok := ast.Token{} 293 newline := false 294 295 ch := sc.skipWhiteSpace(whitespace1) 296 if ch == '\n' || ch == '\r' { 297 newline = true 298 ch = sc.skipWhiteSpace(whitespace2) 299 } 300 301 if ch == '(' { 302 lexer.PNewLine = newline 303 } 304 305 var _buf bytes.Buffer 306 buf := &_buf 307 tok.Pos = sc.Pos 308 309 switch { 310 case isIdent(ch, 0): 311 tok.Type = TIdent 312 err = sc.scanIdent(ch, buf) 313 tok.Str = buf.String() 314 if err != nil { 315 goto finally 316 } 317 if typ, ok := reservedWords[tok.Str]; ok { 318 tok.Type = typ 319 } 320 case isDecimal(ch): 321 tok.Type = TNumber 322 err = sc.scanNumber(ch, buf) 323 tok.Str = buf.String() 324 default: 325 switch ch { 326 case EOF: 327 tok.Type = EOF 328 case '-': 329 if sc.Peek() == '-' { 330 err = sc.skipComments(sc.Next()) 331 if err != nil { 332 goto finally 333 } 334 goto redo 335 } else { 336 tok.Type = ch 337 tok.Str = string(ch) 338 } 339 case '"', '\'': 340 tok.Type = TString 341 err = sc.scanString(ch, buf) 342 tok.Str = buf.String() 343 case '[': 344 if c := sc.Peek(); c == '[' || c == '=' { 345 tok.Type = TString 346 err = sc.scanMultilineString(sc.Next(), buf) 347 tok.Str = buf.String() 348 } else { 349 tok.Type = ch 350 tok.Str = string(ch) 351 } 352 case '=': 353 if sc.Peek() == '=' { 354 tok.Type = TEqeq 355 tok.Str = "==" 356 sc.Next() 357 } else { 358 tok.Type = ch 359 tok.Str = string(ch) 360 } 361 case '~': 362 if sc.Peek() == '=' { 363 tok.Type = TNeq 364 tok.Str = "~=" 365 sc.Next() 366 } else { 367 err = sc.Error("~", "Invalid '~' token") 368 } 369 case '<': 370 if sc.Peek() == '=' { 371 tok.Type = TLte 372 tok.Str = "<=" 373 sc.Next() 374 } else { 375 tok.Type = ch 376 tok.Str = string(ch) 377 } 378 case '>': 379 if sc.Peek() == '=' { 380 tok.Type = TGte 381 tok.Str = ">=" 382 sc.Next() 383 } else { 384 tok.Type = ch 385 tok.Str = string(ch) 386 } 387 case '.': 388 ch2 := sc.Peek() 389 switch { 390 case isDecimal(ch2): 391 tok.Type = TNumber 392 err = sc.scanNumber(ch, buf) 393 tok.Str = buf.String() 394 case ch2 == '.': 395 writeChar(buf, ch) 396 writeChar(buf, sc.Next()) 397 if sc.Peek() == '.' { 398 writeChar(buf, sc.Next()) 399 tok.Type = T3Comma 400 } else { 401 tok.Type = T2Comma 402 } 403 default: 404 tok.Type = '.' 405 } 406 tok.Str = buf.String() 407 case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',': 408 tok.Type = ch 409 tok.Str = string(ch) 410 default: 411 writeChar(buf, ch) 412 err = sc.Error(buf.String(), "Invalid token") 413 goto finally 414 } 415 } 416 417 finally: 418 tok.Name = TokenName(int(tok.Type)) 419 return tok, err 420 } 421 422 // yacc interface {{{ 423 424 type Lexer struct { 425 scanner *Scanner 426 Stmts []ast.Stmt 427 PNewLine bool 428 Token ast.Token 429 } 430 431 func (lx *Lexer) Lex(lval *yySymType) int { 432 tok, err := lx.scanner.Scan(lx) 433 if err != nil { 434 panic(err) 435 } 436 if tok.Type < 0 { 437 return 0 438 } 439 lval.token = tok 440 lx.Token = tok 441 return int(tok.Type) 442 } 443 444 func (lx *Lexer) Error(message string) { 445 panic(lx.scanner.Error(lx.Token.Str, message)) 446 } 447 448 func (lx *Lexer) TokenError(tok ast.Token, message string) { 449 panic(lx.scanner.TokenError(tok, message)) 450 } 451 452 func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) { 453 lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}} 454 chunk = nil 455 defer func() { 456 if e := recover(); e != nil { 457 err, _ = e.(error) 458 } 459 }() 460 yyParse(lexer) 461 chunk = lexer.Stmts 462 return 463 } 464 465 // }}} 466 467 // Dump {{{ 468 469 func isInlineDumpNode(rv reflect.Value) bool { 470 switch rv.Kind() { 471 case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr: 472 return false 473 default: 474 return true 475 } 476 } 477 478 func dump(node interface{}, level int, s string) string { 479 rt := reflect.TypeOf(node) 480 if fmt.Sprint(rt) == "<nil>" { 481 return strings.Repeat(s, level) + "<nil>" 482 } 483 484 rv := reflect.ValueOf(node) 485 buf := []string{} 486 switch rt.Kind() { 487 case reflect.Slice: 488 if rv.Len() == 0 { 489 return strings.Repeat(s, level) + "<empty>" 490 } 491 for i := 0; i < rv.Len(); i++ { 492 buf = append(buf, dump(rv.Index(i).Interface(), level, s)) 493 } 494 case reflect.Ptr: 495 vt := rv.Elem() 496 tt := rt.Elem() 497 indicies := []int{} 498 for i := 0; i < tt.NumField(); i++ { 499 if strings.Index(tt.Field(i).Name, "Base") > -1 { 500 continue 501 } 502 indicies = append(indicies, i) 503 } 504 switch { 505 case len(indicies) == 0: 506 return strings.Repeat(s, level) + "<empty>" 507 case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])): 508 for _, i := range indicies { 509 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s)) 510 } 511 default: 512 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()) 513 for _, i := range indicies { 514 if isInlineDumpNode(vt.Field(i)) { 515 inf := dump(vt.Field(i).Interface(), 0, s) 516 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf) 517 } else { 518 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ") 519 buf = append(buf, dump(vt.Field(i).Interface(), level+2, s)) 520 } 521 } 522 } 523 default: 524 buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node)) 525 } 526 return strings.Join(buf, "\n") 527 } 528 529 func Dump(chunk []ast.Stmt) string { 530 return dump(chunk, 0, " ") 531 } 532 533 // }}