github.com/coinstack/gopher-lua@v0.0.0-20180626044619-c9c62d4ee45e/parse/lexer.go (about) 1 package parse 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "github.com/coinstack/gopher-lua/ast" 8 "io" 9 "reflect" 10 "strconv" 11 "strings" 12 ) 13 14 const EOF = -1 15 const whitespace1 = 1<<'\t' | 1<<' ' 16 const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' 17 18 type Error struct { 19 Pos ast.Position 20 Message string 21 Token string 22 } 23 24 func (e *Error) Error() string { 25 pos := e.Pos 26 if pos.Line == EOF { 27 return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message) 28 } else { 29 return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message) 30 } 31 } 32 33 func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) } 34 35 func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' } 36 37 func isIdent(ch int, pos int) bool { 38 return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0 39 } 40 41 func isDigit(ch int) bool { 42 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' 43 } 44 45 type Scanner struct { 46 Pos ast.Position 47 reader *bufio.Reader 48 } 49 50 func NewScanner(reader io.Reader, source string) *Scanner { 51 return &Scanner{ 52 Pos: ast.Position{source, 1, 0}, 53 reader: bufio.NewReaderSize(reader, 4096), 54 } 55 } 56 57 func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} } 58 59 func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} } 60 61 func (sc *Scanner) readNext() int { 62 ch, err := sc.reader.ReadByte() 63 if err == io.EOF { 64 return EOF 65 } 66 return int(ch) 67 } 68 69 func (sc *Scanner) Newline(ch int) { 70 if ch < 0 { 71 return 72 } 73 sc.Pos.Line += 1 74 sc.Pos.Column = 0 75 next := sc.Peek() 76 if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' { 77 sc.reader.ReadByte() 78 } 79 } 80 81 func (sc *Scanner) Next() int { 82 ch := sc.readNext() 83 switch ch { 84 case '\n', '\r': 85 sc.Newline(ch) 86 ch = int('\n') 87 case EOF: 88 sc.Pos.Line = EOF 89 sc.Pos.Column = 0 90 default: 91 sc.Pos.Column++ 92 } 93 return ch 94 } 95 96 func (sc *Scanner) Peek() int { 97 ch := sc.readNext() 98 if ch != EOF { 99 sc.reader.UnreadByte() 100 } 101 return ch 102 } 103 104 func (sc *Scanner) skipWhiteSpace(whitespace int64) int { 105 ch := sc.Next() 106 for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() { 107 } 108 return ch 109 } 110 111 func (sc *Scanner) skipComments(ch int) error { 112 // multiline comment 113 if sc.Peek() == '[' { 114 ch = sc.Next() 115 if sc.Peek() == '[' || sc.Peek() == '=' { 116 var buf bytes.Buffer 117 if err := sc.scanMultilineString(sc.Next(), &buf); err != nil { 118 return sc.Error(buf.String(), "invalid multiline comment") 119 } 120 return nil 121 } 122 } 123 for { 124 if ch == '\n' || ch == '\r' || ch < 0 { 125 break 126 } 127 ch = sc.Next() 128 } 129 return nil 130 } 131 132 func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error { 133 writeChar(buf, ch) 134 for isIdent(sc.Peek(), 1) { 135 writeChar(buf, sc.Next()) 136 } 137 return nil 138 } 139 140 func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error { 141 writeChar(buf, ch) 142 for isDecimal(sc.Peek()) { 143 writeChar(buf, sc.Next()) 144 } 145 return nil 146 } 147 148 func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error { 149 if ch == '0' { // octal 150 if sc.Peek() == 'x' || sc.Peek() == 'X' { 151 writeChar(buf, ch) 152 writeChar(buf, sc.Next()) 153 hasvalue := false 154 for isDigit(sc.Peek()) { 155 writeChar(buf, sc.Next()) 156 hasvalue = true 157 } 158 if !hasvalue { 159 return sc.Error(buf.String(), "illegal hexadecimal number") 160 } 161 return nil 162 } else if sc.Peek() != '.' && isDecimal(sc.Peek()) { 163 ch = sc.Next() 164 } 165 } 166 sc.scanDecimal(ch, buf) 167 if sc.Peek() == '.' { 168 sc.scanDecimal(sc.Next(), buf) 169 } 170 if ch = sc.Peek(); ch == 'e' || ch == 'E' { 171 writeChar(buf, sc.Next()) 172 if ch = sc.Peek(); ch == '-' || ch == '+' { 173 writeChar(buf, sc.Next()) 174 } 175 sc.scanDecimal(sc.Next(), buf) 176 } 177 178 return nil 179 } 180 181 func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error { 182 ch := sc.Next() 183 for ch != quote { 184 if ch == '\n' || ch == '\r' || ch < 0 { 185 return sc.Error(buf.String(), "unterminated string") 186 } 187 if ch == '\\' { 188 if err := sc.scanEscape(ch, buf); err != nil { 189 return err 190 } 191 } else { 192 writeChar(buf, ch) 193 } 194 ch = sc.Next() 195 } 196 return nil 197 } 198 199 func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error { 200 ch = sc.Next() 201 switch ch { 202 case 'a': 203 buf.WriteByte('\a') 204 case 'b': 205 buf.WriteByte('\b') 206 case 'f': 207 buf.WriteByte('\f') 208 case 'n': 209 buf.WriteByte('\n') 210 case 'r': 211 buf.WriteByte('\r') 212 case 't': 213 buf.WriteByte('\t') 214 case 'v': 215 buf.WriteByte('\v') 216 case '\\': 217 buf.WriteByte('\\') 218 case '"': 219 buf.WriteByte('"') 220 case '\'': 221 buf.WriteByte('\'') 222 case '\n': 223 buf.WriteByte('\n') 224 case '\r': 225 buf.WriteByte('\n') 226 sc.Newline('\r') 227 default: 228 if '0' <= ch && ch <= '9' { 229 bytes := []byte{byte(ch)} 230 for i := 0; i < 2 && isDecimal(sc.Peek()); i++ { 231 bytes = append(bytes, byte(sc.Next())) 232 } 233 val, _ := strconv.ParseInt(string(bytes), 10, 32) 234 writeChar(buf, int(val)) 235 } else { 236 buf.WriteByte('\\') 237 writeChar(buf, ch) 238 return sc.Error(buf.String(), "Invalid escape sequence") 239 } 240 } 241 return nil 242 } 243 244 func (sc *Scanner) countSep(ch int) (int, int) { 245 count := 0 246 for ; ch == '='; count = count + 1 { 247 ch = sc.Next() 248 } 249 return count, ch 250 } 251 252 func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error { 253 var count1, count2 int 254 count1, ch = sc.countSep(ch) 255 if ch != '[' { 256 return sc.Error(string(ch), "invalid multiline string") 257 } 258 ch = sc.Next() 259 if ch == '\n' || ch == '\r' { 260 ch = sc.Next() 261 } 262 for { 263 if ch < 0 { 264 return sc.Error(buf.String(), "unterminated multiline string") 265 } else if ch == ']' { 266 count2, ch = sc.countSep(sc.Next()) 267 if count1 == count2 && ch == ']' { 268 goto finally 269 } 270 buf.WriteByte(']') 271 buf.WriteString(strings.Repeat("=", count2)) 272 continue 273 } 274 writeChar(buf, ch) 275 ch = sc.Next() 276 } 277 278 finally: 279 return nil 280 } 281 282 var reservedWords = map[string]int{ 283 "and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf, 284 "end": TEnd, "false": TFalse, "for": TFor, "function": TFunction, 285 "if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr, 286 "return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue, 287 "until": TUntil, "while": TWhile} 288 289 func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) { 290 redo: 291 var err error 292 tok := ast.Token{} 293 newline := false 294 295 ch := sc.skipWhiteSpace(whitespace1) 296 if ch == '\n' || ch == '\r' { 297 newline = true 298 ch = sc.skipWhiteSpace(whitespace2) 299 } 300 301 if ch == '(' && lexer.PrevTokenType == ')' { 302 lexer.PNewLine = newline 303 } else { 304 lexer.PNewLine = false 305 } 306 307 var _buf bytes.Buffer 308 buf := &_buf 309 tok.Pos = sc.Pos 310 311 switch { 312 case isIdent(ch, 0): 313 tok.Type = TIdent 314 err = sc.scanIdent(ch, buf) 315 tok.Str = buf.String() 316 if err != nil { 317 goto finally 318 } 319 if typ, ok := reservedWords[tok.Str]; ok { 320 tok.Type = typ 321 } 322 case isDecimal(ch): 323 tok.Type = TNumber 324 err = sc.scanNumber(ch, buf) 325 tok.Str = buf.String() 326 default: 327 switch ch { 328 case EOF: 329 tok.Type = EOF 330 case '-': 331 if sc.Peek() == '-' { 332 err = sc.skipComments(sc.Next()) 333 if err != nil { 334 goto finally 335 } 336 goto redo 337 } else { 338 tok.Type = ch 339 tok.Str = string(ch) 340 } 341 case '"', '\'': 342 tok.Type = TString 343 err = sc.scanString(ch, buf) 344 tok.Str = buf.String() 345 case '[': 346 if c := sc.Peek(); c == '[' || c == '=' { 347 tok.Type = TString 348 err = sc.scanMultilineString(sc.Next(), buf) 349 tok.Str = buf.String() 350 } else { 351 tok.Type = ch 352 tok.Str = string(ch) 353 } 354 case '=': 355 if sc.Peek() == '=' { 356 tok.Type = TEqeq 357 tok.Str = "==" 358 sc.Next() 359 } else { 360 tok.Type = ch 361 tok.Str = string(ch) 362 } 363 case '~': 364 if sc.Peek() == '=' { 365 tok.Type = TNeq 366 tok.Str = "~=" 367 sc.Next() 368 } else { 369 err = sc.Error("~", "Invalid '~' token") 370 } 371 case '<': 372 if sc.Peek() == '=' { 373 tok.Type = TLte 374 tok.Str = "<=" 375 sc.Next() 376 } else { 377 tok.Type = ch 378 tok.Str = string(ch) 379 } 380 case '>': 381 if sc.Peek() == '=' { 382 tok.Type = TGte 383 tok.Str = ">=" 384 sc.Next() 385 } else { 386 tok.Type = ch 387 tok.Str = string(ch) 388 } 389 case '.': 390 ch2 := sc.Peek() 391 switch { 392 case isDecimal(ch2): 393 tok.Type = TNumber 394 err = sc.scanNumber(ch, buf) 395 tok.Str = buf.String() 396 case ch2 == '.': 397 writeChar(buf, ch) 398 writeChar(buf, sc.Next()) 399 if sc.Peek() == '.' { 400 writeChar(buf, sc.Next()) 401 tok.Type = T3Comma 402 } else { 403 tok.Type = T2Comma 404 } 405 default: 406 tok.Type = '.' 407 } 408 tok.Str = buf.String() 409 case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',': 410 tok.Type = ch 411 tok.Str = string(ch) 412 default: 413 writeChar(buf, ch) 414 err = sc.Error(buf.String(), "Invalid token") 415 goto finally 416 } 417 } 418 419 finally: 420 tok.Name = TokenName(int(tok.Type)) 421 return tok, err 422 } 423 424 // yacc interface {{{ 425 426 type Lexer struct { 427 scanner *Scanner 428 Stmts []ast.Stmt 429 PNewLine bool 430 Token ast.Token 431 PrevTokenType int 432 } 433 434 func (lx *Lexer) Lex(lval *yySymType) int { 435 lx.PrevTokenType = lx.Token.Type 436 tok, err := lx.scanner.Scan(lx) 437 if err != nil { 438 panic(err) 439 } 440 if tok.Type < 0 { 441 return 0 442 } 443 lval.token = tok 444 lx.Token = tok 445 return int(tok.Type) 446 } 447 448 func (lx *Lexer) Error(message string) { 449 panic(lx.scanner.Error(lx.Token.Str, message)) 450 } 451 452 func (lx *Lexer) TokenError(tok ast.Token, message string) { 453 panic(lx.scanner.TokenError(tok, message)) 454 } 455 456 func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) { 457 lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil} 458 chunk = nil 459 defer func() { 460 if e := recover(); e != nil { 461 err, _ = e.(error) 462 } 463 }() 464 yyParse(lexer) 465 chunk = lexer.Stmts 466 return 467 } 468 469 // }}} 470 471 // Dump {{{ 472 473 func isInlineDumpNode(rv reflect.Value) bool { 474 switch rv.Kind() { 475 case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr: 476 return false 477 default: 478 return true 479 } 480 } 481 482 func dump(node interface{}, level int, s string) string { 483 rt := reflect.TypeOf(node) 484 if fmt.Sprint(rt) == "<nil>" { 485 return strings.Repeat(s, level) + "<nil>" 486 } 487 488 rv := reflect.ValueOf(node) 489 buf := []string{} 490 switch rt.Kind() { 491 case reflect.Slice: 492 if rv.Len() == 0 { 493 return strings.Repeat(s, level) + "<empty>" 494 } 495 for i := 0; i < rv.Len(); i++ { 496 buf = append(buf, dump(rv.Index(i).Interface(), level, s)) 497 } 498 case reflect.Ptr: 499 vt := rv.Elem() 500 tt := rt.Elem() 501 indicies := []int{} 502 for i := 0; i < tt.NumField(); i++ { 503 if strings.Index(tt.Field(i).Name, "Base") > -1 { 504 continue 505 } 506 indicies = append(indicies, i) 507 } 508 switch { 509 case len(indicies) == 0: 510 return strings.Repeat(s, level) + "<empty>" 511 case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])): 512 for _, i := range indicies { 513 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s)) 514 } 515 default: 516 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()) 517 for _, i := range indicies { 518 if isInlineDumpNode(vt.Field(i)) { 519 inf := dump(vt.Field(i).Interface(), 0, s) 520 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf) 521 } else { 522 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ") 523 buf = append(buf, dump(vt.Field(i).Interface(), level+2, s)) 524 } 525 } 526 } 527 default: 528 buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node)) 529 } 530 return strings.Join(buf, "\n") 531 } 532 533 func Dump(chunk []ast.Stmt) string { 534 return dump(chunk, 0, " ") 535 } 536 537 // }}