github.com/bitxmesh/gopher-lua@v0.0.0-20190327085718-93c344ef97a4/parse/lexer.go (about) 1 package parse 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "github.com/yuin/gopher-lua/ast" 8 "io" 9 "reflect" 10 "strconv" 11 "strings" 12 ) 13 14 const EOF = -1 15 const whitespace1 = 1<<'\t' | 1<<' ' 16 const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' 17 18 type Error struct { 19 Pos ast.Position 20 Message string 21 Token string 22 } 23 24 func (e *Error) Error() string { 25 pos := e.Pos 26 if pos.Line == EOF { 27 return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message) 28 } else { 29 return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message) 30 } 31 } 32 33 func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) } 34 35 func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' } 36 37 func isIdent(ch int, pos int) bool { 38 return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0 39 } 40 41 func isDigit(ch int) bool { 42 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' 43 } 44 45 type Scanner struct { 46 Pos ast.Position 47 reader *bufio.Reader 48 } 49 50 func NewScanner(reader io.Reader, source string) *Scanner { 51 return &Scanner{ 52 Pos: ast.Position{ 53 Source: source, 54 Line: 1, 55 Column: 0, 56 }, 57 reader: bufio.NewReaderSize(reader, 4096), 58 } 59 } 60 61 func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} } 62 63 func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} } 64 65 func (sc *Scanner) readNext() int { 66 ch, err := sc.reader.ReadByte() 67 if err == io.EOF { 68 return EOF 69 } 70 return int(ch) 71 } 72 73 func (sc *Scanner) Newline(ch int) { 74 if ch < 0 { 75 return 76 } 77 sc.Pos.Line += 1 78 sc.Pos.Column = 0 79 next := sc.Peek() 80 if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' { 81 sc.reader.ReadByte() 82 } 83 } 84 85 func (sc *Scanner) Next() int { 86 ch := sc.readNext() 87 switch ch { 88 case '\n', '\r': 89 sc.Newline(ch) 90 ch = int('\n') 91 case EOF: 92 sc.Pos.Line = EOF 93 sc.Pos.Column = 0 94 default: 95 sc.Pos.Column++ 96 } 97 return ch 98 } 99 100 func (sc *Scanner) Peek() int { 101 ch := sc.readNext() 102 if ch != EOF { 103 sc.reader.UnreadByte() 104 } 105 return ch 106 } 107 108 func (sc *Scanner) skipWhiteSpace(whitespace int64) int { 109 ch := sc.Next() 110 for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() { 111 } 112 return ch 113 } 114 115 func (sc *Scanner) skipComments(ch int) error { 116 // multiline comment 117 if sc.Peek() == '[' { 118 ch = sc.Next() 119 if sc.Peek() == '[' || sc.Peek() == '=' { 120 var buf bytes.Buffer 121 if err := sc.scanMultilineString(sc.Next(), &buf); err != nil { 122 return sc.Error(buf.String(), "invalid multiline comment") 123 } 124 return nil 125 } 126 } 127 for { 128 if ch == '\n' || ch == '\r' || ch < 0 { 129 break 130 } 131 ch = sc.Next() 132 } 133 return nil 134 } 135 136 func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error { 137 writeChar(buf, ch) 138 for isIdent(sc.Peek(), 1) { 139 writeChar(buf, sc.Next()) 140 } 141 return nil 142 } 143 144 func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error { 145 writeChar(buf, ch) 146 for isDecimal(sc.Peek()) { 147 writeChar(buf, sc.Next()) 148 } 149 return nil 150 } 151 152 func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error { 153 if ch == '0' { // octal 154 if sc.Peek() == 'x' || sc.Peek() == 'X' { 155 writeChar(buf, ch) 156 writeChar(buf, sc.Next()) 157 hasvalue := false 158 for isDigit(sc.Peek()) { 159 writeChar(buf, sc.Next()) 160 hasvalue = true 161 } 162 if !hasvalue { 163 return sc.Error(buf.String(), "illegal hexadecimal number") 164 } 165 return nil 166 } else if sc.Peek() != '.' && isDecimal(sc.Peek()) { 167 ch = sc.Next() 168 } 169 } 170 sc.scanDecimal(ch, buf) 171 if sc.Peek() == '.' { 172 sc.scanDecimal(sc.Next(), buf) 173 } 174 if ch = sc.Peek(); ch == 'e' || ch == 'E' { 175 writeChar(buf, sc.Next()) 176 if ch = sc.Peek(); ch == '-' || ch == '+' { 177 writeChar(buf, sc.Next()) 178 } 179 sc.scanDecimal(sc.Next(), buf) 180 } 181 182 return nil 183 } 184 185 func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error { 186 ch := sc.Next() 187 for ch != quote { 188 if ch == '\n' || ch == '\r' || ch < 0 { 189 return sc.Error(buf.String(), "unterminated string") 190 } 191 if ch == '\\' { 192 if err := sc.scanEscape(ch, buf); err != nil { 193 return err 194 } 195 } else { 196 writeChar(buf, ch) 197 } 198 ch = sc.Next() 199 } 200 return nil 201 } 202 203 func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error { 204 ch = sc.Next() 205 switch ch { 206 case 'a': 207 buf.WriteByte('\a') 208 case 'b': 209 buf.WriteByte('\b') 210 case 'f': 211 buf.WriteByte('\f') 212 case 'n': 213 buf.WriteByte('\n') 214 case 'r': 215 buf.WriteByte('\r') 216 case 't': 217 buf.WriteByte('\t') 218 case 'v': 219 buf.WriteByte('\v') 220 case '\\': 221 buf.WriteByte('\\') 222 case '"': 223 buf.WriteByte('"') 224 case '\'': 225 buf.WriteByte('\'') 226 case '\n': 227 buf.WriteByte('\n') 228 case '\r': 229 buf.WriteByte('\n') 230 sc.Newline('\r') 231 default: 232 if '0' <= ch && ch <= '9' { 233 bytes := []byte{byte(ch)} 234 for i := 0; i < 2 && isDecimal(sc.Peek()); i++ { 235 bytes = append(bytes, byte(sc.Next())) 236 } 237 val, _ := strconv.ParseInt(string(bytes), 10, 32) 238 writeChar(buf, int(val)) 239 } else { 240 buf.WriteByte('\\') 241 writeChar(buf, ch) 242 return sc.Error(buf.String(), "Invalid escape sequence") 243 } 244 } 245 return nil 246 } 247 248 func (sc *Scanner) countSep(ch int) (int, int) { 249 count := 0 250 for ; ch == '='; count = count + 1 { 251 ch = sc.Next() 252 } 253 return count, ch 254 } 255 256 func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error { 257 var count1, count2 int 258 count1, ch = sc.countSep(ch) 259 if ch != '[' { 260 return sc.Error(string(ch), "invalid multiline string") 261 } 262 ch = sc.Next() 263 if ch == '\n' || ch == '\r' { 264 ch = sc.Next() 265 } 266 for { 267 if ch < 0 { 268 return sc.Error(buf.String(), "unterminated multiline string") 269 } else if ch == ']' { 270 count2, ch = sc.countSep(sc.Next()) 271 if count1 == count2 && ch == ']' { 272 goto finally 273 } 274 buf.WriteByte(']') 275 buf.WriteString(strings.Repeat("=", count2)) 276 continue 277 } 278 writeChar(buf, ch) 279 ch = sc.Next() 280 } 281 282 finally: 283 return nil 284 } 285 286 var reservedWords = map[string]int{ 287 "and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf, 288 "end": TEnd, "false": TFalse, "for": TFor, "function": TFunction, 289 "if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr, 290 "return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue, 291 "until": TUntil, "while": TWhile} 292 293 func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) { 294 redo: 295 var err error 296 tok := ast.Token{} 297 newline := false 298 299 ch := sc.skipWhiteSpace(whitespace1) 300 if ch == '\n' || ch == '\r' { 301 newline = true 302 ch = sc.skipWhiteSpace(whitespace2) 303 } 304 305 if ch == '(' && lexer.PrevTokenType == ')' { 306 lexer.PNewLine = newline 307 } else { 308 lexer.PNewLine = false 309 } 310 311 var _buf bytes.Buffer 312 buf := &_buf 313 tok.Pos = sc.Pos 314 315 switch { 316 case isIdent(ch, 0): 317 tok.Type = TIdent 318 err = sc.scanIdent(ch, buf) 319 tok.Str = buf.String() 320 if err != nil { 321 goto finally 322 } 323 if typ, ok := reservedWords[tok.Str]; ok { 324 tok.Type = typ 325 } 326 case isDecimal(ch): 327 tok.Type = TNumber 328 err = sc.scanNumber(ch, buf) 329 tok.Str = buf.String() 330 default: 331 switch ch { 332 case EOF: 333 tok.Type = EOF 334 case '-': 335 if sc.Peek() == '-' { 336 err = sc.skipComments(sc.Next()) 337 if err != nil { 338 goto finally 339 } 340 goto redo 341 } else { 342 tok.Type = ch 343 tok.Str = string(ch) 344 } 345 case '"', '\'': 346 tok.Type = TString 347 err = sc.scanString(ch, buf) 348 tok.Str = buf.String() 349 case '[': 350 if c := sc.Peek(); c == '[' || c == '=' { 351 tok.Type = TString 352 err = sc.scanMultilineString(sc.Next(), buf) 353 tok.Str = buf.String() 354 } else { 355 tok.Type = ch 356 tok.Str = string(ch) 357 } 358 case '=': 359 if sc.Peek() == '=' { 360 tok.Type = TEqeq 361 tok.Str = "==" 362 sc.Next() 363 } else { 364 tok.Type = ch 365 tok.Str = string(ch) 366 } 367 case '~': 368 if sc.Peek() == '=' { 369 tok.Type = TNeq 370 tok.Str = "~=" 371 sc.Next() 372 } else { 373 err = sc.Error("~", "Invalid '~' token") 374 } 375 case '<': 376 if sc.Peek() == '=' { 377 tok.Type = TLte 378 tok.Str = "<=" 379 sc.Next() 380 } else { 381 tok.Type = ch 382 tok.Str = string(ch) 383 } 384 case '>': 385 if sc.Peek() == '=' { 386 tok.Type = TGte 387 tok.Str = ">=" 388 sc.Next() 389 } else { 390 tok.Type = ch 391 tok.Str = string(ch) 392 } 393 case '.': 394 ch2 := sc.Peek() 395 switch { 396 case isDecimal(ch2): 397 tok.Type = TNumber 398 err = sc.scanNumber(ch, buf) 399 tok.Str = buf.String() 400 case ch2 == '.': 401 writeChar(buf, ch) 402 writeChar(buf, sc.Next()) 403 if sc.Peek() == '.' { 404 writeChar(buf, sc.Next()) 405 tok.Type = T3Comma 406 } else { 407 tok.Type = T2Comma 408 } 409 default: 410 tok.Type = '.' 411 } 412 tok.Str = buf.String() 413 case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',': 414 tok.Type = ch 415 tok.Str = string(ch) 416 default: 417 writeChar(buf, ch) 418 err = sc.Error(buf.String(), "Invalid token") 419 goto finally 420 } 421 } 422 423 finally: 424 tok.Name = TokenName(int(tok.Type)) 425 return tok, err 426 } 427 428 // yacc interface {{{ 429 430 type Lexer struct { 431 scanner *Scanner 432 Stmts []ast.Stmt 433 PNewLine bool 434 Token ast.Token 435 PrevTokenType int 436 } 437 438 func (lx *Lexer) Lex(lval *yySymType) int { 439 lx.PrevTokenType = lx.Token.Type 440 tok, err := lx.scanner.Scan(lx) 441 if err != nil { 442 panic(err) 443 } 444 if tok.Type < 0 { 445 return 0 446 } 447 lval.token = tok 448 lx.Token = tok 449 return int(tok.Type) 450 } 451 452 func (lx *Lexer) Error(message string) { 453 panic(lx.scanner.Error(lx.Token.Str, message)) 454 } 455 456 func (lx *Lexer) TokenError(tok ast.Token, message string) { 457 panic(lx.scanner.TokenError(tok, message)) 458 } 459 460 func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) { 461 lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil} 462 chunk = nil 463 defer func() { 464 if e := recover(); e != nil { 465 err, _ = e.(error) 466 } 467 }() 468 yyParse(lexer) 469 chunk = lexer.Stmts 470 return 471 } 472 473 // }}} 474 475 // Dump {{{ 476 477 func isInlineDumpNode(rv reflect.Value) bool { 478 switch rv.Kind() { 479 case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr: 480 return false 481 default: 482 return true 483 } 484 } 485 486 func dump(node interface{}, level int, s string) string { 487 rt := reflect.TypeOf(node) 488 if fmt.Sprint(rt) == "<nil>" { 489 return strings.Repeat(s, level) + "<nil>" 490 } 491 492 rv := reflect.ValueOf(node) 493 buf := []string{} 494 switch rt.Kind() { 495 case reflect.Slice: 496 if rv.Len() == 0 { 497 return strings.Repeat(s, level) + "<empty>" 498 } 499 for i := 0; i < rv.Len(); i++ { 500 buf = append(buf, dump(rv.Index(i).Interface(), level, s)) 501 } 502 case reflect.Ptr: 503 vt := rv.Elem() 504 tt := rt.Elem() 505 indicies := []int{} 506 for i := 0; i < tt.NumField(); i++ { 507 if strings.Index(tt.Field(i).Name, "Base") > -1 { 508 continue 509 } 510 indicies = append(indicies, i) 511 } 512 switch { 513 case len(indicies) == 0: 514 return strings.Repeat(s, level) + "<empty>" 515 case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])): 516 for _, i := range indicies { 517 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s)) 518 } 519 default: 520 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()) 521 for _, i := range indicies { 522 if isInlineDumpNode(vt.Field(i)) { 523 inf := dump(vt.Field(i).Interface(), 0, s) 524 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf) 525 } else { 526 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ") 527 buf = append(buf, dump(vt.Field(i).Interface(), level+2, s)) 528 } 529 } 530 } 531 default: 532 buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node)) 533 } 534 return strings.Join(buf, "\n") 535 } 536 537 func Dump(chunk []ast.Stmt) string { 538 return dump(chunk, 0, " ") 539 } 540 541 // }}