github.com/hootrhino/gopher-lua@v1.0.3/parse/lexer.go (about) 1 package parse 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "reflect" 9 "strconv" 10 "strings" 11 12 "github.com/hootrhino/gopher-lua/ast" 13 ) 14 15 const EOF = -1 16 const whitespace1 = 1<<'\t' | 1<<' ' 17 const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' 18 19 type Error struct { 20 Pos ast.Position 21 Message string 22 Token string 23 } 24 25 func (e *Error) Error() string { 26 pos := e.Pos 27 if pos.Line == EOF { 28 return fmt.Sprintf("%v at EOF: %s\n", pos.Source, e.Message) 29 } else { 30 return fmt.Sprintf("%v line:%d(column:%d) near '%v': %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message) 31 } 32 } 33 34 func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) } 35 36 func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' } 37 38 func isIdent(ch int, pos int) bool { 39 return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0 40 } 41 42 func isDigit(ch int) bool { 43 return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F' 44 } 45 46 type Scanner struct { 47 Pos ast.Position 48 reader *bufio.Reader 49 } 50 51 func NewScanner(reader io.Reader, source string) *Scanner { 52 return &Scanner{ 53 Pos: ast.Position{ 54 Source: source, 55 Line: 1, 56 Column: 0, 57 }, 58 reader: bufio.NewReaderSize(reader, 4096), 59 } 60 } 61 62 func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} } 63 64 func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} } 65 66 func (sc *Scanner) readNext() int { 67 ch, err := sc.reader.ReadByte() 68 if err == io.EOF { 69 return EOF 70 } 71 return int(ch) 72 } 73 74 func (sc *Scanner) Newline(ch int) { 75 if ch < 0 { 76 return 77 } 78 sc.Pos.Line += 1 79 sc.Pos.Column = 0 80 next := sc.Peek() 81 if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' { 82 sc.reader.ReadByte() 83 } 84 } 85 86 func (sc *Scanner) Next() int { 87 ch := sc.readNext() 88 switch ch { 89 case '\n', '\r': 90 sc.Newline(ch) 91 ch = int('\n') 92 case EOF: 93 sc.Pos.Line = EOF 94 sc.Pos.Column = 0 95 default: 96 sc.Pos.Column++ 97 } 98 return ch 99 } 100 101 func (sc *Scanner) Peek() int { 102 ch := sc.readNext() 103 if ch != EOF { 104 sc.reader.UnreadByte() 105 } 106 return ch 107 } 108 109 func (sc *Scanner) skipWhiteSpace(whitespace int64) int { 110 ch := sc.Next() 111 for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() { 112 } 113 return ch 114 } 115 116 func (sc *Scanner) skipComments(ch int) error { 117 // multiline comment 118 if sc.Peek() == '[' { 119 ch = sc.Next() 120 if sc.Peek() == '[' || sc.Peek() == '=' { 121 var buf bytes.Buffer 122 if err := sc.scanMultilineString(sc.Next(), &buf); err != nil { 123 return sc.Error(buf.String(), "invalid multiline comment") 124 } 125 return nil 126 } 127 } 128 for { 129 if ch == '\n' || ch == '\r' || ch < 0 { 130 break 131 } 132 ch = sc.Next() 133 } 134 return nil 135 } 136 137 func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error { 138 writeChar(buf, ch) 139 for isIdent(sc.Peek(), 1) { 140 writeChar(buf, sc.Next()) 141 } 142 return nil 143 } 144 145 func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error { 146 writeChar(buf, ch) 147 for isDecimal(sc.Peek()) { 148 writeChar(buf, sc.Next()) 149 } 150 return nil 151 } 152 153 func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error { 154 if ch == '0' { // octal 155 if sc.Peek() == 'x' || sc.Peek() == 'X' { 156 writeChar(buf, ch) 157 writeChar(buf, sc.Next()) 158 hasvalue := false 159 for isDigit(sc.Peek()) { 160 writeChar(buf, sc.Next()) 161 hasvalue = true 162 } 163 if !hasvalue { 164 return sc.Error(buf.String(), "illegal hexadecimal number") 165 } 166 return nil 167 } else if sc.Peek() != '.' && isDecimal(sc.Peek()) { 168 ch = sc.Next() 169 } 170 } 171 sc.scanDecimal(ch, buf) 172 if sc.Peek() == '.' { 173 sc.scanDecimal(sc.Next(), buf) 174 } 175 if ch = sc.Peek(); ch == 'e' || ch == 'E' { 176 writeChar(buf, sc.Next()) 177 if ch = sc.Peek(); ch == '-' || ch == '+' { 178 writeChar(buf, sc.Next()) 179 } 180 sc.scanDecimal(sc.Next(), buf) 181 } 182 183 return nil 184 } 185 186 func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error { 187 ch := sc.Next() 188 for ch != quote { 189 if ch == '\n' || ch == '\r' || ch < 0 { 190 return sc.Error(buf.String(), "unterminated string") 191 } 192 if ch == '\\' { 193 if err := sc.scanEscape(ch, buf); err != nil { 194 return err 195 } 196 } else { 197 writeChar(buf, ch) 198 } 199 ch = sc.Next() 200 } 201 return nil 202 } 203 204 func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error { 205 ch = sc.Next() 206 switch ch { 207 case 'a': 208 buf.WriteByte('\a') 209 case 'b': 210 buf.WriteByte('\b') 211 case 'f': 212 buf.WriteByte('\f') 213 case 'n': 214 buf.WriteByte('\n') 215 case 'r': 216 buf.WriteByte('\r') 217 case 't': 218 buf.WriteByte('\t') 219 case 'v': 220 buf.WriteByte('\v') 221 case '\\': 222 buf.WriteByte('\\') 223 case '"': 224 buf.WriteByte('"') 225 case '\'': 226 buf.WriteByte('\'') 227 case '\n': 228 buf.WriteByte('\n') 229 case '\r': 230 buf.WriteByte('\n') 231 sc.Newline('\r') 232 default: 233 if '0' <= ch && ch <= '9' { 234 bytes := []byte{byte(ch)} 235 for i := 0; i < 2 && isDecimal(sc.Peek()); i++ { 236 bytes = append(bytes, byte(sc.Next())) 237 } 238 val, _ := strconv.ParseInt(string(bytes), 10, 32) 239 writeChar(buf, int(val)) 240 } else { 241 writeChar(buf, ch) 242 } 243 } 244 return nil 245 } 246 247 func (sc *Scanner) countSep(ch int) (int, int) { 248 count := 0 249 for ; ch == '='; count = count + 1 { 250 ch = sc.Next() 251 } 252 return count, ch 253 } 254 255 func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error { 256 var count1, count2 int 257 count1, ch = sc.countSep(ch) 258 if ch != '[' { 259 return sc.Error(string(rune(ch)), "invalid multiline string") 260 } 261 ch = sc.Next() 262 if ch == '\n' || ch == '\r' { 263 ch = sc.Next() 264 } 265 for { 266 if ch < 0 { 267 return sc.Error(buf.String(), "unterminated multiline string") 268 } else if ch == ']' { 269 count2, ch = sc.countSep(sc.Next()) 270 if count1 == count2 && ch == ']' { 271 goto finally 272 } 273 buf.WriteByte(']') 274 buf.WriteString(strings.Repeat("=", count2)) 275 continue 276 } 277 writeChar(buf, ch) 278 ch = sc.Next() 279 } 280 281 finally: 282 return nil 283 } 284 285 var reservedWords = map[string]int{ 286 "and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf, 287 "end": TEnd, "false": TFalse, "for": TFor, "function": TFunction, 288 "if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr, 289 "return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue, 290 "until": TUntil, "while": TWhile, "goto": TGoto} 291 292 func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) { 293 redo: 294 var err error 295 tok := ast.Token{} 296 newline := false 297 298 ch := sc.skipWhiteSpace(whitespace1) 299 if ch == '\n' || ch == '\r' { 300 newline = true 301 ch = sc.skipWhiteSpace(whitespace2) 302 } 303 304 if ch == '(' && lexer.PrevTokenType == ')' { 305 lexer.PNewLine = newline 306 } else { 307 lexer.PNewLine = false 308 } 309 310 var _buf bytes.Buffer 311 buf := &_buf 312 tok.Pos = sc.Pos 313 314 switch { 315 case isIdent(ch, 0): 316 tok.Type = TIdent 317 err = sc.scanIdent(ch, buf) 318 tok.Str = buf.String() 319 if err != nil { 320 goto finally 321 } 322 if typ, ok := reservedWords[tok.Str]; ok { 323 tok.Type = typ 324 } 325 case isDecimal(ch): 326 tok.Type = TNumber 327 err = sc.scanNumber(ch, buf) 328 tok.Str = buf.String() 329 default: 330 switch ch { 331 case EOF: 332 tok.Type = EOF 333 case '-': 334 if sc.Peek() == '-' { 335 err = sc.skipComments(sc.Next()) 336 if err != nil { 337 goto finally 338 } 339 goto redo 340 } else { 341 tok.Type = ch 342 tok.Str = string(rune(ch)) 343 } 344 case '"', '\'': 345 tok.Type = TString 346 err = sc.scanString(ch, buf) 347 tok.Str = buf.String() 348 case '[': 349 if c := sc.Peek(); c == '[' || c == '=' { 350 tok.Type = TString 351 err = sc.scanMultilineString(sc.Next(), buf) 352 tok.Str = buf.String() 353 } else { 354 tok.Type = ch 355 tok.Str = string(rune(ch)) 356 } 357 case '=': 358 if sc.Peek() == '=' { 359 tok.Type = TEqeq 360 tok.Str = "==" 361 sc.Next() 362 } else { 363 tok.Type = ch 364 tok.Str = string(rune(ch)) 365 } 366 case '~': 367 if sc.Peek() == '=' { 368 tok.Type = TNeq 369 tok.Str = "~=" 370 sc.Next() 371 } else { 372 err = sc.Error("~", "Invalid '~' token") 373 } 374 case '<': 375 if sc.Peek() == '=' { 376 tok.Type = TLte 377 tok.Str = "<=" 378 sc.Next() 379 } else { 380 tok.Type = ch 381 tok.Str = string(rune(ch)) 382 } 383 case '>': 384 if sc.Peek() == '=' { 385 tok.Type = TGte 386 tok.Str = ">=" 387 sc.Next() 388 } else { 389 tok.Type = ch 390 tok.Str = string(rune(ch)) 391 } 392 case '.': 393 ch2 := sc.Peek() 394 switch { 395 case isDecimal(ch2): 396 tok.Type = TNumber 397 err = sc.scanNumber(ch, buf) 398 tok.Str = buf.String() 399 case ch2 == '.': 400 writeChar(buf, ch) 401 writeChar(buf, sc.Next()) 402 if sc.Peek() == '.' { 403 writeChar(buf, sc.Next()) 404 tok.Type = T3Comma 405 } else { 406 tok.Type = T2Comma 407 } 408 default: 409 tok.Type = '.' 410 } 411 tok.Str = buf.String() 412 case ':': 413 if sc.Peek() == ':' { 414 tok.Type = T2Colon 415 tok.Str = "::" 416 sc.Next() 417 } else { 418 tok.Type = ch 419 tok.Str = string(rune(ch)) 420 } 421 case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ',': 422 tok.Type = ch 423 tok.Str = string(rune(ch)) 424 default: 425 writeChar(buf, ch) 426 err = sc.Error(buf.String(), "Invalid token") 427 goto finally 428 } 429 } 430 431 finally: 432 tok.Name = TokenName(int(tok.Type)) 433 return tok, err 434 } 435 436 // yacc interface {{{ 437 438 type Lexer struct { 439 scanner *Scanner 440 Stmts []ast.Stmt 441 PNewLine bool 442 Token ast.Token 443 PrevTokenType int 444 } 445 446 func (lx *Lexer) Lex(lval *yySymType) int { 447 lx.PrevTokenType = lx.Token.Type 448 tok, err := lx.scanner.Scan(lx) 449 if err != nil { 450 panic(err) 451 } 452 if tok.Type < 0 { 453 return 0 454 } 455 lval.token = tok 456 lx.Token = tok 457 return int(tok.Type) 458 } 459 460 func (lx *Lexer) Error(message string) { 461 panic(lx.scanner.Error(lx.Token.Str, message)) 462 } 463 464 func (lx *Lexer) TokenError(tok ast.Token, message string) { 465 panic(lx.scanner.TokenError(tok, message)) 466 } 467 468 func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) { 469 lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil} 470 chunk = nil 471 defer func() { 472 if e := recover(); e != nil { 473 err, _ = e.(error) 474 } 475 }() 476 yyParse(lexer) 477 chunk = lexer.Stmts 478 return 479 } 480 481 // }}} 482 483 // Dump {{{ 484 485 func isInlineDumpNode(rv reflect.Value) bool { 486 switch rv.Kind() { 487 case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr: 488 return false 489 default: 490 return true 491 } 492 } 493 494 func dump(node interface{}, level int, s string) string { 495 rt := reflect.TypeOf(node) 496 if fmt.Sprint(rt) == "<nil>" { 497 return strings.Repeat(s, level) + "<nil>" 498 } 499 500 rv := reflect.ValueOf(node) 501 buf := []string{} 502 switch rt.Kind() { 503 case reflect.Slice: 504 if rv.Len() == 0 { 505 return strings.Repeat(s, level) + "<empty>" 506 } 507 for i := 0; i < rv.Len(); i++ { 508 buf = append(buf, dump(rv.Index(i).Interface(), level, s)) 509 } 510 case reflect.Ptr: 511 vt := rv.Elem() 512 tt := rt.Elem() 513 indicies := []int{} 514 for i := 0; i < tt.NumField(); i++ { 515 if strings.Index(tt.Field(i).Name, "Base") > -1 { 516 continue 517 } 518 indicies = append(indicies, i) 519 } 520 switch { 521 case len(indicies) == 0: 522 return strings.Repeat(s, level) + "<empty>" 523 case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])): 524 for _, i := range indicies { 525 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s)) 526 } 527 default: 528 buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()) 529 for _, i := range indicies { 530 if isInlineDumpNode(vt.Field(i)) { 531 inf := dump(vt.Field(i).Interface(), 0, s) 532 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf) 533 } else { 534 buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ") 535 buf = append(buf, dump(vt.Field(i).Interface(), level+2, s)) 536 } 537 } 538 } 539 default: 540 buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node)) 541 } 542 return strings.Join(buf, "\n") 543 } 544 545 func Dump(chunk []ast.Stmt) string { 546 return dump(chunk, 0, " ") 547 } 548 549 // }}