github.com/mattn/anko@v0.1.10/parser/lexer.go (about) 1 // Package parser implements parser for anko. 2 package parser 3 4 import ( 5 "errors" 6 "fmt" 7 "reflect" 8 "strconv" 9 "strings" 10 "unicode" 11 12 "github.com/mattn/anko/ast" 13 ) 14 15 const ( 16 // EOF is short for End of file. 17 EOF = -1 18 // EOL is short for End of line. 19 EOL = '\n' 20 ) 21 22 // Error is a parse error. 23 type Error struct { 24 Message string 25 Pos ast.Position 26 Filename string 27 Fatal bool 28 } 29 30 // Error returns the parse error message. 31 func (e *Error) Error() string { 32 return e.Message 33 } 34 35 // Scanner stores informations for lexer. 36 type Scanner struct { 37 src []rune 38 offset int 39 lineHead int 40 line int 41 } 42 43 // opName is correction of operation names. 44 var opName = map[string]int{ 45 "func": FUNC, 46 "return": RETURN, 47 "var": VAR, 48 "throw": THROW, 49 "if": IF, 50 "for": FOR, 51 "break": BREAK, 52 "continue": CONTINUE, 53 "in": IN, 54 "else": ELSE, 55 "new": NEW, 56 "true": TRUE, 57 "false": FALSE, 58 "nil": NIL, 59 "module": MODULE, 60 "try": TRY, 61 "catch": CATCH, 62 "finally": FINALLY, 63 "switch": SWITCH, 64 "case": CASE, 65 "default": DEFAULT, 66 "go": GO, 67 "chan": CHAN, 68 "struct": STRUCT, 69 "make": MAKE, 70 "type": TYPE, 71 "len": LEN, 72 "delete": DELETE, 73 "close": CLOSE, 74 "map": MAP, 75 "import": IMPORT, 76 } 77 78 var ( 79 nilValue = reflect.New(reflect.TypeOf((*interface{})(nil)).Elem()).Elem() 80 trueValue = reflect.ValueOf(true) 81 falseValue = reflect.ValueOf(false) 82 oneLiteral = &ast.LiteralExpr{Literal: reflect.ValueOf(int64(1))} 83 ) 84 85 // Init resets code to scan. 86 func (s *Scanner) Init(src string) { 87 s.src = []rune(src) 88 } 89 90 // Scan analyses token, and decide identify or literals. 91 func (s *Scanner) Scan() (tok int, lit string, pos ast.Position, err error) { 92 retry: 93 s.skipBlank() 94 pos = s.pos() 95 switch ch := s.peek(); { 96 case isLetter(ch): 97 lit, err = s.scanIdentifier() 98 if err != nil { 99 return 100 } 101 if name, ok := opName[lit]; ok { 102 tok = name 103 } else { 104 tok = IDENT 105 } 106 case isDigit(ch): 107 tok = NUMBER 108 lit, err = s.scanNumber() 109 if err != nil { 110 return 111 } 112 case ch == '"': 113 tok = STRING 114 lit, err = s.scanString('"') 115 if err != nil { 116 return 117 } 118 case ch == '\'': 119 tok = STRING 120 lit, err = s.scanString('\'') 121 if err != nil { 122 return 123 } 124 case ch == '`': 125 tok = STRING 126 lit, err = s.scanRawString('`') 127 if err != nil { 128 return 129 } 130 default: 131 switch ch { 132 case EOF: 133 tok = EOF 134 case '#': 135 for !isEOL(s.peek()) { 136 s.next() 137 } 138 goto retry 139 case '!': 140 s.next() 141 switch s.peek() { 142 case '=': 143 tok = NEQ 144 lit = "!=" 145 default: 146 s.back() 147 tok = int(ch) 148 lit = string(ch) 149 } 150 case '=': 151 s.next() 152 switch s.peek() { 153 case '=': 154 tok = EQEQ 155 lit = "==" 156 case ' ': 157 if s.peekPlus(1) == '<' && s.peekPlus(2) == '-' { 158 s.next() 159 s.next() 160 tok = EQOPCHAN 161 lit = "= <-" 162 } else { 163 s.back() 164 tok = int(ch) 165 lit = string(ch) 166 } 167 default: 168 s.back() 169 tok = int(ch) 170 lit = string(ch) 171 } 172 case '?': 173 s.next() 174 switch s.peek() { 175 case '?': 176 tok = NILCOALESCE 177 lit = "??" 178 default: 179 s.back() 180 tok = int(ch) 181 lit = string(ch) 182 } 183 case '+': 184 s.next() 185 switch s.peek() { 186 case '+': 187 tok = PLUSPLUS 188 lit = "++" 189 case '=': 190 tok = PLUSEQ 191 lit = "+=" 192 default: 193 s.back() 194 tok = int(ch) 195 lit = string(ch) 196 } 197 case '-': 198 s.next() 199 switch s.peek() { 200 case '-': 201 tok = MINUSMINUS 202 lit = "--" 203 case '=': 204 tok = MINUSEQ 205 lit = "-=" 206 default: 207 s.back() 208 tok = int(ch) 209 lit = "-" 210 } 211 case '*': 212 s.next() 213 switch s.peek() { 214 case '=': 215 tok = MULEQ 216 lit = "*=" 217 default: 218 s.back() 219 tok = int(ch) 220 lit = string(ch) 221 } 222 case '/': 223 s.next() 224 switch s.peek() { 225 case '=': 226 tok = DIVEQ 227 lit = "/=" 228 case '/': 229 for !isEOL(s.peek()) { 230 s.next() 231 } 232 goto retry 233 case '*': 234 for { 235 _, err = s.scanRawString('*') 236 if err != nil { 237 return 238 } 239 240 if s.peek() == '/' { 241 s.next() 242 goto retry 243 } 244 245 s.back() 246 } 247 default: 248 s.back() 249 tok = int(ch) 250 lit = string(ch) 251 } 252 case '>': 253 s.next() 254 switch s.peek() { 255 case '=': 256 tok = GE 257 lit = ">=" 258 case '>': 259 tok = SHIFTRIGHT 260 lit = ">>" 261 default: 262 s.back() 263 tok = int(ch) 264 lit = string(ch) 265 } 266 case '<': 267 s.next() 268 switch s.peek() { 269 case '-': 270 tok = OPCHAN 271 lit = "<-" 272 case '=': 273 tok = LE 274 lit = "<=" 275 case '<': 276 tok = SHIFTLEFT 277 lit = "<<" 278 default: 279 s.back() 280 tok = int(ch) 281 lit = string(ch) 282 } 283 case '|': 284 s.next() 285 switch s.peek() { 286 case '|': 287 tok = OROR 288 lit = "||" 289 case '=': 290 tok = OREQ 291 lit = "|=" 292 default: 293 s.back() 294 tok = int(ch) 295 lit = string(ch) 296 } 297 case '&': 298 s.next() 299 switch s.peek() { 300 case '&': 301 tok = ANDAND 302 lit = "&&" 303 case '=': 304 tok = ANDEQ 305 lit = "&=" 306 default: 307 s.back() 308 tok = int(ch) 309 lit = string(ch) 310 } 311 case '.': 312 s.next() 313 if s.peek() == '.' { 314 s.next() 315 if s.peek() == '.' { 316 tok = VARARG 317 } else { 318 err = fmt.Errorf("syntax error on '%v' at %v:%v", string(ch), pos.Line, pos.Column) 319 return 320 } 321 } else { 322 s.back() 323 tok = int(ch) 324 lit = string(ch) 325 } 326 case '\n', '(', ')', ':', ';', '%', '{', '}', '[', ']', ',', '^': 327 tok = int(ch) 328 lit = string(ch) 329 default: 330 err = fmt.Errorf("syntax error on '%v' at %v:%v", string(ch), pos.Line, pos.Column) 331 tok = int(ch) 332 lit = string(ch) 333 return 334 } 335 s.next() 336 } 337 return 338 } 339 340 // isLetter returns true if the rune is a letter for identity. 341 func isLetter(ch rune) bool { 342 return unicode.IsLetter(ch) || ch == '_' 343 } 344 345 // isDigit returns true if the rune is a number. 346 func isDigit(ch rune) bool { 347 return '0' <= ch && ch <= '9' 348 } 349 350 // isHex returns true if the rune is a hex digits. 351 func isHex(ch rune) bool { 352 return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F') 353 } 354 355 // isEOL returns true if the rune is at end-of-line or end-of-file. 356 func isEOL(ch rune) bool { 357 return ch == '\n' || ch == -1 358 } 359 360 // isBlank returns true if the rune is empty character.. 361 func isBlank(ch rune) bool { 362 return ch == ' ' || ch == '\t' || ch == '\r' 363 } 364 365 // peek returns current rune in the code. 366 func (s *Scanner) peek() rune { 367 if s.reachEOF() { 368 return EOF 369 } 370 return s.src[s.offset] 371 } 372 373 // peek returns current rune plus i in the code. 374 func (s *Scanner) peekPlus(i int) rune { 375 if len(s.src) <= s.offset+i { 376 return EOF 377 } 378 return s.src[s.offset+i] 379 } 380 381 // next moves offset to next. 382 func (s *Scanner) next() { 383 if !s.reachEOF() { 384 if s.peek() == '\n' { 385 s.lineHead = s.offset + 1 386 s.line++ 387 } 388 s.offset++ 389 } 390 } 391 392 // current returns the current offset. 393 func (s *Scanner) current() int { 394 return s.offset 395 } 396 397 // offset sets the offset value. 398 func (s *Scanner) set(o int) { 399 s.offset = o 400 } 401 402 // back moves back offset once to top. 403 func (s *Scanner) back() { 404 s.offset-- 405 } 406 407 // reachEOF returns true if offset is at end-of-file. 408 func (s *Scanner) reachEOF() bool { 409 return len(s.src) <= s.offset 410 } 411 412 // pos returns the position of current. 413 func (s *Scanner) pos() ast.Position { 414 return ast.Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1} 415 } 416 417 // skipBlank moves position into non-black character. 418 func (s *Scanner) skipBlank() { 419 for isBlank(s.peek()) { 420 s.next() 421 } 422 } 423 424 // scanIdentifier returns identifier beginning at current position. 425 func (s *Scanner) scanIdentifier() (string, error) { 426 var ret []rune 427 for { 428 if !isLetter(s.peek()) && !isDigit(s.peek()) { 429 break 430 } 431 ret = append(ret, s.peek()) 432 s.next() 433 } 434 return string(ret), nil 435 } 436 437 // scanNumber returns number beginning at current position. 438 func (s *Scanner) scanNumber() (string, error) { 439 result := []rune{s.peek()} 440 s.next() 441 442 if result[0] == '0' && (s.peek() == 'x' || s.peek() == 'X') { 443 // hex 444 result = append(result, 'x') 445 s.next() 446 for isHex(s.peek()) { 447 result = append(result, s.peek()) 448 s.next() 449 } 450 } else { 451 // non-hex 452 found := false 453 for { 454 if isDigit(s.peek()) { 455 // is digit 456 result = append(result, s.peek()) 457 s.next() 458 continue 459 } 460 461 if s.peek() == '.' { 462 // is . 463 result = append(result, '.') 464 s.next() 465 continue 466 } 467 468 if s.peek() == 'e' || s.peek() == 'E' { 469 // is e 470 if found { 471 return "", errors.New("unexpected " + string(s.peek())) 472 } 473 found = true 474 s.next() 475 476 // check if + or - 477 if s.peek() == '+' || s.peek() == '-' { 478 // add e with + or - 479 result = append(result, 'e') 480 result = append(result, s.peek()) 481 s.next() 482 } else { 483 // add e, but next char not + or - 484 result = append(result, 'e') 485 } 486 continue 487 } 488 489 // not digit, e, nor . 490 break 491 } 492 } 493 494 if isLetter(s.peek()) { 495 return "", errors.New("identifier starts immediately after numeric literal") 496 } 497 498 return string(result), nil 499 } 500 501 // scanRawString returns raw-string starting at current position. 502 func (s *Scanner) scanRawString(l rune) (string, error) { 503 var ret []rune 504 for { 505 s.next() 506 if s.peek() == EOF { 507 return "", errors.New("unexpected EOF") 508 } 509 if s.peek() == l { 510 s.next() 511 break 512 } 513 ret = append(ret, s.peek()) 514 } 515 return string(ret), nil 516 } 517 518 // scanString returns string starting at current position. 519 // This handles backslash escaping. 520 func (s *Scanner) scanString(l rune) (string, error) { 521 var ret []rune 522 eos: 523 for { 524 s.next() 525 switch s.peek() { 526 case EOL: 527 return "", errors.New("unexpected EOL") 528 case EOF: 529 return "", errors.New("unexpected EOF") 530 case l: 531 s.next() 532 break eos 533 case '\\': 534 s.next() 535 switch s.peek() { 536 case 'b': 537 ret = append(ret, '\b') 538 continue 539 case 'f': 540 ret = append(ret, '\f') 541 continue 542 case 'r': 543 ret = append(ret, '\r') 544 continue 545 case 'n': 546 ret = append(ret, '\n') 547 continue 548 case 't': 549 ret = append(ret, '\t') 550 continue 551 } 552 ret = append(ret, s.peek()) 553 continue 554 default: 555 ret = append(ret, s.peek()) 556 } 557 } 558 return string(ret), nil 559 } 560 561 // Lexer provides interface to parse codes. 562 type Lexer struct { 563 s *Scanner 564 lit string 565 pos ast.Position 566 e error 567 stmt ast.Stmt 568 } 569 570 // Lex scans the token and literals. 571 func (l *Lexer) Lex(lval *yySymType) int { 572 tok, lit, pos, err := l.s.Scan() 573 if err != nil { 574 l.e = &Error{Message: err.Error(), Pos: pos, Fatal: true} 575 } 576 lval.tok = ast.Token{Tok: tok, Lit: lit} 577 lval.tok.SetPosition(pos) 578 l.lit = lit 579 l.pos = pos 580 return tok 581 } 582 583 // Error sets parse error. 584 func (l *Lexer) Error(msg string) { 585 l.e = &Error{Message: msg, Pos: l.pos, Fatal: false} 586 } 587 588 // Parse provides way to parse the code using Scanner. 589 func Parse(s *Scanner) (ast.Stmt, error) { 590 l := Lexer{s: s} 591 if yyParse(&l) != 0 { 592 return nil, l.e 593 } 594 return l.stmt, l.e 595 } 596 597 // EnableErrorVerbose enabled verbose errors from the parser 598 func EnableErrorVerbose() { 599 yyErrorVerbose = true 600 } 601 602 // EnableDebug enabled debug from the parser 603 func EnableDebug(level int) { 604 yyDebug = level 605 } 606 607 // ParseSrc provides way to parse the code from source. 608 func ParseSrc(src string) (ast.Stmt, error) { 609 scanner := &Scanner{ 610 src: []rune(src), 611 } 612 return Parse(scanner) 613 } 614 615 func toNumber(numString string) (reflect.Value, error) { 616 // hex 617 if len(numString) > 2 && numString[0:2] == "0x" { 618 i, err := strconv.ParseInt(numString[2:], 16, 64) 619 if err != nil { 620 return nilValue, err 621 } 622 return reflect.ValueOf(i), nil 623 } 624 625 // hex 626 if len(numString) > 3 && numString[0:3] == "-0x" { 627 i, err := strconv.ParseInt("-"+numString[3:], 16, 64) 628 if err != nil { 629 return nilValue, err 630 } 631 return reflect.ValueOf(i), nil 632 } 633 634 // float 635 if strings.Contains(numString, ".") || strings.Contains(numString, "e") { 636 f, err := strconv.ParseFloat(numString, 64) 637 if err != nil { 638 return nilValue, err 639 } 640 return reflect.ValueOf(f), nil 641 } 642 643 // int 644 i, err := strconv.ParseInt(numString, 10, 64) 645 if err != nil { 646 return nilValue, err 647 } 648 return reflect.ValueOf(i), nil 649 } 650 651 func stringToValue(aString string) reflect.Value { 652 return reflect.ValueOf(aString) 653 }