github.com/hattya/go.sh@v0.0.0-20240328132134-f53276d95cc6/parser/lexer.go (about) 1 // 2 // go.sh/parser :: lexer.go 3 // 4 // Copyright (c) 2018-2021 Akinori Hattori <hattya@gmail.com> 5 // 6 // SPDX-License-Identifier: MIT 7 // 8 9 //go:generate goyacc -l -o parser.go parser.go.y 10 11 // Package parser implements a parser for the Shell Command Language 12 // (POSIX.1-2017). 13 package parser 14 15 import ( 16 "errors" 17 "fmt" 18 "io" 19 "strings" 20 "sync" 21 "sync/atomic" 22 "unicode" 23 24 "github.com/hattya/go.sh/ast" 25 "github.com/hattya/go.sh/interp" 26 "github.com/hattya/go.sh/printer" 27 ) 28 29 var ( 30 ops = map[int]string{ 31 AND: "&&", 32 OR: "||", 33 LAE: "((", 34 RAE: "))", 35 BREAK: ";;", 36 '|': "|", 37 '(': "(", 38 ')': ")", 39 '<': "<", 40 '>': ">", 41 CLOBBER: ">|", 42 APPEND: ">>", 43 HEREDOC: "<<", 44 HEREDOCI: "<<-", 45 DUPIN: "<&", 46 DUPOUT: ">&", 47 RDWR: "<>", 48 '&': "&", 49 ';': ";", 50 } 51 words = map[string]int{ 52 "!": Bang, 53 "{": Lbrace, 54 "}": Rbrace, 55 "for": For, 56 "case": Case, 57 "esac": Esac, 58 "in": In, 59 "if": If, 60 "elif": Elif, 61 "then": Then, 62 "else": Else, 63 "fi": Fi, 64 "while": While, 65 "until": Until, 66 "do": Do, 67 "done": Done, 68 } 69 70 errParamExp = errors.New("syntax error: invalid parameter expansion") 71 ) 72 73 type lexer struct { 74 env *interp.ExecEnv 75 name string 76 r io.RuneScanner 77 cmds []ast.Command 78 comments []*ast.Comment 79 cmdSubst rune 80 token chan ast.Node 81 done chan struct{} 82 83 mu sync.Mutex 84 eof bool 85 err error 86 cancel chan struct{} 87 88 aliases []*alias 89 stack []int 90 arithExpr bool 91 paren int 92 heredoc heredoc 93 word ast.Word 94 b strings.Builder 95 line int 96 col int 97 prevCol int 98 pos ast.Pos 99 last atomic.Value 100 } 101 102 func newLexer(env *interp.ExecEnv, name string, r io.RuneScanner) *lexer { 103 l := &lexer{ 104 env: env, 105 name: name, 106 r: r, 107 token: make(chan ast.Node), 108 cancel: make(chan struct{}), 109 heredoc: heredoc{c: make(chan struct{}, 1)}, 110 line: 1, 111 col: 1, 112 } 113 l.mark(0) 114 go l.run() 115 return l 116 } 117 118 func (l *lexer) Lex(lval *yySymType) int { 119 switch tok := (<-l.token).(type) { 120 case token: 121 l.last.Store(tok.Pos()) 122 lval.token = tok 123 return tok.typ 124 case word: 125 l.last.Store(tok.Pos()) 126 lval.word = tok.val 127 return tok.typ 128 } 129 return 0 130 } 131 132 func (l *lexer) run() { 133 defer func() { 134 close(l.token) 135 if l.done != nil { 136 close(l.done) 137 } 138 139 if e := recover(); e != nil { 140 // re-panic 141 panic(e) 142 } 143 }() 144 145 for action := l.lexPipeline; action != nil; { 146 action = action() 147 } 148 } 149 150 func (l *lexer) lexPipeline() action { 151 tok := l.scanRawToken() 152 if l.tr(tok) == Bang { 153 l.emit(Bang) 154 tok = l.scanRawToken() 155 } 156 return l.lexCmd(tok) 157 } 158 159 func (l *lexer) lexNextCmd() action { 160 return l.lexCmd(l.scanRawToken()) 161 } 162 163 func (l *lexer) lexCmd(tok int) action { 164 tok = l.tr(tok) 165 switch tok { 166 case '<', '>', CLOBBER, APPEND, HEREDOC, HEREDOCI, DUPIN, DUPOUT, RDWR: 167 l.emit(tok) 168 if tok = l.scanRedir(tok); tok == WORD { 169 l.emit(tok) 170 return l.lexCmdPrefix 171 } 172 case IO_NUMBER: 173 l.emit(tok) 174 return l.lexCmdPrefix 175 case WORD: 176 return l.lexSimpleCmd 177 case '(': 178 return l.lexSubshell 179 case Lbrace: 180 return l.lexGroup 181 case LAE: 182 return l.lexArithEval 183 case For: 184 return l.lexFor 185 case Case: 186 return l.lexCase 187 case BREAK: 188 return l.lexCaseBreak 189 case If: 190 return l.lexIf 191 case Elif: 192 return l.lexElif 193 case Then: 194 return l.lexThen 195 case Else: 196 return l.lexElse 197 case While: 198 return l.lexWhile 199 case Until: 200 return l.lexUntil 201 case Do: 202 return l.lexDo 203 } 204 return l.lexToken(tok) 205 } 206 207 func (l *lexer) lexSimpleCmd() action { 208 switch { 209 case l.isAssign(): 210 l.emit(ASSIGNMENT_WORD) 211 return l.lexCmdPrefix 212 case l.subst(): 213 return l.lexPipeline 214 case len(l.word) == 1: 215 if w, ok := l.word[0].(*ast.Lit); ok && l.isName(w.Value) { 216 // lookahead 217 l.word = nil 218 l.mark(0) 219 tok := l.scanToken() 220 // save lookahead token 221 word := l.word 222 pos := l.pos 223 // emit current token 224 l.word = ast.Word{w} 225 l.pos = w.ValuePos 226 if tok == '(' { 227 if l.isSpBuiltin(w.Value) { 228 l.error(w.ValuePos, "syntax error: invalid function name") 229 return nil 230 } 231 l.emit(NAME) 232 l.pos = pos 233 return l.lexFuncDef 234 } 235 l.emit(WORD) 236 // restore lookahead token 237 l.word = word 238 l.pos = pos 239 return l.onCmdSuffix(tok) 240 } 241 } 242 l.emit(WORD) 243 return l.lexCmdSuffix 244 } 245 246 // isSpBuiltin reports whether s matches the name of a special built-in 247 // utility. 248 func (l *lexer) isSpBuiltin(s string) bool { 249 switch s { 250 case "break", ":", "continue", ".", "eval", "exec", "exit", "export", 251 "readonly", "return", "set", "shift", "times", "trap", "unset": 252 return true 253 } 254 return false 255 } 256 257 func (l *lexer) lexCmdPrefix() action { 258 tok := l.scanToken() 259 switch tok { 260 case '<', '>', CLOBBER, APPEND, HEREDOC, HEREDOCI, DUPIN, DUPOUT, RDWR: 261 l.emit(tok) 262 if tok = l.scanRedir(tok); tok == WORD { 263 goto Prefix 264 } 265 case IO_NUMBER: 266 goto Prefix 267 case WORD: 268 switch { 269 case l.isAssign(): 270 tok = ASSIGNMENT_WORD 271 goto Prefix 272 case l.subst(): 273 return l.lexCmdPrefix 274 } 275 l.emit(WORD) 276 return l.lexCmdSuffix 277 } 278 return l.lexToken(tok) 279 Prefix: 280 l.emit(tok) 281 return l.lexCmdPrefix 282 } 283 284 // isAssign reports whether the current word is ASSIGNMENT_WORD. 285 func (l *lexer) isAssign() bool { 286 if w, ok := l.word[0].(*ast.Lit); ok { 287 if i := strings.IndexRune(w.Value, '='); i > 0 { 288 return l.isName(w.Value[:i]) 289 } 290 } 291 return false 292 } 293 294 func (l *lexer) lexCmdSuffix() action { 295 return l.onCmdSuffix(l.scanToken()) 296 } 297 298 func (l *lexer) onCmdSuffix(tok int) action { 299 switch tok { 300 case '<', '>', CLOBBER, APPEND, HEREDOC, HEREDOCI, DUPIN, DUPOUT, RDWR: 301 l.emit(tok) 302 if tok = l.scanRedir(tok); tok == WORD { 303 goto Suffix 304 } 305 case IO_NUMBER, WORD: 306 goto Suffix 307 } 308 return l.lexToken(tok) 309 Suffix: 310 l.emit(tok) 311 return l.lexCmdSuffix 312 } 313 314 func (l *lexer) lexSubshell() action { 315 l.emit('(') 316 // push 317 l.stack = append(l.stack, ')') 318 return l.lexPipeline 319 } 320 321 func (l *lexer) lexGroup() action { 322 l.emit(Lbrace) 323 // push 324 l.stack = append(l.stack, Rbrace) 325 return l.lexPipeline 326 } 327 328 func (l *lexer) lexArithEval() action { 329 pos := l.pos 330 l.emit(LAE) 331 // push 332 l.stack = append(l.stack, RAE) 333 tok := l.scanArithExpr(pos) 334 if tok == RAE { 335 l.emit(WORD) 336 l.mark(-2) 337 } 338 return l.lexToken(tok) 339 } 340 341 func (l *lexer) lexFor() action { 342 l.emit(For) 343 // name 344 switch tok := l.scanToken(); tok { 345 case WORD: 346 if len(l.word) == 1 { 347 if w, ok := l.word[0].(*ast.Lit); ok && l.isName(w.Value) { 348 l.emit(NAME) 349 break 350 } 351 } 352 l.error(l.word.Pos(), "syntax error: invalid for loop variable") 353 return nil 354 default: 355 return l.lexToken(tok) 356 } 357 Third: 358 switch tok := l.scanRawToken(); tok { 359 case ';': 360 l.emit(';') 361 if !l.linebreak() { 362 return nil 363 } 364 for { 365 switch tok = l.tr(l.scanRawToken()); { 366 case tok == Do: 367 goto Do 368 case !l.subst(): 369 return l.lexToken(tok) 370 } 371 } 372 case '\n': 373 l.emit('\n') 374 if !l.linebreak() { 375 return nil 376 } 377 tok = l.scanRawToken() 378 fallthrough 379 default: 380 switch tok = l.tr(tok); tok { 381 case In: 382 goto In 383 case Do: 384 goto Do 385 default: 386 if l.subst() { 387 goto Third 388 } 389 return l.lexToken(tok) 390 } 391 } 392 In: 393 l.emit(In) 394 for { 395 switch tok := l.scanToken(); tok { 396 case WORD: 397 l.emit(WORD) 398 case ';', '\n': 399 l.emit(tok) 400 if !l.linebreak() { 401 return nil 402 } 403 for { 404 switch tok = l.tr(l.scanRawToken()); { 405 case tok == Do: 406 goto Do 407 case !l.subst(): 408 return l.lexToken(tok) 409 } 410 } 411 default: 412 return l.lexToken(tok) 413 } 414 } 415 Do: 416 l.emit(Do) 417 // push 418 l.stack = append(l.stack, Done) 419 return l.lexPipeline 420 } 421 422 // isName reports whether s satisfies XBD Name. 423 func (l *lexer) isName(s string) bool { 424 for i, r := range s { 425 if !(r == '_' || unicode.IsLetter(r) || (i > 0 && unicode.IsDigit(r))) { 426 return false 427 } 428 } 429 return true 430 } 431 432 func (l *lexer) lexCase() action { 433 l.emit(Case) 434 // word 435 if tok := l.scanToken(); tok != WORD { 436 return l.lexToken(tok) 437 } 438 l.emit(WORD) 439 if !l.linebreak() { 440 return nil 441 } 442 Third: 443 // in 444 if tok := l.scanRawToken(); l.tr(tok) != In { 445 if l.subst() { 446 goto Third 447 } 448 return l.lexToken(tok) 449 } 450 l.emit(In) 451 if !l.linebreak() { 452 return nil 453 } 454 // push 455 l.stack = append(l.stack, Esac) 456 return l.lexCaseItem 457 } 458 459 func (l *lexer) lexCaseItem() action { 460 tok := l.scanToken() 461 // check for esac 462 if l.tr(tok) == Esac { 463 return l.lexToken(Esac) 464 } 465 // patterns 466 if tok == '(' { 467 l.emit('(') 468 tok = l.scanToken() 469 } 470 Pattern: 471 for { 472 switch tok { 473 case '|', WORD: 474 l.emit(tok) 475 case ')': 476 l.emit(')') 477 if !l.linebreak() { 478 return nil 479 } 480 break Pattern 481 default: 482 return l.lexToken(tok) 483 } 484 tok = l.scanToken() 485 } 486 // clear 487 l.paren = 0 488 return l.lexPipeline 489 } 490 491 func (l *lexer) lexCaseBreak() action { 492 l.emit(BREAK) 493 // check 494 if len(l.stack) != 0 && l.stack[len(l.stack)-1] == Esac { 495 if !l.linebreak() { 496 return nil 497 } 498 return l.lexCaseItem 499 } 500 return nil 501 } 502 503 // tr translates a WORD token to a reserved word token if it is. 504 func (l *lexer) tr(tok int) int { 505 if tok == WORD && len(l.word) == 1 { 506 if w, ok := l.word[0].(*ast.Lit); ok { 507 if tok, ok := words[w.Value]; ok { 508 return tok 509 } 510 } 511 } 512 return tok 513 } 514 515 // subst performs alias substitution at the current word. It returns 516 // false when it was not performed. 517 func (l *lexer) subst() bool { 518 if l.env != nil && len(l.word) == 1 { 519 if w, ok := l.word[0].(*ast.Lit); ok { 520 if v, ok := l.env.Aliases[w.Value]; ok { 521 // avoid infinite loop 522 for _, a := range l.aliases { 523 if a.name == w.Value { 524 return false 525 } 526 } 527 528 r := strings.NewReader(strings.TrimRight(v, "\t ") + " ") 529 l.aliases = append(l.aliases, &alias{ 530 name: w.Value, 531 value: r, 532 blank: len(v) > r.Len()-1, 533 }) 534 l.word = nil 535 return true 536 } 537 } 538 } 539 return false 540 } 541 542 func (l *lexer) lexIf() action { 543 l.emit(If) 544 // push 545 l.stack = append(l.stack, Then) 546 return l.lexPipeline 547 } 548 549 func (l *lexer) lexElif() action { 550 l.emit(Elif) 551 // pop & push 552 if len(l.stack) != 0 && l.stack[len(l.stack)-1] == Fi { 553 l.stack[len(l.stack)-1] = Then 554 return l.lexPipeline 555 } 556 return nil 557 } 558 559 func (l *lexer) lexThen() action { 560 l.emit(Then) 561 // pop & push 562 if len(l.stack) != 0 && l.stack[len(l.stack)-1] == Then { 563 l.stack[len(l.stack)-1] = Fi 564 return l.lexPipeline 565 } 566 return nil 567 } 568 569 func (l *lexer) lexElse() action { 570 l.emit(Else) 571 // pop & push 572 if len(l.stack) != 0 && l.stack[len(l.stack)-1] == Fi { 573 l.stack[len(l.stack)-1] = Fi 574 return l.lexPipeline 575 } 576 return nil 577 } 578 579 func (l *lexer) lexWhile() action { 580 l.emit(While) 581 // push 582 l.stack = append(l.stack, Do) 583 return l.lexPipeline 584 } 585 586 func (l *lexer) lexUntil() action { 587 l.emit(Until) 588 // push 589 l.stack = append(l.stack, Do) 590 return l.lexPipeline 591 } 592 593 func (l *lexer) lexDo() action { 594 l.emit(Do) 595 // pop & push 596 if len(l.stack) != 0 && l.stack[len(l.stack)-1] == Do { 597 l.stack[len(l.stack)-1] = Done 598 return l.lexPipeline 599 } 600 return nil 601 } 602 603 func (l *lexer) lexFuncDef() action { 604 l.emit('(') 605 if tok := l.scanToken(); tok != ')' { 606 return l.lexToken(tok) 607 } 608 l.emit(')') 609 if !l.linebreak() { 610 return nil 611 } 612 return l.lexNextCmd 613 } 614 615 func (l *lexer) lexToken(tok int) action { 616 switch tok { 617 case AND, OR: 618 l.emit(tok) 619 if l.linebreak() { 620 return l.lexPipeline 621 } 622 case BREAK: 623 return l.lexCaseBreak 624 case '|': 625 l.emit('|') 626 if l.linebreak() { 627 return l.lexNextCmd 628 } 629 case '&', ';': 630 l.emit(tok) 631 return l.lexPipeline 632 case '\n': 633 switch { 634 case l.heredoc.exists(): 635 return l.lexHeredoc 636 case len(l.aliases) != 0 || len(l.stack) != 0: 637 l.emit('\n') 638 return l.lexPipeline 639 } 640 case ')', RAE: 641 if l.cmdSubst != 0 && len(l.stack) == 1 { 642 l.emit(tok) 643 l.stack = nil 644 break 645 } 646 fallthrough 647 case Rbrace, Esac, Fi, Done: 648 l.emit(tok) 649 // pop 650 if len(l.stack) != 0 && l.stack[len(l.stack)-1] == tok { 651 l.stack = l.stack[:len(l.stack)-1] 652 return l.lexRedir 653 } 654 default: 655 if tok > 0 { 656 l.emit(tok) 657 } 658 } 659 return nil 660 } 661 662 func (l *lexer) lexRedir() action { 663 tok := l.scanToken() 664 switch tok { 665 case '<', '>', CLOBBER, APPEND, HEREDOC, HEREDOCI, DUPIN, DUPOUT, RDWR: 666 l.emit(tok) 667 if tok = l.scanRedir(tok); tok == WORD { 668 goto Redir 669 } 670 case IO_NUMBER: 671 goto Redir 672 } 673 return l.lexToken(tok) 674 Redir: 675 l.emit(tok) 676 return l.lexRedir 677 } 678 679 func (l *lexer) lexHeredoc() action { 680 find := func(r *ast.Redir, delim string) bool { 681 for i := len(l.word) - 1; i >= 0; i-- { 682 if l.word[i].Pos().Col() == 1 { 683 if s := l.print(l.word[i:]); strings.ContainsRune(s, '\n') { 684 break 685 } else if s == delim { 686 r.Heredoc = l.word[:i] 687 r.Delim = l.word[i:] 688 l.word = nil 689 return true 690 } 691 } 692 } 693 return false 694 } 695 for h := l.heredoc.pop(); h != nil; h = l.heredoc.pop() { 696 l.mark(0) 697 // unquote 698 var word ast.Word 699 var quoted bool 700 for _, w := range h.Word { 701 if q, ok := w.(*ast.Quote); ok { 702 word = append(word, q.Value...) 703 quoted = true 704 } else { 705 word = append(word, w) 706 } 707 } 708 // token → string 709 delim := l.print(word) 710 Heredoc: 711 for { 712 r, err := l.read() 713 if err != nil { 714 if !l.heredoc.exists() { 715 if l.lit(); find(h, delim) { 716 return nil 717 } 718 } 719 goto Error 720 } 721 722 switch { 723 case r == '\n': 724 // <newline> 725 if l.lit(); find(h, delim) { 726 break Heredoc 727 } 728 // store <newline> 729 if w1, ok := l.word[len(l.word)-1].(*ast.Lit); ok { 730 w1.Value += "\n" 731 // concatenate 732 if len(l.word) > 1 { 733 if w2, ok := l.word[len(l.word)-2].(*ast.Lit); ok && w2.End() == w1.Pos() { 734 w2.Value += w1.Value 735 l.word = l.word[:len(l.word)-1] 736 } 737 } 738 } else { 739 l.b.WriteByte('\n') 740 l.lit() 741 } 742 l.mark(0) 743 case !quoted: 744 switch r { 745 case '\\': 746 // escape character 747 if r, err = l.read(); err != nil { 748 goto Error 749 } 750 l.esc(r) 751 case '$': 752 // parameter expansion 753 l.lit() 754 l.mark(-1) 755 if !l.scanParamExp() { 756 return nil 757 } 758 case '`': 759 // command substitution 760 l.lit() 761 l.mark(-1) 762 if !l.scanCmdSubst('`') { 763 return nil 764 } 765 default: 766 l.b.WriteRune(r) 767 } 768 default: 769 l.b.WriteRune(r) 770 } 771 772 continue 773 Error: 774 if err == io.EOF { 775 l.error(h.OpPos, "syntax error: here-document delimited by EOF") 776 } 777 return nil 778 } 779 } 780 return l.lexToken('\n') 781 } 782 783 func (l *lexer) scanArithExpr(pos ast.Pos) int { 784 for { 785 r, err := l.read() 786 if err != nil { 787 if err == io.EOF { 788 l.error(pos, "syntax error: reached EOF while looking for matching '))'") 789 } 790 return -1 791 } 792 793 switch r { 794 case '(', ')': 795 // operator 796 if l.scanOp(r) == RAE { 797 l.lit() 798 return RAE 799 } 800 l.b.WriteByte(byte(r)) 801 case '\\', '\'', '"': 802 // quoting 803 l.lit() 804 l.mark(-1) 805 if !l.scanQuote(r) { 806 return -1 807 } 808 case '$': 809 // parameter expansion 810 l.lit() 811 l.mark(-1) 812 if !l.scanParamExp() { 813 return -1 814 } 815 case '`': 816 // command substitution 817 l.lit() 818 l.mark(-1) 819 if !l.scanCmdSubst('`') { 820 return -1 821 } 822 case '\t', ' ': 823 // <blank> 824 fallthrough 825 case '\n': 826 // <newline> 827 l.lit() 828 l.mark(0) 829 default: 830 l.b.WriteRune(r) 831 } 832 } 833 } 834 835 func (l *lexer) scanRedir(tok int) int { 836 var heredoc bool 837 switch tok { 838 case HEREDOC, HEREDOCI: 839 heredoc = true 840 } 841 tok = l.scanToken() 842 if tok == WORD && heredoc { 843 if strings.ContainsRune(l.print(l.word), '\n') { 844 l.error(l.word.Pos(), `syntax error: here-document delimiter contains '\n'`) 845 return -1 846 } 847 l.heredoc.inc() 848 } 849 return tok 850 } 851 852 func (l *lexer) print(w ast.Word) string { 853 defer l.b.Reset() 854 printer.Fprint(&l.b, w) 855 return l.b.String() 856 } 857 858 func (l *lexer) scanToken() int { 859 var blank bool 860 if len(l.aliases) != 0 { 861 if a := l.aliases[len(l.aliases)-1]; a.value.Len() == 0 { 862 blank = a.blank 863 } 864 } 865 Scan: 866 tok := l.scanRawToken() 867 if tok == WORD && blank && l.subst() { 868 goto Scan 869 } 870 return tok 871 } 872 873 func (l *lexer) scanRawToken() int { 874 for { 875 r, err := l.read() 876 if err != nil { 877 if err == io.EOF { 878 if l.lit(); len(l.word) != 0 { 879 return WORD 880 } 881 return 0 882 } 883 return -1 884 } 885 886 switch r { 887 case '&', '(', ')', ';', '|': 888 // operator 889 if l.lit(); len(l.word) != 0 { 890 l.unread() 891 return WORD 892 } 893 return l.scanOp(r) 894 case '<', '>': 895 // redirection operator 896 if l.lit(); len(l.word) != 0 { 897 l.unread() 898 if len(l.word) == 1 { 899 if w, ok := l.word[0].(*ast.Lit); ok { 900 for _, r := range w.Value { 901 if !('0' <= r && r <= '9') { 902 return WORD 903 } 904 } 905 return IO_NUMBER 906 } 907 } 908 return WORD 909 } 910 return l.scanOp(r) 911 case '\\', '\'', '"': 912 // quoting 913 l.lit() 914 l.mark(-1) 915 if !l.scanQuote(r) { 916 return -1 917 } 918 case '$': 919 // parameter expansion 920 l.lit() 921 l.mark(-1) 922 if !l.scanParamExp() { 923 return -1 924 } 925 case '`': 926 // command substitution 927 if l.cmdSubst != '`' { 928 l.lit() 929 l.mark(-1) 930 if !l.scanCmdSubst('`') { 931 return -1 932 } 933 } else { 934 if l.lit(); len(l.word) != 0 { 935 l.unread() 936 return WORD 937 } 938 if len(l.stack) != 0 { 939 return ')' 940 } 941 return '(' 942 } 943 case '\t', ' ': 944 // <blank> 945 if l.lit(); len(l.word) != 0 { 946 return WORD 947 } 948 l.mark(0) 949 case '\n': 950 // <newline> 951 if l.lit(); len(l.word) != 0 { 952 l.unread() 953 return WORD 954 } 955 return int(r) 956 case '#': 957 // comment 958 l.unread() 959 if l.lit(); len(l.word) != 0 { 960 return WORD 961 } 962 if !l.linebreak() { 963 return -1 964 } 965 default: 966 l.b.WriteRune(r) 967 } 968 } 969 } 970 971 func (l *lexer) scanOp(r rune) (op int) { 972 switch r { 973 case '&': 974 op = '&' 975 if r, err := l.read(); err == nil { 976 if r == '&' { 977 op = AND 978 } else { 979 l.unread() 980 } 981 } 982 case '(': 983 op = '(' 984 l.paren++ 985 if l.paren == 1 { 986 if r, err := l.read(); err == nil { 987 if r == '(' { 988 op = LAE 989 l.paren++ 990 l.arithExpr = true 991 } else { 992 l.unread() 993 } 994 } 995 } 996 case ')': 997 op = ')' 998 l.paren-- 999 if l.arithExpr && l.paren == 1 { 1000 if r, err := l.read(); err == nil { 1001 if r == ')' { 1002 op = RAE 1003 l.paren-- 1004 l.arithExpr = false 1005 } else { 1006 l.unread() 1007 } 1008 } 1009 } 1010 case ';': 1011 op = ';' 1012 if r, err := l.read(); err == nil { 1013 if r == ';' { 1014 op = BREAK 1015 } else { 1016 l.unread() 1017 } 1018 } 1019 case '<': 1020 op = '<' 1021 if r, err := l.read(); err == nil { 1022 switch r { 1023 case '&': 1024 op = DUPIN 1025 case '<': 1026 op = HEREDOC 1027 if r, err = l.read(); err == nil { 1028 if r == '-' { 1029 op = HEREDOCI 1030 } else { 1031 l.unread() 1032 } 1033 } 1034 case '>': 1035 op = RDWR 1036 default: 1037 l.unread() 1038 } 1039 } 1040 case '>': 1041 op = '>' 1042 if r, err := l.read(); err == nil { 1043 switch r { 1044 case '&': 1045 op = DUPOUT 1046 case '>': 1047 op = APPEND 1048 case '|': 1049 op = CLOBBER 1050 default: 1051 l.unread() 1052 } 1053 } 1054 case '|': 1055 op = '|' 1056 if r, err := l.read(); err == nil { 1057 if r == '|' { 1058 op = OR 1059 } else { 1060 l.unread() 1061 } 1062 } 1063 } 1064 return 1065 } 1066 1067 func (l *lexer) scanQuote(r rune) bool { 1068 q := &ast.Quote{ 1069 TokPos: l.pos, 1070 Tok: string(r), 1071 } 1072 l.mark(0) 1073 switch r { 1074 case '\\': 1075 // escape character 1076 r, err := l.read() 1077 if err != nil { 1078 if err == io.EOF { 1079 l.word = append(l.word, q) 1080 break 1081 } 1082 return false 1083 } 1084 1085 if r != '\n' { 1086 q.Value = ast.Word{ 1087 &ast.Lit{ 1088 ValuePos: l.pos, 1089 Value: string(r), 1090 }, 1091 } 1092 l.word = append(l.word, q) 1093 } 1094 case '\'': 1095 // single-quotes 1096 for { 1097 r, err := l.read() 1098 if err != nil { 1099 if err == io.EOF { 1100 l.error(q.TokPos, "syntax error: reached EOF while parsing single-quotes") 1101 } 1102 return false 1103 } 1104 1105 if r == '\'' { 1106 break 1107 } 1108 l.b.WriteRune(r) 1109 } 1110 q.Value = ast.Word{ 1111 &ast.Lit{ 1112 ValuePos: l.pos, 1113 Value: l.b.String(), 1114 }, 1115 } 1116 l.b.Reset() 1117 l.word = append(l.word, q) 1118 case '"': 1119 // double-quotes 1120 var err error 1121 // save current word 1122 word := l.word 1123 l.word = nil 1124 QQ: 1125 for { 1126 r, err = l.read() 1127 if err != nil { 1128 break QQ 1129 } 1130 1131 switch r { 1132 case '\\': 1133 // escape character 1134 if r, err = l.read(); err != nil { 1135 break QQ 1136 } 1137 l.esc(r) 1138 case '$': 1139 // parameter expansion 1140 l.lit() 1141 l.mark(-1) 1142 if !l.scanParamExp() { 1143 return false 1144 } 1145 case '`': 1146 // command substitution 1147 l.lit() 1148 l.mark(-1) 1149 if !l.scanCmdSubst('`') { 1150 return false 1151 } 1152 case '"': 1153 // right double-quote 1154 l.lit() 1155 break QQ 1156 default: 1157 l.b.WriteRune(r) 1158 } 1159 } 1160 if err != nil { 1161 if err == io.EOF { 1162 l.error(q.TokPos, "syntax error: reached EOF while parsing double-quotes") 1163 } 1164 return false 1165 } 1166 // append to current word 1167 q.Value = l.word 1168 l.word = append(word, q) 1169 } 1170 l.mark(0) 1171 return true 1172 } 1173 1174 func (l *lexer) scanParamExp() bool { 1175 r, err := l.read() 1176 if err != nil { 1177 if err == io.EOF { 1178 l.b.WriteByte('$') 1179 return true 1180 } 1181 return false 1182 } 1183 1184 var pe *ast.ParamExp 1185 switch r { 1186 case '{': 1187 // enclosed in braces 1188 return l.scanParamExpInBraces() 1189 case '(': 1190 // command substitution 1191 l.mark(-1) 1192 return l.scanCmdSubst('(') 1193 case '@', '*', '#', '?', '-', '$', '!', '0': 1194 // special parameter 1195 pe = &ast.ParamExp{ 1196 Dollar: l.pos, 1197 Name: &ast.Lit{ 1198 ValuePos: ast.NewPos(l.line, l.col-1), 1199 Value: string(r), 1200 }, 1201 } 1202 default: 1203 pe = &ast.ParamExp{Dollar: l.pos} 1204 l.mark(-1) 1205 switch { 1206 case unicode.IsDigit(r): 1207 // positional parameter 1208 l.b.WriteRune(r) 1209 case r == '_' || unicode.IsLetter(r): 1210 // XBD Name 1211 for l.isNameRune(r) { 1212 l.b.WriteRune(r) 1213 if r, err = l.read(); err != nil { 1214 if err == io.EOF { 1215 break 1216 } 1217 return false 1218 } 1219 } 1220 if err == nil { 1221 l.unread() 1222 } 1223 default: 1224 // continue as WORD 1225 l.unread() 1226 l.b.WriteByte('$') 1227 l.mark(-1) 1228 return true 1229 } 1230 pe.Name = &ast.Lit{ 1231 ValuePos: l.pos, 1232 Value: l.b.String(), 1233 } 1234 l.b.Reset() 1235 } 1236 l.word = append(l.word, pe) 1237 l.mark(0) 1238 return true 1239 } 1240 1241 func (l *lexer) scanParamExpInBraces() bool { 1242 pe := &ast.ParamExp{ 1243 Dollar: l.pos, 1244 Braces: true, 1245 } 1246 l.mark(0) 1247 // inside braces 1248 r, err := l.read() 1249 switch { 1250 case err != nil: 1251 goto Error 1252 case r == '#': 1253 if r, err = l.read(); err != nil { 1254 goto Error 1255 } 1256 switch r { 1257 case ':', '=', '+', '%', '}': 1258 // special parameter 1259 pe.Name = &ast.Lit{ 1260 ValuePos: l.pos, 1261 Value: "#", 1262 } 1263 l.mark(-1) 1264 goto Op 1265 case '#', '?', '-': 1266 v := r 1267 if r, err = l.read(); err != nil { 1268 goto Error 1269 } 1270 l.unread() 1271 if r != '}' { 1272 // special parameter 1273 pe.Name = &ast.Lit{ 1274 ValuePos: l.pos, 1275 Value: "#", 1276 } 1277 l.mark(-1) 1278 r = v 1279 goto Op 1280 } else { 1281 // string length 1282 pe.OpPos = l.pos 1283 pe.Op = "#" 1284 l.mark(-1) 1285 pe.Name = &ast.Lit{ 1286 ValuePos: l.pos, 1287 Value: string(v), 1288 } 1289 goto Rbrace 1290 } 1291 default: 1292 // string length 1293 l.unread() 1294 pe.OpPos = l.pos 1295 pe.Op = "#" 1296 l.mark(0) 1297 } 1298 default: 1299 l.unread() 1300 } 1301 // name 1302 switch r, _ = l.read(); r { 1303 case '@', '*', '?', '-', '$', '!', '0': 1304 // special parameter 1305 l.b.WriteByte(byte(r)) 1306 default: 1307 // XBD Name 1308 for l.isNameRune(r) { 1309 l.b.WriteRune(r) 1310 if r, err = l.read(); err != nil { 1311 goto Error 1312 } 1313 } 1314 l.unread() 1315 if l.b.Len() == 0 { 1316 err = errParamExp 1317 goto Error 1318 } 1319 } 1320 pe.Name = &ast.Lit{ 1321 ValuePos: l.pos, 1322 Value: l.b.String(), 1323 } 1324 l.b.Reset() 1325 l.mark(0) 1326 // op 1327 if r, err = l.read(); err != nil { 1328 goto Error 1329 } 1330 Op: 1331 switch r { 1332 case ':': 1333 if r, err = l.read(); err == nil { 1334 switch r { 1335 case '-', '=', '?', '+': 1336 pe.Op = ":" + string(r) 1337 default: 1338 l.unread() 1339 err = errParamExp 1340 } 1341 } 1342 case '-', '=', '?', '+': 1343 pe.Op = string(r) 1344 case '%', '#': 1345 pe.Op = string(r) 1346 if r, err = l.read(); err == nil { 1347 switch r { 1348 case '%', '#': 1349 if s := string(r); pe.Op == s { 1350 pe.Op += s 1351 } else { 1352 l.unread() 1353 err = errParamExp 1354 } 1355 default: 1356 l.unread() 1357 } 1358 } 1359 default: 1360 l.unread() 1361 goto Rbrace 1362 } 1363 switch { 1364 case err != nil: 1365 goto Error 1366 case pe.Op != "": 1367 pe.OpPos = l.pos 1368 l.mark(0) 1369 } 1370 // word 1371 { 1372 // save current word 1373 word := l.word 1374 l.word = ast.Word{} 1375 Word: 1376 for { 1377 r, err = l.read() 1378 if err != nil { 1379 goto Error 1380 } 1381 1382 switch r { 1383 case '\\', '\'', '"': 1384 // quoting 1385 l.lit() 1386 l.mark(-1) 1387 if !l.scanQuote(r) { 1388 return false 1389 } 1390 case '$': 1391 // parameter expansion 1392 l.lit() 1393 l.mark(-1) 1394 if !l.scanParamExp() { 1395 return false 1396 } 1397 case '}': 1398 // right brace 1399 l.unread() 1400 l.lit() 1401 break Word 1402 default: 1403 l.b.WriteRune(r) 1404 } 1405 } 1406 // restore current word 1407 pe.Word = l.word 1408 l.word = word 1409 } 1410 Rbrace: 1411 if r, err = l.read(); err != nil || r != '}' { 1412 goto Error 1413 } 1414 l.word = append(l.word, pe) 1415 l.mark(0) 1416 return true 1417 Error: 1418 switch err { 1419 case nil, io.EOF: 1420 l.error(pe.Dollar, "syntax error: reached EOF while looking for matching '}'") 1421 case errParamExp: 1422 l.error(pe.Dollar, err.Error()) 1423 } 1424 return false 1425 } 1426 1427 // isNameRune reports whether r can be used in XBD Name. 1428 func (l *lexer) isNameRune(r rune) bool { 1429 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 1430 } 1431 1432 func (l *lexer) scanCmdSubst(r rune) bool { 1433 off := 0 1434 switch r { 1435 case '(': 1436 r = '$' 1437 off = -1 1438 fallthrough 1439 case '`': 1440 l.unread() 1441 left := l.pos 1442 // nest 1443 ll := &lexer{ 1444 name: l.name, 1445 r: l.r, 1446 cmdSubst: r, 1447 token: make(chan ast.Node), 1448 done: make(chan struct{}), 1449 cancel: make(chan struct{}), 1450 heredoc: heredoc{c: make(chan struct{}, 1)}, 1451 line: l.line, 1452 col: l.col, 1453 } 1454 ll.mark(off) 1455 ll.last.Store(ll.pos) 1456 go ll.run() 1457 yyParse(ll) 1458 <-ll.done 1459 if ll.err != nil { 1460 l.mu.Lock() 1461 l.err = ll.err 1462 if len(ll.stack) == 0 && r == '`' { 1463 err := l.err.(Error) 1464 l.err = Error{ 1465 Name: err.Name, 1466 Pos: err.Pos, 1467 Msg: "syntax error: unexpected '`'", 1468 } 1469 } 1470 l.mu.Unlock() 1471 break 1472 } 1473 // apply changes 1474 l.comments = append(l.comments, ll.comments...) 1475 l.line = ll.line 1476 l.col = ll.col 1477 l.pos = ll.pos 1478 // append to current word 1479 switch x := ll.cmds[0].(*ast.Cmd).Expr.(type) { 1480 case *ast.Subshell: 1481 l.word = append(l.word, &ast.CmdSubst{ 1482 Dollar: r == '$', 1483 Left: left, 1484 List: x.List, 1485 Right: x.Rparen, 1486 }) 1487 case *ast.ArithEval: 1488 l.word = append(l.word, &ast.ArithExp{ 1489 Left: ast.NewPos(left.Line(), left.Col()-1), 1490 Expr: x.Expr, 1491 Right: x.Right, 1492 }) 1493 } 1494 return true 1495 } 1496 return false 1497 } 1498 1499 func (l *lexer) linebreak() bool { 1500 var hash bool 1501 for { 1502 r, err := l.read() 1503 if err != nil { 1504 l.comment() 1505 return false 1506 } 1507 1508 switch r { 1509 case '\n': 1510 // <newline> 1511 hash = false 1512 l.comment() 1513 l.mark(0) 1514 case '#': 1515 // comment 1516 hash = true 1517 l.mark(-1) 1518 default: 1519 if !hash { 1520 l.unread() 1521 return true 1522 } 1523 l.b.WriteRune(r) 1524 } 1525 } 1526 } 1527 1528 func (l *lexer) comment() { 1529 if l.b.Len() != 0 { 1530 l.comments = append(l.comments, &ast.Comment{ 1531 Hash: l.pos, 1532 Text: l.b.String(), 1533 }) 1534 l.b.Reset() 1535 } 1536 } 1537 1538 func (l *lexer) lit() { 1539 if l.b.Len() != 0 { 1540 l.word = append(l.word, &ast.Lit{ 1541 ValuePos: l.pos, 1542 Value: l.b.String(), 1543 }) 1544 l.b.Reset() 1545 } 1546 } 1547 1548 func (l *lexer) esc(r rune) { 1549 switch r { 1550 case '\n', '"', '$', '\\', '`': 1551 l.lit() 1552 if r != '\n' { 1553 l.word = append(l.word, &ast.Quote{ 1554 TokPos: ast.NewPos(l.line, l.col-2), 1555 Tok: `\`, 1556 Value: ast.Word{ 1557 &ast.Lit{ 1558 ValuePos: ast.NewPos(l.line, l.col-1), 1559 Value: string(r), 1560 }, 1561 }, 1562 }) 1563 } 1564 l.mark(0) 1565 default: 1566 l.b.WriteByte('\\') 1567 l.b.WriteRune(r) 1568 } 1569 } 1570 1571 func (l *lexer) emit(typ int) { 1572 var tok ast.Node 1573 switch typ { 1574 case IO_NUMBER, WORD, NAME, ASSIGNMENT_WORD: 1575 tok = word{ 1576 typ: typ, 1577 val: l.word, 1578 } 1579 default: 1580 if len(l.word) != 0 { 1581 w := l.word[0].(*ast.Lit) 1582 tok = token{ 1583 typ: typ, 1584 pos: w.ValuePos, 1585 val: w.Value, 1586 } 1587 } else { 1588 tok = token{ 1589 typ: typ, 1590 pos: l.pos, 1591 val: ops[typ], 1592 } 1593 } 1594 } 1595 l.word = nil 1596 select { 1597 case l.token <- tok: 1598 case <-l.cancel: 1599 // bailout 1600 panic(nil) 1601 } 1602 l.mark(0) 1603 } 1604 1605 func (l *lexer) mark(off int) { 1606 if len(l.aliases) == 0 { 1607 l.pos = ast.NewPos(l.line, l.col+off) 1608 } 1609 } 1610 1611 func (l *lexer) read() (rune, error) { 1612 if len(l.aliases) != 0 { 1613 for i := len(l.aliases) - 1; i >= 0; i-- { 1614 if l.aliases[i].value.Len() > 0 { 1615 r, _, err := l.aliases[i].value.ReadRune() 1616 l.aliases = l.aliases[:i+1] 1617 return r, err 1618 } 1619 } 1620 l.aliases = l.aliases[:0] 1621 l.mark(0) 1622 } 1623 1624 r, _, err := l.r.ReadRune() 1625 switch { 1626 case err != nil: 1627 l.mu.Lock() 1628 switch { 1629 case err == io.EOF: 1630 l.eof = true 1631 case l.err == nil: 1632 l.err = err 1633 } 1634 l.mu.Unlock() 1635 case r == '\n': 1636 l.prevCol = l.col 1637 l.line++ 1638 l.col = 1 1639 default: 1640 l.col++ 1641 } 1642 return r, err 1643 } 1644 1645 func (l *lexer) unread() { 1646 if len(l.aliases) != 0 { 1647 l.aliases[len(l.aliases)-1].value.UnreadRune() 1648 return 1649 } 1650 1651 l.r.UnreadRune() 1652 if l.col == 1 { 1653 l.line-- 1654 l.col = l.prevCol 1655 } else { 1656 l.col-- 1657 } 1658 } 1659 1660 func (l *lexer) Error(e string) { 1661 l.error(l.last.Load().(ast.Pos), e) 1662 } 1663 1664 func (l *lexer) error(pos ast.Pos, msg string) { 1665 l.mu.Lock() 1666 defer l.mu.Unlock() 1667 1668 if l.err != nil && strings.Contains(msg, ": unexpected EOF") { 1669 return // lexing was interrupted 1670 } 1671 l.err = Error{ 1672 Name: l.name, 1673 Pos: pos, 1674 Msg: msg, 1675 } 1676 1677 select { 1678 case <-l.cancel: 1679 default: 1680 close(l.cancel) 1681 } 1682 } 1683 1684 type action func() action 1685 1686 type token struct { 1687 typ int 1688 pos ast.Pos 1689 val string 1690 } 1691 1692 func (t token) Pos() ast.Pos { return t.pos } 1693 func (t token) End() ast.Pos { return ast.NewPos(t.pos.Line(), t.pos.Col()+len(t.val)) } 1694 1695 type word struct { 1696 typ int 1697 val ast.Word 1698 } 1699 1700 func (w word) Pos() ast.Pos { return w.val.Pos() } 1701 func (w word) End() ast.Pos { return w.val.End() } 1702 1703 type alias struct { 1704 name string 1705 value *strings.Reader 1706 blank bool 1707 } 1708 1709 type heredoc struct { 1710 c chan struct{} 1711 n uint32 1712 mu sync.Mutex 1713 stack []*ast.Redir 1714 } 1715 1716 func (h *heredoc) exists() bool { 1717 return atomic.LoadUint32(&h.n) != 0 1718 } 1719 1720 func (h *heredoc) inc() { 1721 atomic.AddUint32(&h.n, 1) 1722 } 1723 1724 func (h *heredoc) push(r *ast.Redir) { 1725 h.mu.Lock() 1726 h.stack = append(h.stack, r) 1727 h.mu.Unlock() 1728 // incoming 1729 select { 1730 case h.c <- struct{}{}: 1731 default: 1732 } 1733 } 1734 1735 func (h *heredoc) pop() *ast.Redir { 1736 for atomic.LoadUint32(&h.n) != 0 { 1737 h.mu.Lock() 1738 if n := len(h.stack); n != 0 { 1739 r := h.stack[0] 1740 h.stack = h.stack[1:] 1741 h.mu.Unlock() 1742 atomic.AddUint32(&h.n, ^uint32(0)) 1743 return r 1744 } 1745 h.mu.Unlock() 1746 // wait 1747 <-h.c 1748 } 1749 return nil 1750 } 1751 1752 // Error represents a syntax error 1753 type Error struct { 1754 Name string 1755 Pos ast.Pos 1756 Msg string 1757 } 1758 1759 func (e Error) Error() string { 1760 return fmt.Sprintf("%v:%v:%v: %v", e.Name, e.Pos.Line(), e.Pos.Col(), e.Msg) 1761 }