github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/parse/parse.go (about) 1 // Package parse implements the elvish parser. 2 // 3 // The parser builds a hybrid of AST (abstract syntax tree) and parse tree 4 // (a.k.a. concrete syntax tree). The AST part only includes parts that are 5 // semantically significant -- i.e. skipping whitespaces and symbols that do not 6 // alter the semantics, and is embodied in the fields of each *Node type. The 7 // parse tree part corresponds to all the text in the original source text, and 8 // is embodied in the children of each *Node type. 9 package parse 10 11 //go:generate stringer -type=PrimaryType,RedirMode,ExprCtx -output=string.go 12 13 import ( 14 "bytes" 15 "fmt" 16 "io" 17 "math" 18 "strings" 19 "unicode" 20 21 "github.com/markusbkk/elvish/pkg/diag" 22 ) 23 24 // Tree represents a parsed tree. 25 type Tree struct { 26 Root *Chunk 27 Source Source 28 } 29 30 // Config keeps configuration options when parsing. 31 type Config struct { 32 // Destination of warnings. If nil, warnings are suppressed. 33 WarningWriter io.Writer 34 } 35 36 // Parse parses the given source. The returned error always has type *Error 37 // if it is not nil. 38 func Parse(src Source, cfg Config) (Tree, error) { 39 tree := Tree{&Chunk{}, src} 40 err := ParseAs(src, tree.Root, cfg) 41 return tree, err 42 } 43 44 // ParseAs parses the given source as a node, depending on the dynamic type of 45 // n. If the error is not nil, it always has type *Error. 46 func ParseAs(src Source, n Node, cfg Config) error { 47 ps := &parser{srcName: src.Name, src: src.Code, warn: cfg.WarningWriter} 48 ps.parse(n) 49 ps.done() 50 return ps.assembleError() 51 } 52 53 // Errors. 
54 var ( 55 errShouldBeForm = newError("", "form") 56 errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'") 57 errShouldBeFD = newError("", "a composite term representing fd") 58 errShouldBeFilename = newError("", "a composite term representing filename") 59 errShouldBeArray = newError("", "spaced") 60 errStringUnterminated = newError("string not terminated") 61 errInvalidEscape = newError("invalid escape sequence") 62 errInvalidEscapeOct = newError("invalid escape sequence", "octal digit") 63 errInvalidEscapeOctOverflow = newError("invalid octal escape sequence", "below 256") 64 errInvalidEscapeHex = newError("invalid escape sequence", "hex digit") 65 errInvalidEscapeControl = newError("invalid control sequence", "a codepoint between 0x3F and 0x5F") 66 errShouldBePrimary = newError("", "single-quoted string", "double-quoted string", "bareword") 67 errShouldBeVariableName = newError("", "variable name") 68 errShouldBeRBracket = newError("", "']'") 69 errShouldBeRBrace = newError("", "'}'") 70 errShouldBeBraceSepOrRBracket = newError("", "','", "'}'") 71 errShouldBeRParen = newError("", "')'") 72 errShouldBeCompound = newError("", "compound") 73 errShouldBeEqual = newError("", "'='") 74 errShouldBePipe = newError("", "'|'") 75 errBothElementsAndPairs = newError("cannot contain both list elements and map pairs") 76 errShouldBeNewline = newError("", "newline") 77 ) 78 79 // Chunk = { PipelineSep | Space } { Pipeline { PipelineSep | Space } } 80 type Chunk struct { 81 node 82 Pipelines []*Pipeline 83 } 84 85 func (bn *Chunk) parse(ps *parser) { 86 bn.parseSeps(ps) 87 for startsPipeline(ps.peek()) { 88 ps.parse(&Pipeline{}).addTo(&bn.Pipelines, bn) 89 if bn.parseSeps(ps) == 0 { 90 break 91 } 92 } 93 } 94 95 func isPipelineSep(r rune) bool { 96 return r == '\r' || r == '\n' || r == ';' 97 } 98 99 // parseSeps parses pipeline separators along with whitespaces. It returns the 100 // number of pipeline separators parsed. 
101 func (bn *Chunk) parseSeps(ps *parser) int { 102 nseps := 0 103 for { 104 r := ps.peek() 105 if isPipelineSep(r) { 106 // parse as a Sep 107 parseSep(bn, ps, r) 108 nseps++ 109 } else if IsInlineWhitespace(r) || r == '#' { 110 // parse a run of spaces as a Sep 111 parseSpaces(bn, ps) 112 } else { 113 break 114 } 115 } 116 return nseps 117 } 118 119 // Pipeline = Form { '|' Form } 120 type Pipeline struct { 121 node 122 Forms []*Form 123 Background bool 124 } 125 126 func (pn *Pipeline) parse(ps *parser) { 127 ps.parse(&Form{}).addTo(&pn.Forms, pn) 128 for parseSep(pn, ps, '|') { 129 parseSpacesAndNewlines(pn, ps) 130 if !startsForm(ps.peek()) { 131 ps.error(errShouldBeForm) 132 return 133 } 134 ps.parse(&Form{}).addTo(&pn.Forms, pn) 135 } 136 parseSpaces(pn, ps) 137 if ps.peek() == '&' { 138 ps.next() 139 addSep(pn, ps) 140 pn.Background = true 141 parseSpaces(pn, ps) 142 } 143 } 144 145 func startsPipeline(r rune) bool { 146 return startsForm(r) 147 } 148 149 // Form = { Space } { { Assignment } { Space } } 150 // { Compound } { Space } { ( Compound | MapPair | Redir ) { Space } } 151 type Form struct { 152 node 153 Assignments []*Assignment 154 Head *Compound 155 Args []*Compound 156 Opts []*MapPair 157 Redirs []*Redir 158 } 159 160 func (fn *Form) parse(ps *parser) { 161 parseSpaces(fn, ps) 162 for startsCompound(ps.peek(), CmdExpr) { 163 initial := ps.save() 164 cmdNode := &Compound{ExprCtx: CmdExpr} 165 parsedCmd := ps.parse(cmdNode) 166 167 if !parsableAsAssignment(cmdNode) { 168 parsedCmd.addAs(&fn.Head, fn) 169 parseSpaces(fn, ps) 170 break 171 } 172 ps.restore(initial) 173 ps.parse(&Assignment{}).addTo(&fn.Assignments, fn) 174 parseSpaces(fn, ps) 175 } 176 177 if fn.Head == nil { 178 if len(fn.Assignments) > 0 { 179 // Assignment-only form. 180 return 181 } 182 // Bad form. 
183 ps.error(fmt.Errorf("bad rune at form head: %q", ps.peek())) 184 } 185 186 for { 187 r := ps.peek() 188 switch { 189 case r == '&': 190 ps.next() 191 hasMapPair := startsCompound(ps.peek(), LHSExpr) 192 ps.backup() 193 if !hasMapPair { 194 // background indicator 195 return 196 } 197 ps.parse(&MapPair{}).addTo(&fn.Opts, fn) 198 case startsCompound(r, NormalExpr): 199 cn := &Compound{} 200 ps.parse(cn) 201 if isRedirSign(ps.peek()) { 202 // Redir 203 ps.parse(&Redir{Left: cn}).addTo(&fn.Redirs, fn) 204 } else { 205 parsed{cn}.addTo(&fn.Args, fn) 206 } 207 case isRedirSign(r): 208 ps.parse(&Redir{}).addTo(&fn.Redirs, fn) 209 default: 210 return 211 } 212 parseSpaces(fn, ps) 213 } 214 } 215 216 func parsableAsAssignment(cn *Compound) bool { 217 if len(cn.Indexings) == 0 { 218 return false 219 } 220 switch cn.Indexings[0].Head.Type { 221 case Braced, SingleQuoted, DoubleQuoted: 222 return len(cn.Indexings) >= 2 && 223 strings.HasPrefix(SourceText(cn.Indexings[1]), "=") 224 case Bareword: 225 name := cn.Indexings[0].Head.Value 226 eq := strings.IndexByte(name, '=') 227 if eq >= 0 { 228 return validBarewordVariableName(name[:eq], true) 229 } else { 230 return validBarewordVariableName(name, true) && 231 len(cn.Indexings) >= 2 && 232 strings.HasPrefix(SourceText(cn.Indexings[1]), "=") 233 } 234 default: 235 return false 236 } 237 } 238 239 func startsForm(r rune) bool { 240 return IsInlineWhitespace(r) || startsCompound(r, CmdExpr) 241 } 242 243 // Assignment = Indexing '=' Compound 244 type Assignment struct { 245 node 246 Left *Indexing 247 Right *Compound 248 } 249 250 func (an *Assignment) parse(ps *parser) { 251 ps.parse(&Indexing{ExprCtx: LHSExpr}).addAs(&an.Left, an) 252 head := an.Left.Head 253 if !ValidLHSVariable(head, true) { 254 ps.errorp(head, errShouldBeVariableName) 255 } 256 257 if !parseSep(an, ps, '=') { 258 ps.error(errShouldBeEqual) 259 } 260 ps.parse(&Compound{}).addAs(&an.Right, an) 261 } 262 263 func ValidLHSVariable(p *Primary, allowSigil bool) 
bool { 264 switch p.Type { 265 case Braced: 266 // TODO(xiaq): check further inside braced expression 267 return true 268 case SingleQuoted, DoubleQuoted: 269 // Quoted variable names may contain anything 270 return true 271 case Bareword: 272 // Bareword variable names may only contain runes that are valid in raw 273 // variable names 274 return validBarewordVariableName(p.Value, allowSigil) 275 default: 276 return false 277 } 278 } 279 280 func validBarewordVariableName(name string, allowSigil bool) bool { 281 if name == "" { 282 return false 283 } 284 if allowSigil && name[0] == '@' { 285 name = name[1:] 286 } 287 for _, r := range name { 288 if !allowedInVariableName(r) { 289 return false 290 } 291 } 292 return true 293 } 294 295 // Redir = { Compound } { '<'|'>'|'<>'|'>>' } { Space } ( '&'? Compound ) 296 type Redir struct { 297 node 298 Left *Compound 299 Mode RedirMode 300 RightIsFd bool 301 Right *Compound 302 } 303 304 func (rn *Redir) parse(ps *parser) { 305 // The parsing of the Left part is done in Form.parse. 306 if rn.Left != nil { 307 addChild(rn, rn.Left) 308 rn.From = rn.Left.From 309 } 310 311 begin := ps.pos 312 for isRedirSign(ps.peek()) { 313 ps.next() 314 } 315 sign := ps.src[begin:ps.pos] 316 switch sign { 317 case "<": 318 rn.Mode = Read 319 case ">": 320 rn.Mode = Write 321 case ">>": 322 rn.Mode = Append 323 case "<>": 324 rn.Mode = ReadWrite 325 default: 326 ps.error(errBadRedirSign) 327 } 328 addSep(rn, ps) 329 parseSpaces(rn, ps) 330 if parseSep(rn, ps, '&') { 331 rn.RightIsFd = true 332 } 333 ps.parse(&Compound{}).addAs(&rn.Right, rn) 334 if len(rn.Right.Indexings) == 0 { 335 if rn.RightIsFd { 336 ps.error(errShouldBeFD) 337 } else { 338 ps.error(errShouldBeFilename) 339 } 340 return 341 } 342 } 343 344 func isRedirSign(r rune) bool { 345 return r == '<' || r == '>' 346 } 347 348 // RedirMode records the mode of an IO redirection. 349 type RedirMode int 350 351 // Possible values for RedirMode. 
352 const ( 353 BadRedirMode RedirMode = iota 354 Read 355 Write 356 ReadWrite 357 Append 358 ) 359 360 // Filter is the Elvish filter DSL. It uses the same syntax as arguments and 361 // options to a command. 362 type Filter struct { 363 node 364 Args []*Compound 365 Opts []*MapPair 366 } 367 368 func (qn *Filter) parse(ps *parser) { 369 parseSpaces(qn, ps) 370 for { 371 r := ps.peek() 372 switch { 373 case r == '&': 374 ps.parse(&MapPair{}).addTo(&qn.Opts, qn) 375 case startsCompound(r, NormalExpr): 376 ps.parse(&Compound{}).addTo(&qn.Args, qn) 377 default: 378 return 379 } 380 parseSpaces(qn, ps) 381 } 382 } 383 384 // Compound = { Indexing } 385 type Compound struct { 386 node 387 ExprCtx ExprCtx 388 Indexings []*Indexing 389 } 390 391 // ExprCtx represents special contexts of expression parsing. 392 type ExprCtx int 393 394 const ( 395 // NormalExpr represents a normal expression, namely none of the special 396 // ones below. It is the default value. 397 NormalExpr ExprCtx = iota 398 // CmdExpr represents an expression used as the command in a form. In this 399 // context, unquoted <>*^ are treated as bareword characters. 400 CmdExpr 401 // LHSExpr represents an expression used as the left-hand-side in either 402 // assignments or map pairs. In this context, an unquoted = serves as an 403 // expression terminator and is thus not treated as a bareword character. 404 LHSExpr 405 // BracedElemExpr represents an expression used as an element in a braced 406 // expression. In this context, an unquoted , serves as an expression 407 // terminator and is thus not treated as a bareword character. 408 BracedElemExpr 409 // strictExpr is only meaningful to allowedInBareword. 410 strictExpr 411 ) 412 413 func (cn *Compound) parse(ps *parser) { 414 cn.tilde(ps) 415 for startsIndexing(ps.peek(), cn.ExprCtx) { 416 ps.parse(&Indexing{ExprCtx: cn.ExprCtx}).addTo(&cn.Indexings, cn) 417 } 418 } 419 420 // tilde parses a tilde if there is one. 
It is implemented here instead of 421 // within Primary since a tilde can only appear as the first part of a 422 // Compound. Elsewhere tildes are barewords. 423 func (cn *Compound) tilde(ps *parser) { 424 if ps.peek() == '~' { 425 ps.next() 426 base := node{Ranging: diag.Ranging{From: ps.pos - 1, To: ps.pos}, 427 sourceText: "~", parent: nil, children: nil} 428 pn := &Primary{node: base, Type: Tilde, Value: "~"} 429 in := &Indexing{node: base} 430 parsed{pn}.addAs(&in.Head, in) 431 parsed{in}.addTo(&cn.Indexings, cn) 432 } 433 } 434 435 func startsCompound(r rune, ctx ExprCtx) bool { 436 return startsIndexing(r, ctx) 437 } 438 439 // Indexing = Primary { '[' Array ']' } 440 type Indexing struct { 441 node 442 ExprCtx ExprCtx 443 Head *Primary 444 Indices []*Array 445 } 446 447 func (in *Indexing) parse(ps *parser) { 448 ps.parse(&Primary{ExprCtx: in.ExprCtx}).addAs(&in.Head, in) 449 for parseSep(in, ps, '[') { 450 if !startsArray(ps.peek()) { 451 ps.error(errShouldBeArray) 452 } 453 454 ps.parse(&Array{}).addTo(&in.Indices, in) 455 456 if !parseSep(in, ps, ']') { 457 ps.error(errShouldBeRBracket) 458 return 459 } 460 } 461 } 462 463 func startsIndexing(r rune, ctx ExprCtx) bool { 464 return startsPrimary(r, ctx) 465 } 466 467 // Array = { Space | '\n' } { Compound { Space | '\n' } } 468 type Array struct { 469 node 470 Compounds []*Compound 471 // When non-empty, records the occurrences of semicolons by the indices of 472 // the compounds they appear before. For instance, [; ; a b; c d;] results 473 // in Semicolons={0 0 2 4}. 474 Semicolons []int 475 } 476 477 func (sn *Array) parse(ps *parser) { 478 parseSep := func() { parseSpacesAndNewlines(sn, ps) } 479 480 parseSep() 481 for startsCompound(ps.peek(), NormalExpr) { 482 ps.parse(&Compound{}).addTo(&sn.Compounds, sn) 483 parseSep() 484 } 485 } 486 487 func startsArray(r rune) bool { 488 return IsWhitespace(r) || startsIndexing(r, NormalExpr) 489 } 490 491 // Primary is the smallest expression unit. 
492 type Primary struct { 493 node 494 ExprCtx ExprCtx 495 Type PrimaryType 496 // The unquoted string value. Valid for Bareword, SingleQuoted, 497 // DoubleQuoted, Variable, Wildcard and Tilde. 498 Value string 499 Elements []*Compound // Valid for List and Lambda 500 Chunk *Chunk // Valid for OutputCapture, ExitusCapture and Lambda 501 MapPairs []*MapPair // Valid for Map and Lambda 502 Braced []*Compound // Valid for Braced 503 } 504 505 // PrimaryType is the type of a Primary. 506 type PrimaryType int 507 508 // Possible values for PrimaryType. 509 const ( 510 BadPrimary PrimaryType = iota 511 Bareword 512 SingleQuoted 513 DoubleQuoted 514 Variable 515 Wildcard 516 Tilde 517 ExceptionCapture 518 OutputCapture 519 List 520 Lambda 521 Map 522 Braced 523 ) 524 525 func (pn *Primary) parse(ps *parser) { 526 r := ps.peek() 527 if !startsPrimary(r, pn.ExprCtx) { 528 ps.error(errShouldBePrimary) 529 return 530 } 531 532 // Try bareword early, since it has precedence over wildcard on * 533 // when ctx = commandExpr. 534 if allowedInBareword(r, pn.ExprCtx) { 535 pn.bareword(ps) 536 return 537 } 538 539 switch r { 540 case '\'': 541 pn.singleQuoted(ps) 542 case '"': 543 pn.doubleQuoted(ps) 544 case '$': 545 pn.variable(ps) 546 case '*': 547 pn.starWildcard(ps) 548 case '?': 549 if ps.hasPrefix("?(") { 550 pn.exitusCapture(ps) 551 } else { 552 pn.questionWildcard(ps) 553 } 554 case '(': 555 pn.outputCapture(ps) 556 case '[': 557 pn.lbracket(ps) 558 case '{': 559 pn.lbrace(ps) 560 default: 561 // Parse an empty bareword. 562 pn.Type = Bareword 563 } 564 } 565 566 func (pn *Primary) singleQuoted(ps *parser) { 567 pn.Type = SingleQuoted 568 ps.next() 569 pn.singleQuotedInner(ps) 570 } 571 572 // Parses a single-quoted string after the opening quote. Sets pn.Value but not 573 // pn.Type. 
func (pn *Primary) singleQuotedInner(ps *parser) {
	var buf bytes.Buffer
	// pn.Value is assigned on every exit path, including the error one, so it
	// holds whatever was collected before the string ended or EOF was hit.
	defer func() { pn.Value = buf.String() }()
	for {
		switch r := ps.next(); r {
		case eof:
			ps.error(errStringUnterminated)
			return
		case '\'':
			if ps.peek() == '\'' {
				// Two consecutive single quotes
				// stand for one literal quote character.
				ps.next()
				buf.WriteByte('\'')
			} else {
				// End of string
				return
			}
		default:
			buf.WriteRune(r)
		}
	}
}

// doubleQuoted parses a double-quoted string. The next rune is the opening
// quote, which is skipped here.
func (pn *Primary) doubleQuoted(ps *parser) {
	pn.Type = DoubleQuoted
	ps.next()
	pn.doubleQuotedInner(ps)
}

// Parses a double-quoted string after the opening quote. Sets pn.Value but not
// pn.Type.
//
// Backslash escapes handled here:
//
//   - \c and \^ followed by a rune in 0x3F..0x5F: a control character
//   - \x, \u and \U followed by two, four or eight hex digits
//   - a backslash followed by three octal digits whose value is at most 255
//   - the single-letter escapes listed in doubleEscape
//
// Invalid escapes are reported through ps.error or ps.errorp, and parsing
// then continues with the rest of the string.
func (pn *Primary) doubleQuotedInner(ps *parser) {
	var buf bytes.Buffer
	// pn.Value is assigned on every exit path, including the error one.
	defer func() { pn.Value = buf.String() }()
	for {
		switch r := ps.next(); r {
		case eof:
			ps.error(errStringUnterminated)
			return
		case '"':
			return
		case '\\':
			switch r := ps.next(); r {
			case 'c', '^': // control sequence
				r := ps.next()
				if r < 0x3F || r > 0x5F {
					// Step back before reporting the error, then consume the
					// offending rune again so that parsing resumes after it.
					ps.backup()
					ps.error(errInvalidEscapeControl)
					ps.next()
				}
				// NOTE(review): a byte is written below even when the range
				// check above failed; the error has already been recorded.
				if byte(r) == '?' { // special-case: \c? => del
					buf.WriteByte(byte(0x7F))
				} else {
					buf.WriteByte(byte(r - 0x40))
				}
			case 'x', 'u', 'U': // two, four, or eight hex digits
				var n int
				switch r {
				case 'x':
					n = 2
				case 'u':
					n = 4
				case 'U':
					n = 8
				}
				var rr rune
				for i := 0; i < n; i++ {
					d, ok := hexToDigit(ps.next())
					if !ok {
						// Leave the non-hex rune unconsumed and stop early;
						// the digits accumulated so far are still written.
						ps.backup()
						ps.error(errInvalidEscapeHex)
						break
					}
					rr = rr*16 + d
				}
				if r == 'x' {
					// \x yields a single raw byte, not an encoded rune.
					buf.WriteByte(byte(rr))
				} else {
					buf.WriteRune(rr)
				}
			case '0', '1', '2', '3', '4', '5', '6', '7': // three octal digits
				// The first digit has already been consumed as r.
				rr := r - '0'
				for i := 0; i < 2; i++ {
					r := ps.next()
					if r < '0' || r > '7' {
						ps.backup()
						ps.error(errInvalidEscapeOct)
						break
					}
					rr = rr*8 + (r - '0')
				}
				if rr <= math.MaxUint8 {
					buf.WriteByte(byte(rr))
				} else {
					// Exceeding 255 requires all three digits, so the escape
					// is the backslash plus three digits: the 4 bytes that
					// end at ps.pos.
					r := diag.Ranging{From: ps.pos - 4, To: ps.pos}
					ps.errorp(r, errInvalidEscapeOctOverflow)
				}
			default:
				if rr, ok := doubleEscape[r]; ok {
					buf.WriteRune(rr)
				} else {
					// Step back before reporting the error, then skip the
					// unknown escape rune.
					ps.backup()
					ps.error(errInvalidEscape)
					ps.next()
				}
			}
		default:
			buf.WriteRune(r)
		}
	}
}

// a table for the simple double-quote escape sequences.
687 var doubleEscape = map[rune]rune{ 688 // same as golang 689 'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 690 't': '\t', 'v': '\v', '\\': '\\', '"': '"', 691 // additional 692 'e': '\033', 693 } 694 695 var doubleUnescape = map[rune]rune{} 696 697 func init() { 698 for k, v := range doubleEscape { 699 doubleUnescape[v] = k 700 } 701 } 702 703 func hexToDigit(r rune) (rune, bool) { 704 switch { 705 case '0' <= r && r <= '9': 706 return r - '0', true 707 case 'a' <= r && r <= 'f': 708 return r - 'a' + 10, true 709 case 'A' <= r && r <= 'F': 710 return r - 'A' + 10, true 711 default: 712 return -1, false 713 } 714 } 715 716 func (pn *Primary) variable(ps *parser) { 717 pn.Type = Variable 718 ps.next() 719 switch r := ps.next(); r { 720 case eof: 721 ps.backup() 722 ps.error(errShouldBeVariableName) 723 ps.next() 724 case '\'': 725 pn.singleQuotedInner(ps) 726 case '"': 727 pn.doubleQuotedInner(ps) 728 default: 729 defer func() { pn.Value = ps.src[pn.From+1 : ps.pos] }() 730 if !allowedInVariableName(r) && r != '@' { 731 ps.backup() 732 ps.error(errShouldBeVariableName) 733 } 734 for allowedInVariableName(ps.peek()) { 735 ps.next() 736 } 737 } 738 } 739 740 // The following are allowed in variable names: 741 // * Anything beyond ASCII that is printable 742 // * Letters and numbers 743 // * The symbols "-_:~" 744 func allowedInVariableName(r rune) bool { 745 return (r >= 0x80 && unicode.IsPrint(r)) || 746 ('0' <= r && r <= '9') || 747 ('a' <= r && r <= 'z') || 748 ('A' <= r && r <= 'Z') || 749 r == '-' || r == '_' || r == ':' || r == '~' 750 } 751 752 func (pn *Primary) starWildcard(ps *parser) { 753 pn.Type = Wildcard 754 for ps.peek() == '*' { 755 ps.next() 756 } 757 pn.Value = ps.src[pn.From:ps.pos] 758 } 759 760 func (pn *Primary) questionWildcard(ps *parser) { 761 pn.Type = Wildcard 762 if ps.peek() == '?' 
{ 763 ps.next() 764 } 765 pn.Value = ps.src[pn.From:ps.pos] 766 } 767 768 func (pn *Primary) exitusCapture(ps *parser) { 769 ps.next() 770 ps.next() 771 addSep(pn, ps) 772 773 pn.Type = ExceptionCapture 774 775 ps.parse(&Chunk{}).addAs(&pn.Chunk, pn) 776 777 if !parseSep(pn, ps, ')') { 778 ps.error(errShouldBeRParen) 779 } 780 } 781 782 func (pn *Primary) outputCapture(ps *parser) { 783 pn.Type = OutputCapture 784 parseSep(pn, ps, '(') 785 786 ps.parse(&Chunk{}).addAs(&pn.Chunk, pn) 787 788 if !parseSep(pn, ps, ')') { 789 ps.error(errShouldBeRParen) 790 } 791 } 792 793 // List = '[' { Space } { Compound } ']' 794 // = '[' { Space } { MapPair { Space } } ']' 795 // Map = '[' { Space } '&' { Space } ']' 796 // Lambda = '[' { Space } { (Compound | MapPair) { Space } } ']' '{' Chunk '}' 797 798 func (pn *Primary) lbracket(ps *parser) { 799 parseSep(pn, ps, '[') 800 parseSpacesAndNewlines(pn, ps) 801 802 loneAmpersand := false 803 items: 804 for { 805 r := ps.peek() 806 switch { 807 case r == '&': 808 ps.next() 809 hasMapPair := startsCompound(ps.peek(), LHSExpr) 810 if !hasMapPair { 811 loneAmpersand = true 812 addSep(pn, ps) 813 parseSpacesAndNewlines(pn, ps) 814 break items 815 } 816 ps.backup() 817 ps.parse(&MapPair{}).addTo(&pn.MapPairs, pn) 818 case startsCompound(r, NormalExpr): 819 ps.parse(&Compound{}).addTo(&pn.Elements, pn) 820 default: 821 break items 822 } 823 parseSpacesAndNewlines(pn, ps) 824 } 825 826 if !parseSep(pn, ps, ']') { 827 ps.error(errShouldBeRBracket) 828 } 829 if loneAmpersand || len(pn.MapPairs) > 0 { 830 if len(pn.Elements) > 0 { 831 // TODO(xiaq): Add correct position information. 832 ps.error(errBothElementsAndPairs) 833 } 834 pn.Type = Map 835 } else { 836 pn.Type = List 837 } 838 } 839 840 // lambda parses a lambda expression. The opening brace has been seen. 
841 func (pn *Primary) lambda(ps *parser) { 842 pn.Type = Lambda 843 parseSpacesAndNewlines(pn, ps) 844 if parseSep(pn, ps, '|') { 845 parseSpacesAndNewlines(pn, ps) 846 items: 847 for { 848 r := ps.peek() 849 switch { 850 case r == '&': 851 ps.parse(&MapPair{}).addTo(&pn.MapPairs, pn) 852 case startsCompound(r, NormalExpr): 853 ps.parse(&Compound{}).addTo(&pn.Elements, pn) 854 default: 855 break items 856 } 857 parseSpacesAndNewlines(pn, ps) 858 } 859 if !parseSep(pn, ps, '|') { 860 ps.error(errShouldBePipe) 861 } 862 } 863 ps.parse(&Chunk{}).addAs(&pn.Chunk, pn) 864 if !parseSep(pn, ps, '}') { 865 ps.error(errShouldBeRBrace) 866 } 867 } 868 869 // Braced = '{' Compound { BracedSep Compounds } '}' 870 // BracedSep = { Space | '\n' } [ ',' ] { Space | '\n' } 871 func (pn *Primary) lbrace(ps *parser) { 872 parseSep(pn, ps, '{') 873 874 if r := ps.peek(); r == ';' || r == '\r' || r == '\n' || r == '|' || IsInlineWhitespace(r) { 875 pn.lambda(ps) 876 return 877 } 878 879 pn.Type = Braced 880 881 // TODO(xiaq): The compound can be empty, which allows us to parse {,foo}. 882 // Allowing compounds to be empty can be fragile in other cases. 883 ps.parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&pn.Braced, pn) 884 885 for isBracedSep(ps.peek()) { 886 parseSpacesAndNewlines(pn, ps) 887 // optional, so ignore the return value 888 parseSep(pn, ps, ',') 889 parseSpacesAndNewlines(pn, ps) 890 891 ps.parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&pn.Braced, pn) 892 } 893 if !parseSep(pn, ps, '}') { 894 ps.error(errShouldBeBraceSepOrRBracket) 895 } 896 } 897 898 func isBracedSep(r rune) bool { 899 return r == ',' || IsWhitespace(r) 900 } 901 902 func (pn *Primary) bareword(ps *parser) { 903 pn.Type = Bareword 904 defer func() { pn.Value = ps.src[pn.From:ps.pos] }() 905 for allowedInBareword(ps.peek(), pn.ExprCtx) { 906 ps.next() 907 } 908 } 909 910 // allowedInBareword returns where a rune is allowed in barewords in the given 911 // expression context. 
The special strictExpr context queries whether the rune 912 // is allowed in all contexts. 913 // 914 // The following are allowed in barewords: 915 // 916 // * Anything allowed in variable names 917 // * The symbols "./\@%+!" 918 // * The symbol "=", if ctx != lhsExpr && ctx != strictExpr 919 // * The symbol ",", if ctx != bracedExpr && ctx != strictExpr 920 // * The symbols "<>*^", if ctx = commandExpr 921 // 922 // The seemingly weird inclusion of \ is for easier path manipulation in 923 // Windows. 924 func allowedInBareword(r rune, ctx ExprCtx) bool { 925 return allowedInVariableName(r) || r == '.' || r == '/' || 926 r == '\\' || r == '@' || r == '%' || r == '+' || r == '!' || 927 (ctx != LHSExpr && ctx != strictExpr && r == '=') || 928 (ctx != BracedElemExpr && ctx != strictExpr && r == ',') || 929 (ctx == CmdExpr && (r == '<' || r == '>' || r == '*' || r == '^')) 930 } 931 932 func startsPrimary(r rune, ctx ExprCtx) bool { 933 return r == '\'' || r == '"' || r == '$' || allowedInBareword(r, ctx) || 934 r == '?' || r == '*' || r == '(' || r == '[' || r == '{' 935 } 936 937 // MapPair = '&' { Space } Compound { Space } Compound 938 type MapPair struct { 939 node 940 Key, Value *Compound 941 } 942 943 func (mpn *MapPair) parse(ps *parser) { 944 parseSep(mpn, ps, '&') 945 946 ps.parse(&Compound{ExprCtx: LHSExpr}).addAs(&mpn.Key, mpn) 947 if len(mpn.Key.Indexings) == 0 { 948 ps.error(errShouldBeCompound) 949 } 950 951 if parseSep(mpn, ps, '=') { 952 parseSpacesAndNewlines(mpn, ps) 953 // Parse value part. It can be empty. 954 ps.parse(&Compound{}).addAs(&mpn.Value, mpn) 955 } 956 } 957 958 // Sep is the catch-all node type for leaf nodes that lack internal structures 959 // and semantics, and serve solely for syntactic purposes. The parsing of 960 // separators depend on the Parent node; as such it lacks a genuine parse 961 // method. 962 type Sep struct { 963 node 964 } 965 966 // NewSep makes a new Sep. 
967 func NewSep(src string, begin, end int) *Sep { 968 return &Sep{node: node{diag.Ranging{From: begin, To: end}, src[begin:end], nil, nil}} 969 } 970 971 func (*Sep) parse(*parser) { 972 // A no-op, only to satisfy the Node interface. 973 } 974 975 func addSep(n Node, ps *parser) { 976 var begin int 977 ch := Children(n) 978 if len(ch) > 0 { 979 begin = ch[len(ch)-1].Range().To 980 } else { 981 begin = n.Range().From 982 } 983 if begin < ps.pos { 984 addChild(n, NewSep(ps.src, begin, ps.pos)) 985 } 986 } 987 988 func parseSep(n Node, ps *parser, sep rune) bool { 989 if ps.peek() == sep { 990 ps.next() 991 addSep(n, ps) 992 return true 993 } 994 return false 995 } 996 997 func parseSpaces(n Node, ps *parser) { 998 parseSpacesInner(n, ps, false) 999 } 1000 1001 func parseSpacesAndNewlines(n Node, ps *parser) { 1002 parseSpacesInner(n, ps, true) 1003 } 1004 1005 func parseSpacesInner(n Node, ps *parser, newlines bool) { 1006 spaces: 1007 for { 1008 r := ps.peek() 1009 switch { 1010 case IsInlineWhitespace(r): 1011 ps.next() 1012 case newlines && IsWhitespace(r): 1013 ps.next() 1014 case r == '#': 1015 // Comment is like inline whitespace as long as we don't include the 1016 // trailing newline. 1017 ps.next() 1018 for { 1019 r := ps.peek() 1020 if r == eof || r == '\r' || r == '\n' { 1021 break 1022 } 1023 ps.next() 1024 } 1025 case r == '^': 1026 // Line continuation is like inline whitespace. 1027 ps.next() 1028 switch ps.peek() { 1029 case '\r': 1030 ps.next() 1031 if ps.peek() == '\n' { 1032 ps.next() 1033 } 1034 case '\n': 1035 ps.next() 1036 case eof: 1037 ps.error(errShouldBeNewline) 1038 default: 1039 ps.backup() 1040 break spaces 1041 } 1042 default: 1043 break spaces 1044 } 1045 } 1046 addSep(n, ps) 1047 } 1048 1049 // IsInlineWhitespace reports whether r is an inline whitespace character. 1050 // Currently this includes space (Unicode 0x20) and tab (Unicode 0x9). 
1051 func IsInlineWhitespace(r rune) bool { 1052 return r == ' ' || r == '\t' 1053 } 1054 1055 // IsWhitespace reports whether r is a whitespace. Currently this includes 1056 // inline whitespace characters and newline (Unicode 0xa). 1057 func IsWhitespace(r rune) bool { 1058 return IsInlineWhitespace(r) || r == '\r' || r == '\n' 1059 } 1060 1061 func addChild(p Node, ch Node) { 1062 p.n().addChild(ch) 1063 ch.n().parent = p 1064 }