github.com/elves/Elvish@v0.12.0/parse/parse.go

github.com/elves/Elvish@v0.12.0/parse/parse.go (about)

     1  // Package parse implements the elvish parser.
     2  package parse
     3  
     4  //go:generate ./boilerplate.py
     5  //go:generate stringer -type=PrimaryType,RedirMode -output=string.go
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"unicode"
    12  )
    13  
    14  // Parse parses Elvish source. If the error is not nil, it always has type
    15  // ParseError.
    16  func Parse(srcname, src string) (*Chunk, error) {
    17  	ps := NewParser(srcname, src)
    18  	n := ParseChunk(ps)
    19  	ps.Done()
    20  	return n, ps.Errors()
    21  }
    22  
// Errors reported by the parser.
//
// Most of them are built with newError. NOTE(review): the first argument
// looks like a free-form message and the remaining arguments a list of things
// that were expected at the error position -- confirm against the definition
// of newError. errBadLHS is the only one built with errors.New.
var (
	errShouldBeForm         = newError("", "form")
	errBadLHS               = errors.New("bad assignment LHS")
	errDuplicateExitusRedir = newError("duplicate exitus redir")
	errBadRedirSign         = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
	errShouldBeFD           = newError("", "a composite term representing fd")
	errShouldBeFilename     = newError("", "a composite term representing filename")
	errShouldBeArray        = newError("", "spaced")
	errStringUnterminated   = newError("string not terminated")
	errChainedAssignment    = newError("chained assignment not yet supported")
	errInvalidEscape        = newError("invalid escape sequence")
	errInvalidEscapeOct     = newError("invalid escape sequence", "octal digit")
	errInvalidEscapeHex     = newError("invalid escape sequence", "hex digit")
	errInvalidEscapeControl = newError("invalid control sequence", "a rune between @ (0x40) and _(0x5F)")
	errShouldBePrimary      = newError("",
		"single-quoted string", "double-quoted string", "bareword")
	errShouldBeVariableName       = newError("", "variable name")
	errShouldBeRBracket           = newError("", "']'")
	errShouldBeRBrace             = newError("", "'}'")
	// Despite its name, this error lists ',' and '}' (not ']') as expected.
	errShouldBeBraceSepOrRBracket = newError("", "','", "'}'")
	errShouldBeRParen             = newError("", "')'")
	errShouldBeCompound           = newError("", "compound")
	errShouldBeEqual              = newError("", "'='")
	errBothElementsAndPairs       = newError("cannot contain both list elements and map pairs")
	errShouldBeEscapeSequence     = newError("", "escape sequence")
)
    50  
// Chunk = { PipelineSep | Space } { Pipeline { PipelineSep | Space } }
//
// A Chunk is a sequence of pipelines. Besides being the top-level node
// produced by Parse, it is also the body of captures and lambdas.
type Chunk struct {
	node
	// Pipelines holds the parsed pipelines in source order.
	Pipelines []*Pipeline
}
    56  
    57  func (bn *Chunk) parse(ps *Parser) {
    58  	bn.parseSeps(ps)
    59  	for startsPipeline(ps.peek()) {
    60  		bn.addToPipelines(ParsePipeline(ps))
    61  		if bn.parseSeps(ps) == 0 {
    62  			break
    63  		}
    64  	}
    65  }
    66  
    67  func isPipelineSep(r rune) bool {
    68  	return r == '\n' || r == ';'
    69  }
    70  
    71  // parseSeps parses pipeline separators along with whitespaces. It returns the
    72  // number of pipeline separators parsed.
    73  func (bn *Chunk) parseSeps(ps *Parser) int {
    74  	nseps := 0
    75  	for {
    76  		r := ps.peek()
    77  		if isPipelineSep(r) {
    78  			// parse as a Sep
    79  			parseSep(bn, ps, r)
    80  			nseps++
    81  		} else if IsSpace(r) {
    82  			// parse a run of spaces as a Sep
    83  			parseSpaces(bn, ps)
    84  		} else if r == '#' {
    85  			// parse a comment as a Sep
    86  			for {
    87  				r := ps.peek()
    88  				if r == eof || r == '\n' {
    89  					break
    90  				}
    91  				ps.next()
    92  			}
    93  			addSep(bn, ps)
    94  			nseps++
    95  		} else {
    96  			break
    97  		}
    98  	}
    99  	return nseps
   100  }
   101  
// Pipeline = Form { '|' Form }
//
// A pipeline may additionally be followed by '&', which marks it as a
// background pipeline.
type Pipeline struct {
	node
	// Forms holds the forms joined by '|', in source order.
	Forms      []*Form
	// Background is set when the pipeline is followed by '&'.
	Background bool
}
   108  
   109  func (pn *Pipeline) parse(ps *Parser) {
   110  	pn.addToForms(ParseForm(ps))
   111  	for parseSep(pn, ps, '|') {
   112  		parseSpacesAndNewlines(pn, ps)
   113  		if !startsForm(ps.peek()) {
   114  			ps.error(errShouldBeForm)
   115  			return
   116  		}
   117  		pn.addToForms(ParseForm(ps))
   118  	}
   119  	parseSpaces(pn, ps)
   120  	if ps.peek() == '&' {
   121  		ps.next()
   122  		addSep(pn, ps)
   123  		pn.Background = true
   124  		parseSpaces(pn, ps)
   125  	}
   126  }
   127  
   128  func startsPipeline(r rune) bool {
   129  	return startsForm(r)
   130  }
   131  
// Form = { Space } { { Assignment } { Space } }
//        { Compound } { Space } { ( Compound | MapPair | Redir | ExitusRedir ) { Space } }
//
// A form is either a command (a head followed by arguments, options and
// redirections), a list of assignments, or a "spacey" assignment, in which a
// standalone "=" separates the variables from the values.
type Form struct {
	node
	// Assignments holds the leading assignments of the form.
	Assignments []*Assignment
	// Head is the command. It is nil for assignment-only forms and is reset
	// to nil for spacey assignments.
	Head        *Compound
	// Left-hand-sides for the spacey assignment. Right-hand-sides are in Args.
	Vars        []*Compound
	Args        []*Compound
	// Opts holds the options, written as map pairs ('&' followed by a key).
	Opts        []*MapPair
	Redirs      []*Redir
	// ExitusRedir is the first "?>" redirection of the form, if any.
	ExitusRedir *ExitusRedir
}
   145  
   146  func (fn *Form) parse(ps *Parser) {
   147  	parseSpaces(fn, ps)
   148  	for fn.tryAssignment(ps) {
   149  		parseSpaces(fn, ps)
   150  	}
   151  
   152  	// Parse head.
   153  	if !startsCompound(ps.peek(), CmdExpr) {
   154  		if len(fn.Assignments) > 0 {
   155  			// Assignment-only form.
   156  			return
   157  		}
   158  		// Bad form.
   159  		ps.error(fmt.Errorf("bad rune at form head: %q", ps.peek()))
   160  	}
   161  	fn.setHead(ParseCompound(ps, CmdExpr))
   162  	parseSpaces(fn, ps)
   163  
   164  	for {
   165  		r := ps.peek()
   166  		switch {
   167  		case r == '&':
   168  			ps.next()
   169  			hasMapPair := startsCompound(ps.peek(), LHSExpr)
   170  			ps.backup()
   171  			if !hasMapPair {
   172  				// background indicator
   173  				return
   174  			}
   175  			fn.addToOpts(ParseMapPair(ps))
   176  		case startsCompound(r, NormalExpr):
   177  			if ps.hasPrefix("?>") {
   178  				if fn.ExitusRedir != nil {
   179  					ps.error(errDuplicateExitusRedir)
   180  					// Parse the duplicate redir anyway.
   181  					addChild(fn, ParseExitusRedir(ps))
   182  				} else {
   183  					fn.setExitusRedir(ParseExitusRedir(ps))
   184  				}
   185  				continue
   186  			}
   187  			cn := ParseCompound(ps, NormalExpr)
   188  			if isRedirSign(ps.peek()) {
   189  				// Redir
   190  				fn.addToRedirs(ParseRedir(ps, cn))
   191  			} else if cn.sourceText == "=" {
   192  				// Spacey assignment.
   193  				// Turn the equal sign into a Sep.
   194  				addChild(fn, NewSep(ps.src, cn.begin, cn.end))
   195  				// Turn the head and preceding arguments into LHSs.
   196  				addLHS := func(cn *Compound) {
   197  					if len(cn.Indexings) == 1 && checkVariableInAssignment(cn.Indexings[0].Head, ps) {
   198  						fn.Vars = append(fn.Vars, cn)
   199  					} else {
   200  						ps.errorp(cn.begin, cn.end, errBadLHS)
   201  					}
   202  				}
   203  				if fn.Head != nil {
   204  					addLHS(fn.Head)
   205  				} else {
   206  					ps.error(errChainedAssignment)
   207  				}
   208  				fn.Head = nil
   209  				for _, cn := range fn.Args {
   210  					addLHS(cn)
   211  				}
   212  				fn.Args = nil
   213  			} else {
   214  				fn.addToArgs(cn)
   215  			}
   216  		case isRedirSign(r):
   217  			fn.addToRedirs(ParseRedir(ps, nil))
   218  		default:
   219  			return
   220  		}
   221  		parseSpaces(fn, ps)
   222  	}
   223  }
   224  
   225  // tryAssignment tries to parse an assignment. If succeeded, it adds the parsed
   226  // assignment to fn.Assignments and returns true. Otherwise it rewinds the
   227  // parser and returns false.
   228  func (fn *Form) tryAssignment(ps *Parser) bool {
   229  	if !startsIndexing(ps.peek(), LHSExpr) {
   230  		return false
   231  	}
   232  
   233  	pos := ps.pos
   234  	errorEntries := ps.errors.Entries
   235  	an := ParseAssignment(ps)
   236  	// If errors were added, revert
   237  	if len(ps.errors.Entries) > len(errorEntries) {
   238  		ps.errors.Entries = errorEntries
   239  		ps.pos = pos
   240  		return false
   241  	}
   242  	fn.addToAssignments(an)
   243  	return true
   244  }
   245  
   246  func startsForm(r rune) bool {
   247  	return IsSpace(r) || startsCompound(r, CmdExpr)
   248  }
   249  
// Assignment = Indexing '=' Compound
type Assignment struct {
	node
	// Left is the target of the assignment, parsed in LHSExpr context.
	Left  *Indexing
	// Right is the value being assigned, parsed in NormalExpr context.
	Right *Compound
}
   256  
   257  func (an *Assignment) parse(ps *Parser) {
   258  	an.setLeft(ParseIndexing(ps, LHSExpr))
   259  	head := an.Left.Head
   260  	if !checkVariableInAssignment(head, ps) {
   261  		ps.errorp(head.Begin(), head.End(), errShouldBeVariableName)
   262  	}
   263  
   264  	if !parseSep(an, ps, '=') {
   265  		ps.error(errShouldBeEqual)
   266  	}
   267  	an.setRight(ParseCompound(ps, NormalExpr))
   268  }
   269  
   270  func checkVariableInAssignment(p *Primary, ps *Parser) bool {
   271  	if p.Type == Braced {
   272  		// XXX don't check further inside braced expression
   273  		return true
   274  	}
   275  	if p.Type != Bareword && p.Type != SingleQuoted && p.Type != DoubleQuoted {
   276  		return false
   277  	}
   278  	if p.Value == "" {
   279  		return false
   280  	}
   281  	for _, r := range p.Value {
   282  		// XXX special case '&' and '@'.
   283  		if !allowedInVariableName(r) && r != '&' && r != '@' {
   284  			return false
   285  		}
   286  	}
   287  	return true
   288  }
   289  
// ExitusRedir = '?' '>' { Space } Compound
type ExitusRedir struct {
	node
	// Dest is the compound following the "?>" sign.
	Dest *Compound
}
   295  
   296  func (ern *ExitusRedir) parse(ps *Parser) {
   297  	ps.next()
   298  	ps.next()
   299  	addSep(ern, ps)
   300  	parseSpaces(ern, ps)
   301  	ern.setDest(ParseCompound(ps, NormalExpr))
   302  }
   303  
// Redir = { Compound } { '<'|'>'|'<>'|'>>' } { Space } ( '&'? Compound )
type Redir struct {
	node
	// Left is the compound written directly before the redirection sign. It
	// is nil when there is none.
	Left      *Compound
	// Mode is derived from the redirection sign. It stays BadRedirMode when
	// the sign is not recognized.
	Mode      RedirMode
	// RightIsFd is set when the right-hand side is introduced by '&'.
	RightIsFd bool
	Right     *Compound
}
   312  
   313  func (rn *Redir) parse(ps *Parser, dest *Compound) {
   314  	// The parsing of the Left part is done in Form.parse.
   315  	if dest != nil {
   316  		rn.setLeft(dest)
   317  		rn.begin = dest.begin
   318  	}
   319  
   320  	begin := ps.pos
   321  	for isRedirSign(ps.peek()) {
   322  		ps.next()
   323  	}
   324  	sign := ps.src[begin:ps.pos]
   325  	switch sign {
   326  	case "<":
   327  		rn.Mode = Read
   328  	case ">":
   329  		rn.Mode = Write
   330  	case ">>":
   331  		rn.Mode = Append
   332  	case "<>":
   333  		rn.Mode = ReadWrite
   334  	default:
   335  		ps.error(errBadRedirSign)
   336  	}
   337  	addSep(rn, ps)
   338  	parseSpaces(rn, ps)
   339  	if parseSep(rn, ps, '&') {
   340  		rn.RightIsFd = true
   341  	}
   342  	rn.setRight(ParseCompound(ps, NormalExpr))
   343  	if len(rn.Right.Indexings) == 0 {
   344  		if rn.RightIsFd {
   345  			ps.error(errShouldBeFD)
   346  		} else {
   347  			ps.error(errShouldBeFilename)
   348  		}
   349  		return
   350  	}
   351  }
   352  
   353  func isRedirSign(r rune) bool {
   354  	return r == '<' || r == '>'
   355  }
   356  
// RedirMode records the mode of an IO redirection.
type RedirMode int

// Possible values for RedirMode.
const (
	// BadRedirMode is the zero value. Redir.parse leaves it in place when
	// the redirection sign is not recognized.
	BadRedirMode RedirMode = iota
	// Read corresponds to the sign "<".
	Read
	// Write corresponds to the sign ">".
	Write
	// ReadWrite corresponds to the sign "<>".
	ReadWrite
	// Append corresponds to the sign ">>".
	Append
)
   368  
// Compound = { Indexing }
//
// A leading '~' is parsed specially and becomes an Indexing of its own, whose
// head is a Primary of type Tilde.
type Compound struct {
	node
	// Indexings holds the adjacent indexing expressions, in source order. It
	// may be empty.
	Indexings []*Indexing
}
   374  
// ExprCtx represents special contexts of expression parsing. The context
// determines which runes are treated as bareword characters.
type ExprCtx int

const (
	// NormalExpr represents a normal expression, namely none of the special
	// ones below.
	NormalExpr ExprCtx = iota
	// CmdExpr represents an expression used as the command in a form. In this
	// context, unquoted <>*^ are treated as bareword characters.
	CmdExpr
	// LHSExpr represents an expression used as the left-hand-side in either
	// assignments or map pairs. In this context, an unquoted = serves as an
	// expression terminator and is thus not treated as a bareword character.
	LHSExpr
	// BracedElemExpr represents an expression used as an element in a braced
	// expression. In this context, an unquoted , serves as an expression
	// terminator and is thus not treated as a bareword character.
	BracedElemExpr
	// strictExpr is only meaningful to allowedInBareword, where neither = nor
	// , is treated as a bareword character.
	strictExpr
)
   396  
   397  func (cn *Compound) parse(ps *Parser, ctx ExprCtx) {
   398  	cn.tilde(ps)
   399  	for startsIndexing(ps.peek(), ctx) {
   400  		cn.addToIndexings(ParseIndexing(ps, ctx))
   401  	}
   402  }
   403  
   404  // tilde parses a tilde if there is one. It is implemented here instead of
   405  // within Primary since a tilde can only appear as the first part of a
   406  // Compound. Elsewhere tildes are barewords.
   407  func (cn *Compound) tilde(ps *Parser) {
   408  	if ps.peek() == '~' {
   409  		ps.next()
   410  		base := node{nil, ps.pos - 1, ps.pos, "~", nil}
   411  		pn := &Primary{node: base, Type: Tilde, Value: "~"}
   412  		in := &Indexing{node: base}
   413  		in.setHead(pn)
   414  		cn.addToIndexings(in)
   415  	}
   416  }
   417  
   418  func startsCompound(r rune, ctx ExprCtx) bool {
   419  	return startsIndexing(r, ctx)
   420  }
   421  
// Indexing = Primary { '[' Array ']' }
type Indexing struct {
	node
	// Head is the primary being indexed.
	Head     *Primary
	// Indicies holds one Array per bracketed index following the head, in
	// source order.
	Indicies []*Array
}
   428  
   429  func (in *Indexing) parse(ps *Parser, ctx ExprCtx) {
   430  	in.setHead(ParsePrimary(ps, ctx))
   431  	for parseSep(in, ps, '[') {
   432  		if !startsArray(ps.peek()) {
   433  			ps.error(errShouldBeArray)
   434  		}
   435  
   436  		in.addToIndicies(ParseArray(ps, false))
   437  
   438  		if !parseSep(in, ps, ']') {
   439  			ps.error(errShouldBeRBracket)
   440  			return
   441  		}
   442  	}
   443  }
   444  
   445  func startsIndexing(r rune, ctx ExprCtx) bool {
   446  	return startsPrimary(r, ctx)
   447  }
   448  
// Array = { Space | '\n' } { Compound { Space | '\n' } }
//
// When semicolons are allowed by the caller of the parse method, they may
// appear wherever spaces may.
type Array struct {
	node
	// Compounds holds the elements in source order.
	Compounds []*Compound
	// When non-empty, records the occurrences of semicolons by the indices of
	// the compounds they appear before. For instance, [; ; a b; c d;] results
	// in Semicolons={0 0 2 4}.
	Semicolons []int
}
   458  
   459  func (sn *Array) parse(ps *Parser, allowSemicolon bool) {
   460  	parseSep := func() {
   461  		parseSpacesAndNewlines(sn, ps)
   462  		if allowSemicolon {
   463  			for parseSep(sn, ps, ';') {
   464  				sn.Semicolons = append(sn.Semicolons, len(sn.Compounds))
   465  			}
   466  			parseSpacesAndNewlines(sn, ps)
   467  		}
   468  	}
   469  
   470  	parseSep()
   471  	for startsCompound(ps.peek(), NormalExpr) {
   472  		sn.addToCompounds(ParseCompound(ps, NormalExpr))
   473  		parseSep()
   474  	}
   475  }
   476  
   477  func IsSpace(r rune) bool {
   478  	return r == ' ' || r == '\t'
   479  }
   480  
   481  func startsArray(r rune) bool {
   482  	return IsSpaceOrNewline(r) || startsIndexing(r, NormalExpr)
   483  }
   484  
// Primary is the smallest expression unit. Which of the fields below are
// meaningful depends on Type.
type Primary struct {
	node
	Type PrimaryType
	// The unquoted string value. Valid for Bareword, SingleQuoted,
	// DoubleQuoted, Variable, Wildcard and Tilde.
	Value    string
	Elements []*Compound // Valid for List and Lambda
	Chunk    *Chunk      // Valid for OutputCapture, ExceptionCapture and Lambda
	MapPairs []*MapPair  // Valid for Map and Lambda
	Braced   []*Compound // Valid for Braced
}
   497  
// PrimaryType is the type of a Primary.
type PrimaryType int

// Possible values for PrimaryType.
const (
	// BadPrimary is the zero value; it is what a Primary keeps when parsing
	// it failed before a type was assigned.
	BadPrimary PrimaryType = iota
	Bareword
	SingleQuoted
	DoubleQuoted
	// Variable is a '$' followed by a variable name.
	Variable
	// Wildcard is a run of '*' and '?'.
	Wildcard
	// Tilde is a '~' at the beginning of a compound.
	Tilde
	// ExceptionCapture is a chunk enclosed in "?(" and ")".
	ExceptionCapture
	// OutputCapture is a chunk enclosed in "(" and ")".
	OutputCapture
	List
	Lambda
	Map
	Braced
)
   517  
   518  func (pn *Primary) parse(ps *Parser, ctx ExprCtx) {
   519  	r := ps.peek()
   520  	if !startsPrimary(r, ctx) {
   521  		ps.error(errShouldBePrimary)
   522  		return
   523  	}
   524  
   525  	// Try bareword early, since it has precedence over wildcard on *
   526  	// when ctx = commandExpr.
   527  	if allowedInBareword(r, ctx) {
   528  		pn.bareword(ps, ctx)
   529  		return
   530  	}
   531  
   532  	switch r {
   533  	case '\'':
   534  		pn.singleQuoted(ps)
   535  	case '"':
   536  		pn.doubleQuoted(ps)
   537  	case '$':
   538  		pn.variable(ps)
   539  	case '*':
   540  		pn.wildcard(ps)
   541  	case '?':
   542  		if ps.hasPrefix("?(") {
   543  			pn.exitusCapture(ps)
   544  		} else {
   545  			pn.wildcard(ps)
   546  		}
   547  	case '(':
   548  		pn.outputCapture(ps)
   549  	case '[':
   550  		pn.lbracket(ps)
   551  	case '{':
   552  		pn.lbrace(ps)
   553  	default:
   554  		// Parse an empty bareword.
   555  		pn.Type = Bareword
   556  	}
   557  }
   558  
   559  func (pn *Primary) singleQuoted(ps *Parser) {
   560  	pn.Type = SingleQuoted
   561  	ps.next()
   562  	var buf bytes.Buffer
   563  	defer func() { pn.Value = buf.String() }()
   564  	for {
   565  		switch r := ps.next(); r {
   566  		case eof:
   567  			ps.error(errStringUnterminated)
   568  			return
   569  		case '\'':
   570  			if ps.peek() == '\'' {
   571  				// Two consecutive single quotes
   572  				ps.next()
   573  				buf.WriteByte('\'')
   574  			} else {
   575  				// End of string
   576  				return
   577  			}
   578  		default:
   579  			buf.WriteRune(r)
   580  		}
   581  	}
   582  }
   583  
   584  func (pn *Primary) doubleQuoted(ps *Parser) {
   585  	pn.Type = DoubleQuoted
   586  	ps.next()
   587  	var buf bytes.Buffer
   588  	defer func() { pn.Value = buf.String() }()
   589  	for {
   590  		switch r := ps.next(); r {
   591  		case eof:
   592  			ps.error(errStringUnterminated)
   593  			return
   594  		case '"':
   595  			return
   596  		case '\\':
   597  			switch r := ps.next(); r {
   598  			case 'c', '^':
   599  				// Control sequence
   600  				r := ps.next()
   601  				if r < 0x40 || r >= 0x60 {
   602  					ps.backup()
   603  					ps.error(errInvalidEscapeControl)
   604  					ps.next()
   605  				}
   606  				buf.WriteByte(byte(r - 0x40))
   607  			case 'x', 'u', 'U':
   608  				var n int
   609  				switch r {
   610  				case 'x':
   611  					n = 2
   612  				case 'u':
   613  					n = 4
   614  				case 'U':
   615  					n = 8
   616  				}
   617  				var rr rune
   618  				for i := 0; i < n; i++ {
   619  					d, ok := hexToDigit(ps.next())
   620  					if !ok {
   621  						ps.backup()
   622  						ps.error(errInvalidEscapeHex)
   623  						break
   624  					}
   625  					rr = rr*16 + d
   626  				}
   627  				buf.WriteRune(rr)
   628  			case '0', '1', '2', '3', '4', '5', '6', '7':
   629  				// 2 more octal digits
   630  				rr := r - '0'
   631  				for i := 0; i < 2; i++ {
   632  					r := ps.next()
   633  					if r < '0' || r > '7' {
   634  						ps.backup()
   635  						ps.error(errInvalidEscapeOct)
   636  						break
   637  					}
   638  					rr = rr*8 + (r - '0')
   639  				}
   640  				buf.WriteRune(rr)
   641  			default:
   642  				if rr, ok := doubleEscape[r]; ok {
   643  					buf.WriteRune(rr)
   644  				} else {
   645  					ps.backup()
   646  					ps.error(errInvalidEscape)
   647  					ps.next()
   648  				}
   649  			}
   650  		default:
   651  			buf.WriteRune(r)
   652  		}
   653  	}
   654  }
   655  
   656  // a table for the simple double-quote escape sequences.
   657  var doubleEscape = map[rune]rune{
   658  	// same as golang
   659  	'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r',
   660  	't': '\t', 'v': '\v', '\\': '\\', '"': '"',
   661  	// additional
   662  	'e': '\033',
   663  }
   664  
   665  var doubleUnescape = map[rune]rune{}
   666  
   667  func init() {
   668  	for k, v := range doubleEscape {
   669  		doubleUnescape[v] = k
   670  	}
   671  }
   672  
   673  func hexToDigit(r rune) (rune, bool) {
   674  	switch {
   675  	case '0' <= r && r <= '9':
   676  		return r - '0', true
   677  	case 'a' <= r && r <= 'f':
   678  		return r - 'a' + 10, true
   679  	case 'A' <= r && r <= 'F':
   680  		return r - 'A' + 10, true
   681  	default:
   682  		return -1, false
   683  	}
   684  }
   685  
   686  func (pn *Primary) variable(ps *Parser) {
   687  	pn.Type = Variable
   688  	defer func() { pn.Value = ps.src[pn.begin+1 : ps.pos] }()
   689  	ps.next()
   690  	// The character of the variable name can be anything.
   691  	if ps.next() == eof {
   692  		ps.backup()
   693  		ps.error(errShouldBeVariableName)
   694  		ps.next()
   695  	}
   696  	for allowedInVariableName(ps.peek()) {
   697  		ps.next()
   698  	}
   699  }
   700  
   701  // The following are allowed in variable names:
   702  // * Anything beyond ASCII that is printable
   703  // * Letters and numbers
   704  // * The symbols "-_:~"
   705  func allowedInVariableName(r rune) bool {
   706  	return (r >= 0x80 && unicode.IsPrint(r)) ||
   707  		('0' <= r && r <= '9') ||
   708  		('a' <= r && r <= 'z') ||
   709  		('A' <= r && r <= 'Z') ||
   710  		r == '-' || r == '_' || r == ':' || r == '~'
   711  }
   712  
   713  func (pn *Primary) wildcard(ps *Parser) {
   714  	pn.Type = Wildcard
   715  	for isWildcard(ps.peek()) {
   716  		ps.next()
   717  	}
   718  	pn.Value = ps.src[pn.begin:ps.pos]
   719  }
   720  
   721  func isWildcard(r rune) bool {
   722  	return r == '*' || r == '?'
   723  }
   724  
   725  func (pn *Primary) exitusCapture(ps *Parser) {
   726  	ps.next()
   727  	ps.next()
   728  	addSep(pn, ps)
   729  
   730  	pn.Type = ExceptionCapture
   731  
   732  	pn.setChunk(ParseChunk(ps))
   733  
   734  	if !parseSep(pn, ps, ')') {
   735  		ps.error(errShouldBeRParen)
   736  	}
   737  }
   738  
   739  func (pn *Primary) outputCapture(ps *Parser) {
   740  	pn.Type = OutputCapture
   741  	parseSep(pn, ps, '(')
   742  
   743  	pn.setChunk(ParseChunk(ps))
   744  
   745  	if !parseSep(pn, ps, ')') {
   746  		ps.error(errShouldBeRParen)
   747  	}
   748  }
   749  
// List   = '[' { Space } { Compound { Space } } ']'
// Map    = '[' { Space } { MapPair { Space } } ']'
//        = '[' { Space } '&' { Space } ']'
// Lambda = '[' { Space } { (Compound | MapPair) { Space } } ']' '{' Chunk '}'
   754  
   755  func (pn *Primary) lbracket(ps *Parser) {
   756  	parseSep(pn, ps, '[')
   757  	parseSpacesAndNewlines(pn, ps)
   758  
   759  	loneAmpersand := false
   760  items:
   761  	for {
   762  		r := ps.peek()
   763  		switch {
   764  		case r == '&':
   765  			ps.next()
   766  			hasMapPair := startsCompound(ps.peek(), LHSExpr)
   767  			if !hasMapPair {
   768  				loneAmpersand = true
   769  				addSep(pn, ps)
   770  				parseSpacesAndNewlines(pn, ps)
   771  				break items
   772  			}
   773  			ps.backup()
   774  			pn.addToMapPairs(ParseMapPair(ps))
   775  		case startsCompound(r, NormalExpr):
   776  			pn.addToElements(ParseCompound(ps, NormalExpr))
   777  		default:
   778  			break items
   779  		}
   780  		parseSpacesAndNewlines(pn, ps)
   781  	}
   782  
   783  	if !parseSep(pn, ps, ']') {
   784  		ps.error(errShouldBeRBracket)
   785  	}
   786  	if parseSep(pn, ps, '{') {
   787  		pn.lambda(ps)
   788  	} else {
   789  		if loneAmpersand || len(pn.MapPairs) > 0 {
   790  			if len(pn.Elements) > 0 {
   791  				ps.error(errBothElementsAndPairs)
   792  			}
   793  			pn.Type = Map
   794  		} else {
   795  			pn.Type = List
   796  		}
   797  	}
   798  }
   799  
   800  // lambda parses a lambda expression. The opening brace has been seen.
   801  func (pn *Primary) lambda(ps *Parser) {
   802  	pn.Type = Lambda
   803  	pn.setChunk(ParseChunk(ps))
   804  	if !parseSep(pn, ps, '}') {
   805  		ps.error(errShouldBeRBrace)
   806  	}
   807  }
   808  
   809  // Braced = '{' Compound { BracedSep Compounds } '}'
   810  // BracedSep = { Space | '\n' } [ ',' ] { Space | '\n' }
   811  func (pn *Primary) lbrace(ps *Parser) {
   812  	parseSep(pn, ps, '{')
   813  
   814  	if r := ps.peek(); r == ';' || r == '\n' || IsSpace(r) {
   815  		pn.lambda(ps)
   816  		return
   817  	}
   818  
   819  	pn.Type = Braced
   820  
   821  	// XXX: The compound can be empty, which allows us to parse {,foo}.
   822  	// Allowing compounds to be empty can be fragile in other cases.
   823  	pn.addToBraced(ParseCompound(ps, BracedElemExpr))
   824  
   825  	for isBracedSep(ps.peek()) {
   826  		parseSpacesAndNewlines(pn, ps)
   827  		// optional, so ignore the return value
   828  		parseSep(pn, ps, ',')
   829  		parseSpacesAndNewlines(pn, ps)
   830  
   831  		pn.addToBraced(ParseCompound(ps, BracedElemExpr))
   832  	}
   833  	if !parseSep(pn, ps, '}') {
   834  		ps.error(errShouldBeBraceSepOrRBracket)
   835  	}
   836  }
   837  
   838  func isBracedSep(r rune) bool {
   839  	return r == ',' || IsSpaceOrNewline(r)
   840  }
   841  
   842  func (pn *Primary) bareword(ps *Parser, ctx ExprCtx) {
   843  	pn.Type = Bareword
   844  	defer func() { pn.Value = ps.src[pn.begin:ps.pos] }()
   845  	for allowedInBareword(ps.peek(), ctx) {
   846  		ps.next()
   847  	}
   848  }
   849  
   850  // allowedInBareword returns where a rune is allowed in barewords in the given
   851  // expression context. The special strictExpr context queries whether the rune
   852  // is allowed in all contexts.
   853  //
   854  // The following are allowed in barewords:
   855  //
   856  // * Anything allowed in variable names
   857  // * The symbols "./@%+!"
   858  // * The symbol "=", if ctx != lhsExpr && ctx != strictExpr
   859  // * The symbol ",", if ctx != bracedExpr && ctx != strictExpr
   860  // * The symbols "<>*^", if ctx = commandExpr
   861  //
   862  // The seemingly weird inclusion of \ is for easier path manipulation in
   863  // Windows.
   864  func allowedInBareword(r rune, ctx ExprCtx) bool {
   865  	return allowedInVariableName(r) || r == '.' || r == '/' ||
   866  		r == '@' || r == '%' || r == '+' || r == '!' ||
   867  		(ctx != LHSExpr && ctx != strictExpr && r == '=') ||
   868  		(ctx != BracedElemExpr && ctx != strictExpr && r == ',') ||
   869  		(ctx == CmdExpr && (r == '<' || r == '>' || r == '*' || r == '^'))
   870  }
   871  
   872  func startsPrimary(r rune, ctx ExprCtx) bool {
   873  	return r == '\'' || r == '"' || r == '$' || allowedInBareword(r, ctx) ||
   874  		r == '?' || r == '*' || r == '(' || r == '[' || r == '{'
   875  }
   876  
// MapPair = '&' Compound [ '=' { Space | '\n' } Compound ]
//
// The key is parsed in LHSExpr context, so that an unquoted '=' terminates
// it. Value is nil when there is no '=' after the key.
type MapPair struct {
	node
	Key, Value *Compound
}
   882  
   883  func (mpn *MapPair) parse(ps *Parser) {
   884  	parseSep(mpn, ps, '&')
   885  
   886  	mpn.setKey(ParseCompound(ps, LHSExpr))
   887  	if len(mpn.Key.Indexings) == 0 {
   888  		ps.error(errShouldBeCompound)
   889  	}
   890  
   891  	if parseSep(mpn, ps, '=') {
   892  		parseSpacesAndNewlines(mpn, ps)
   893  		// Parse value part.
   894  		mpn.setValue(ParseCompound(ps, NormalExpr))
   895  		// The value part can be empty.
   896  	}
   897  }
   898  
// Sep is the catch-all node type for leaf nodes that lack internal structure
// and semantics, and serve solely syntactic purposes: separators, runs of
// spaces, comments, brackets and the like. How separators are parsed depends
// on the parent node; as such Sep lacks a genuine parse method.
type Sep struct {
	node
}
   906  
   907  func NewSep(src string, begin, end int) *Sep {
   908  	return &Sep{node{nil, begin, end, src[begin:end], nil}}
   909  }
   910  
   911  func addSep(n Node, ps *Parser) {
   912  	var begin int
   913  	ch := n.Children()
   914  	if len(ch) > 0 {
   915  		begin = ch[len(ch)-1].End()
   916  	} else {
   917  		begin = n.Begin()
   918  	}
   919  	if begin < ps.pos {
   920  		addChild(n, NewSep(ps.src, begin, ps.pos))
   921  	}
   922  }
   923  
   924  func parseSep(n Node, ps *Parser, sep rune) bool {
   925  	if ps.peek() == sep {
   926  		ps.next()
   927  		addSep(n, ps)
   928  		return true
   929  	}
   930  	return false
   931  }
   932  
   933  func parseSpaces(n Node, ps *Parser) {
   934  	parseSpacesInner(n, ps, IsSpace)
   935  }
   936  
   937  func parseSpacesAndNewlines(n Node, ps *Parser) {
   938  	parseSpacesInner(n, ps, IsSpaceOrNewline)
   939  }
   940  
   941  func parseSpacesInner(n Node, ps *Parser, isSpace func(rune) bool) {
   942  spaces:
   943  	for {
   944  		r := ps.peek()
   945  		switch {
   946  		case isSpace(r):
   947  			ps.next()
   948  		case r == '\\': // line continuation
   949  			ps.next()
   950  			switch ps.peek() {
   951  			case '\n':
   952  				ps.next()
   953  			case eof:
   954  				ps.error(errShouldBeEscapeSequence)
   955  			default:
   956  				ps.backup()
   957  				break spaces
   958  			}
   959  		default:
   960  			break spaces
   961  		}
   962  	}
   963  	addSep(n, ps)
   964  }
   965  
   966  func IsSpaceOrNewline(r rune) bool {
   967  	return IsSpace(r) || r == '\n'
   968  }
   969  
   970  func addChild(p Node, ch Node) {
   971  	p.n().children = append(p.n().children, ch)
   972  	ch.n().parent = p
   973  }