github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/parse/parse.go

github.com/markusbkk/elvish@v0.0.0-20231204143114-91dc52438621/pkg/parse/parse.go (about)

     1  // Package parse implements the elvish parser.
     2  //
     3  // The parser builds a hybrid of AST (abstract syntax tree) and parse tree
     4  // (a.k.a. concrete syntax tree). The AST part only includes parts that are
     5  // semantically significant -- i.e. skipping whitespaces and symbols that do not
     6  // alter the semantics, and is embodied in the fields of each *Node type. The
     7  // parse tree part corresponds to all the text in the original source text, and
     8  // is embodied in the children of each *Node type.
     9  package parse
    10  
    11  //go:generate stringer -type=PrimaryType,RedirMode,ExprCtx -output=string.go
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  	"io"
    17  	"math"
    18  	"strings"
    19  	"unicode"
    20  
    21  	"github.com/markusbkk/elvish/pkg/diag"
    22  )
    23  
// Tree represents a parsed tree.
type Tree struct {
	// Root is the top-level Chunk node of the tree.
	Root *Chunk
	// Source is the source the tree was parsed from.
	Source Source
}
    29  
// Config keeps configuration options when parsing. The zero value has a nil
// WarningWriter and therefore suppresses warnings.
type Config struct {
	// Destination of warnings. If nil, warnings are suppressed.
	WarningWriter io.Writer
}
    35  
    36  // Parse parses the given source. The returned error always has type *Error
    37  // if it is not nil.
    38  func Parse(src Source, cfg Config) (Tree, error) {
    39  	tree := Tree{&Chunk{}, src}
    40  	err := ParseAs(src, tree.Root, cfg)
    41  	return tree, err
    42  }
    43  
    44  // ParseAs parses the given source as a node, depending on the dynamic type of
    45  // n. If the error is not nil, it always has type *Error.
    46  func ParseAs(src Source, n Node, cfg Config) error {
    47  	ps := &parser{srcName: src.Name, src: src.Code, warn: cfg.WarningWriter}
    48  	ps.parse(n)
    49  	ps.done()
    50  	return ps.assembleError()
    51  }
    52  
// Errors reported by the parser.
//
// NOTE(review): newError is defined elsewhere; from its uses here the first
// argument looks like the error message (possibly empty) and the remaining
// arguments look like descriptions of what was expected -- confirm against its
// definition.
var (
	errShouldBeForm               = newError("", "form")
	errBadRedirSign               = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
	errShouldBeFD                 = newError("", "a composite term representing fd")
	errShouldBeFilename           = newError("", "a composite term representing filename")
	errShouldBeArray              = newError("", "spaced")
	errStringUnterminated         = newError("string not terminated")
	errInvalidEscape              = newError("invalid escape sequence")
	errInvalidEscapeOct           = newError("invalid escape sequence", "octal digit")
	errInvalidEscapeOctOverflow   = newError("invalid octal escape sequence", "below 256")
	errInvalidEscapeHex           = newError("invalid escape sequence", "hex digit")
	errInvalidEscapeControl       = newError("invalid control sequence", "a codepoint between 0x3F and 0x5F")
	errShouldBePrimary            = newError("", "single-quoted string", "double-quoted string", "bareword")
	errShouldBeVariableName       = newError("", "variable name")
	errShouldBeRBracket           = newError("", "']'")
	errShouldBeRBrace             = newError("", "'}'")
	errShouldBeBraceSepOrRBracket = newError("", "','", "'}'")
	errShouldBeRParen             = newError("", "')'")
	errShouldBeCompound           = newError("", "compound")
	errShouldBeEqual              = newError("", "'='")
	errShouldBePipe               = newError("", "'|'")
	errBothElementsAndPairs       = newError("cannot contain both list elements and map pairs")
	errShouldBeNewline            = newError("", "newline")
)
    78  
// Chunk = { PipelineSep | Space } { Pipeline { PipelineSep | Space } }
//
// A Chunk is a sequence of pipelines separated by pipeline separators.
type Chunk struct {
	node
	// Pipelines collects the Pipeline nodes parsed by (*Chunk).parse.
	Pipelines []*Pipeline
}
    84  
    85  func (bn *Chunk) parse(ps *parser) {
    86  	bn.parseSeps(ps)
    87  	for startsPipeline(ps.peek()) {
    88  		ps.parse(&Pipeline{}).addTo(&bn.Pipelines, bn)
    89  		if bn.parseSeps(ps) == 0 {
    90  			break
    91  		}
    92  	}
    93  }
    94  
    95  func isPipelineSep(r rune) bool {
    96  	return r == '\r' || r == '\n' || r == ';'
    97  }
    98  
    99  // parseSeps parses pipeline separators along with whitespaces. It returns the
   100  // number of pipeline separators parsed.
   101  func (bn *Chunk) parseSeps(ps *parser) int {
   102  	nseps := 0
   103  	for {
   104  		r := ps.peek()
   105  		if isPipelineSep(r) {
   106  			// parse as a Sep
   107  			parseSep(bn, ps, r)
   108  			nseps++
   109  		} else if IsInlineWhitespace(r) || r == '#' {
   110  			// parse a run of spaces as a Sep
   111  			parseSpaces(bn, ps)
   112  		} else {
   113  			break
   114  		}
   115  	}
   116  	return nseps
   117  }
   118  
// Pipeline = Form { '|' Form }
type Pipeline struct {
	node
	// Forms collects the Form nodes of the pipeline, as parsed by
	// (*Pipeline).parse.
	Forms []*Form
	// Background is set when the forms are followed by a '&'.
	Background bool
}
   125  
   126  func (pn *Pipeline) parse(ps *parser) {
   127  	ps.parse(&Form{}).addTo(&pn.Forms, pn)
   128  	for parseSep(pn, ps, '|') {
   129  		parseSpacesAndNewlines(pn, ps)
   130  		if !startsForm(ps.peek()) {
   131  			ps.error(errShouldBeForm)
   132  			return
   133  		}
   134  		ps.parse(&Form{}).addTo(&pn.Forms, pn)
   135  	}
   136  	parseSpaces(pn, ps)
   137  	if ps.peek() == '&' {
   138  		ps.next()
   139  		addSep(pn, ps)
   140  		pn.Background = true
   141  		parseSpaces(pn, ps)
   142  	}
   143  }
   144  
// startsPipeline reports whether r can start a Pipeline, which is the case
// exactly when it can start a Form.
func startsPipeline(r rune) bool {
	return startsForm(r)
}
   148  
// Form = { Space } { { Assignment } { Space } }
//        { Compound } { Space } { ( Compound | MapPair | Redir ) { Space } }
type Form struct {
	node
	// Assignments holds the assignments found before the head.
	Assignments []*Assignment
	// Head is the command of the form. It is nil for a form that consists only
	// of assignments.
	Head *Compound
	// Args holds the compounds that follow the head and are not part of a
	// redirection.
	Args []*Compound
	// Opts holds the map pairs (introduced by '&') that follow the head.
	Opts []*MapPair
	// Redirs holds the redirections that follow the head.
	Redirs []*Redir
}
   159  
// parse parses a Form: any number of leading assignments, then a head followed
// by arguments, options and redirections. A form without a head is accepted
// when it has at least one assignment; otherwise an error is recorded.
func (fn *Form) parse(ps *parser) {
	parseSpaces(fn, ps)
	// Each leading compound is first parsed speculatively in command context.
	// If it does not look like an assignment it becomes the head. Otherwise
	// the parser is rewound to the saved state and the same text is parsed
	// again as an Assignment, and the loop tries the next compound.
	for startsCompound(ps.peek(), CmdExpr) {
		initial := ps.save()
		cmdNode := &Compound{ExprCtx: CmdExpr}
		parsedCmd := ps.parse(cmdNode)

		if !parsableAsAssignment(cmdNode) {
			parsedCmd.addAs(&fn.Head, fn)
			parseSpaces(fn, ps)
			break
		}
		ps.restore(initial)
		ps.parse(&Assignment{}).addTo(&fn.Assignments, fn)
		parseSpaces(fn, ps)
	}

	if fn.Head == nil {
		if len(fn.Assignments) > 0 {
			// Assignment-only form.
			return
		}
		// Bad form. The error is recorded, and parsing still continues with
		// the loop below.
		ps.error(fmt.Errorf("bad rune at form head: %q", ps.peek()))
	}

	for {
		r := ps.peek()
		switch {
		case r == '&':
			// Look one rune past the '&' to tell an option (map pair) from a
			// background indicator, then step back so that the '&' itself is
			// still unconsumed.
			ps.next()
			hasMapPair := startsCompound(ps.peek(), LHSExpr)
			ps.backup()
			if !hasMapPair {
				// background indicator
				return
			}
			ps.parse(&MapPair{}).addTo(&fn.Opts, fn)
		case startsCompound(r, NormalExpr):
			cn := &Compound{}
			ps.parse(cn)
			if isRedirSign(ps.peek()) {
				// Redir: a compound immediately followed by a redirection
				// sign is the left-hand side of that redirection.
				ps.parse(&Redir{Left: cn}).addTo(&fn.Redirs, fn)
			} else {
				parsed{cn}.addTo(&fn.Args, fn)
			}
		case isRedirSign(r):
			// Redirection without a left-hand side.
			ps.parse(&Redir{}).addTo(&fn.Redirs, fn)
		default:
			return
		}
		parseSpaces(fn, ps)
	}
}
   215  
   216  func parsableAsAssignment(cn *Compound) bool {
   217  	if len(cn.Indexings) == 0 {
   218  		return false
   219  	}
   220  	switch cn.Indexings[0].Head.Type {
   221  	case Braced, SingleQuoted, DoubleQuoted:
   222  		return len(cn.Indexings) >= 2 &&
   223  			strings.HasPrefix(SourceText(cn.Indexings[1]), "=")
   224  	case Bareword:
   225  		name := cn.Indexings[0].Head.Value
   226  		eq := strings.IndexByte(name, '=')
   227  		if eq >= 0 {
   228  			return validBarewordVariableName(name[:eq], true)
   229  		} else {
   230  			return validBarewordVariableName(name, true) &&
   231  				len(cn.Indexings) >= 2 &&
   232  				strings.HasPrefix(SourceText(cn.Indexings[1]), "=")
   233  		}
   234  	default:
   235  		return false
   236  	}
   237  }
   238  
   239  func startsForm(r rune) bool {
   240  	return IsInlineWhitespace(r) || startsCompound(r, CmdExpr)
   241  }
   242  
// Assignment = Indexing '=' Compound
type Assignment struct {
	node
	// Left is the assignment target, parsed in LHSExpr context.
	Left *Indexing
	// Right is the compound after the '='.
	Right *Compound
}
   249  
   250  func (an *Assignment) parse(ps *parser) {
   251  	ps.parse(&Indexing{ExprCtx: LHSExpr}).addAs(&an.Left, an)
   252  	head := an.Left.Head
   253  	if !ValidLHSVariable(head, true) {
   254  		ps.errorp(head, errShouldBeVariableName)
   255  	}
   256  
   257  	if !parseSep(an, ps, '=') {
   258  		ps.error(errShouldBeEqual)
   259  	}
   260  	ps.parse(&Compound{}).addAs(&an.Right, an)
   261  }
   262  
   263  func ValidLHSVariable(p *Primary, allowSigil bool) bool {
   264  	switch p.Type {
   265  	case Braced:
   266  		// TODO(xiaq): check further inside braced expression
   267  		return true
   268  	case SingleQuoted, DoubleQuoted:
   269  		// Quoted variable names may contain anything
   270  		return true
   271  	case Bareword:
   272  		// Bareword variable names may only contain runes that are valid in raw
   273  		// variable names
   274  		return validBarewordVariableName(p.Value, allowSigil)
   275  	default:
   276  		return false
   277  	}
   278  }
   279  
   280  func validBarewordVariableName(name string, allowSigil bool) bool {
   281  	if name == "" {
   282  		return false
   283  	}
   284  	if allowSigil && name[0] == '@' {
   285  		name = name[1:]
   286  	}
   287  	for _, r := range name {
   288  		if !allowedInVariableName(r) {
   289  			return false
   290  		}
   291  	}
   292  	return true
   293  }
   294  
// Redir = { Compound } { '<'|'>'|'<>'|'>>' } { Space } ( '&'? Compound )
type Redir struct {
	node
	// Left is the optional compound before the redirection sign. It is
	// parsed by Form.parse and supplied when the Redir is created.
	Left *Compound
	// Mode is determined by the redirection sign.
	Mode RedirMode
	// RightIsFd is set when the right-hand side is preceded by '&'.
	RightIsFd bool
	// Right is the compound after the redirection sign.
	Right *Compound
}
   303  
   304  func (rn *Redir) parse(ps *parser) {
   305  	// The parsing of the Left part is done in Form.parse.
   306  	if rn.Left != nil {
   307  		addChild(rn, rn.Left)
   308  		rn.From = rn.Left.From
   309  	}
   310  
   311  	begin := ps.pos
   312  	for isRedirSign(ps.peek()) {
   313  		ps.next()
   314  	}
   315  	sign := ps.src[begin:ps.pos]
   316  	switch sign {
   317  	case "<":
   318  		rn.Mode = Read
   319  	case ">":
   320  		rn.Mode = Write
   321  	case ">>":
   322  		rn.Mode = Append
   323  	case "<>":
   324  		rn.Mode = ReadWrite
   325  	default:
   326  		ps.error(errBadRedirSign)
   327  	}
   328  	addSep(rn, ps)
   329  	parseSpaces(rn, ps)
   330  	if parseSep(rn, ps, '&') {
   331  		rn.RightIsFd = true
   332  	}
   333  	ps.parse(&Compound{}).addAs(&rn.Right, rn)
   334  	if len(rn.Right.Indexings) == 0 {
   335  		if rn.RightIsFd {
   336  			ps.error(errShouldBeFD)
   337  		} else {
   338  			ps.error(errShouldBeFilename)
   339  		}
   340  		return
   341  	}
   342  }
   343  
   344  func isRedirSign(r rune) bool {
   345  	return r == '<' || r == '>'
   346  }
   347  
// RedirMode records the mode of an IO redirection.
type RedirMode int

// Possible values for RedirMode. The values are assigned with iota, so the
// order of the constants determines their numeric values.
const (
	// BadRedirMode is the zero value.
	BadRedirMode RedirMode = iota
	// Read is the mode of the "<" sign.
	Read
	// Write is the mode of the ">" sign.
	Write
	// ReadWrite is the mode of the "<>" sign.
	ReadWrite
	// Append is the mode of the ">>" sign.
	Append
)
   359  
// Filter is the Elvish filter DSL. It uses the same syntax as arguments and
// options to a command.
type Filter struct {
	node
	// Args holds the compounds of the filter.
	Args []*Compound
	// Opts holds the map pairs (introduced by '&') of the filter.
	Opts []*MapPair
}
   367  
   368  func (qn *Filter) parse(ps *parser) {
   369  	parseSpaces(qn, ps)
   370  	for {
   371  		r := ps.peek()
   372  		switch {
   373  		case r == '&':
   374  			ps.parse(&MapPair{}).addTo(&qn.Opts, qn)
   375  		case startsCompound(r, NormalExpr):
   376  			ps.parse(&Compound{}).addTo(&qn.Args, qn)
   377  		default:
   378  			return
   379  		}
   380  		parseSpaces(qn, ps)
   381  	}
   382  }
   383  
// Compound = { Indexing }
type Compound struct {
	node
	// ExprCtx is the expression context the compound is parsed in; it is
	// passed on to each Indexing.
	ExprCtx ExprCtx
	// Indexings holds the parts of the compound. A leading tilde is stored as
	// the first Indexing.
	Indexings []*Indexing
}
   390  
// ExprCtx represents special contexts of expression parsing.
type ExprCtx int

const (
	// NormalExpr represents a normal expression, namely none of the special
	// ones below. It is the default value.
	NormalExpr ExprCtx = iota
	// CmdExpr represents an expression used as the command in a form. In this
	// context, unquoted <>*^ are treated as bareword characters.
	CmdExpr
	// LHSExpr represents an expression used as the left-hand-side in either
	// assignments or map pairs. In this context, an unquoted = serves as an
	// expression terminator and is thus not treated as a bareword character.
	LHSExpr
	// BracedElemExpr represents an expression used as an element in a braced
	// expression. In this context, an unquoted , serves as an expression
	// terminator and is thus not treated as a bareword character.
	BracedElemExpr
	// strictExpr is only meaningful to allowedInBareword, where it queries
	// whether a rune is allowed in barewords in all contexts.
	strictExpr
)
   412  
   413  func (cn *Compound) parse(ps *parser) {
   414  	cn.tilde(ps)
   415  	for startsIndexing(ps.peek(), cn.ExprCtx) {
   416  		ps.parse(&Indexing{ExprCtx: cn.ExprCtx}).addTo(&cn.Indexings, cn)
   417  	}
   418  }
   419  
   420  // tilde parses a tilde if there is one. It is implemented here instead of
   421  // within Primary since a tilde can only appear as the first part of a
   422  // Compound. Elsewhere tildes are barewords.
   423  func (cn *Compound) tilde(ps *parser) {
   424  	if ps.peek() == '~' {
   425  		ps.next()
   426  		base := node{Ranging: diag.Ranging{From: ps.pos - 1, To: ps.pos},
   427  			sourceText: "~", parent: nil, children: nil}
   428  		pn := &Primary{node: base, Type: Tilde, Value: "~"}
   429  		in := &Indexing{node: base}
   430  		parsed{pn}.addAs(&in.Head, in)
   431  		parsed{in}.addTo(&cn.Indexings, cn)
   432  	}
   433  }
   434  
// startsCompound reports whether r can start a Compound in the context ctx,
// which is the case exactly when it can start an Indexing.
func startsCompound(r rune, ctx ExprCtx) bool {
	return startsIndexing(r, ctx)
}
   438  
// Indexing = Primary { '[' Array ']' }
type Indexing struct {
	node
	// ExprCtx is the expression context; it is passed on to the head.
	ExprCtx ExprCtx
	// Head is the primary being indexed.
	Head *Primary
	// Indices holds one Array per bracketed index following the head.
	Indices []*Array
}
   446  
   447  func (in *Indexing) parse(ps *parser) {
   448  	ps.parse(&Primary{ExprCtx: in.ExprCtx}).addAs(&in.Head, in)
   449  	for parseSep(in, ps, '[') {
   450  		if !startsArray(ps.peek()) {
   451  			ps.error(errShouldBeArray)
   452  		}
   453  
   454  		ps.parse(&Array{}).addTo(&in.Indices, in)
   455  
   456  		if !parseSep(in, ps, ']') {
   457  			ps.error(errShouldBeRBracket)
   458  			return
   459  		}
   460  	}
   461  }
   462  
// startsIndexing reports whether r can start an Indexing in the context ctx,
// which is the case exactly when it can start a Primary.
func startsIndexing(r rune, ctx ExprCtx) bool {
	return startsPrimary(r, ctx)
}
   466  
// Array = { Space | '\n' } { Compound { Space | '\n' } }
type Array struct {
	node
	// Compounds holds the elements of the array.
	Compounds []*Compound
	// When non-empty, records the occurrences of semicolons by the indices of
	// the compounds they appear before. For instance, [; ; a b; c d;] results
	// in Semicolons={0 0 2 4}.
	//
	// NOTE(review): (*Array).parse never writes to this field; confirm whether
	// anything else populates it.
	Semicolons []int
}
   476  
   477  func (sn *Array) parse(ps *parser) {
   478  	parseSep := func() { parseSpacesAndNewlines(sn, ps) }
   479  
   480  	parseSep()
   481  	for startsCompound(ps.peek(), NormalExpr) {
   482  		ps.parse(&Compound{}).addTo(&sn.Compounds, sn)
   483  		parseSep()
   484  	}
   485  }
   486  
   487  func startsArray(r rune) bool {
   488  	return IsWhitespace(r) || startsIndexing(r, NormalExpr)
   489  }
   490  
// Primary is the smallest expression unit.
type Primary struct {
	node
	// ExprCtx is the expression context, which affects which runes count as
	// bareword characters.
	ExprCtx ExprCtx
	// Type tells which kind of primary this is.
	Type PrimaryType
	// The unquoted string value. Valid for Bareword, SingleQuoted,
	// DoubleQuoted, Variable, Wildcard and Tilde.
	Value    string
	Elements []*Compound // Valid for List and Lambda
	Chunk    *Chunk      // Valid for OutputCapture, ExceptionCapture and Lambda
	MapPairs []*MapPair  // Valid for Map and Lambda
	Braced   []*Compound // Valid for Braced
}
   504  
// PrimaryType is the type of a Primary.
type PrimaryType int

// Possible values for PrimaryType. The values are assigned with iota, so the
// order of the constants determines their numeric values.
const (
	// BadPrimary is the zero value.
	BadPrimary PrimaryType = iota
	// Bareword is an unquoted word.
	Bareword
	// SingleQuoted is a string in single quotes.
	SingleQuoted
	// DoubleQuoted is a string in double quotes.
	DoubleQuoted
	// Variable is a variable use, introduced by '$'.
	Variable
	// Wildcard is a run of '*' or a '?'.
	Wildcard
	// Tilde is a '~' at the start of a compound.
	Tilde
	// ExceptionCapture is a chunk enclosed in "?(" and ")".
	ExceptionCapture
	// OutputCapture is a chunk enclosed in "(" and ")".
	OutputCapture
	// List is a bracketed expression without map pairs.
	List
	// Lambda is a braced expression parsed as a lambda.
	Lambda
	// Map is a bracketed expression with map pairs or a lone '&'.
	Map
	// Braced is a braced list of compounds.
	Braced
)
   524  
   525  func (pn *Primary) parse(ps *parser) {
   526  	r := ps.peek()
   527  	if !startsPrimary(r, pn.ExprCtx) {
   528  		ps.error(errShouldBePrimary)
   529  		return
   530  	}
   531  
   532  	// Try bareword early, since it has precedence over wildcard on *
   533  	// when ctx = commandExpr.
   534  	if allowedInBareword(r, pn.ExprCtx) {
   535  		pn.bareword(ps)
   536  		return
   537  	}
   538  
   539  	switch r {
   540  	case '\'':
   541  		pn.singleQuoted(ps)
   542  	case '"':
   543  		pn.doubleQuoted(ps)
   544  	case '$':
   545  		pn.variable(ps)
   546  	case '*':
   547  		pn.starWildcard(ps)
   548  	case '?':
   549  		if ps.hasPrefix("?(") {
   550  			pn.exitusCapture(ps)
   551  		} else {
   552  			pn.questionWildcard(ps)
   553  		}
   554  	case '(':
   555  		pn.outputCapture(ps)
   556  	case '[':
   557  		pn.lbracket(ps)
   558  	case '{':
   559  		pn.lbrace(ps)
   560  	default:
   561  		// Parse an empty bareword.
   562  		pn.Type = Bareword
   563  	}
   564  }
   565  
// singleQuoted parses a single-quoted string. It consumes the opening quote
// and leaves the rest to singleQuotedInner.
func (pn *Primary) singleQuoted(ps *parser) {
	pn.Type = SingleQuoted
	ps.next()
	pn.singleQuotedInner(ps)
}
   571  
   572  // Parses a single-quoted string after the opening quote. Sets pn.Value but not
   573  // pn.Type.
   574  func (pn *Primary) singleQuotedInner(ps *parser) {
   575  	var buf bytes.Buffer
   576  	defer func() { pn.Value = buf.String() }()
   577  	for {
   578  		switch r := ps.next(); r {
   579  		case eof:
   580  			ps.error(errStringUnterminated)
   581  			return
   582  		case '\'':
   583  			if ps.peek() == '\'' {
   584  				// Two consecutive single quotes
   585  				ps.next()
   586  				buf.WriteByte('\'')
   587  			} else {
   588  				// End of string
   589  				return
   590  			}
   591  		default:
   592  			buf.WriteRune(r)
   593  		}
   594  	}
   595  }
   596  
// doubleQuoted parses a double-quoted string. It consumes the opening quote
// and leaves the rest to doubleQuotedInner.
func (pn *Primary) doubleQuoted(ps *parser) {
	pn.Type = DoubleQuoted
	ps.next()
	pn.doubleQuotedInner(ps)
}
   602  
// Parses a double-quoted string after the opening quote. Sets pn.Value but not
// pn.Type.
//
// The following escape sequences are handled after a backslash: control
// sequences (\c or \^ followed by one rune), hex sequences (\x, \u and \U
// followed by two, four and eight hex digits), octal sequences (three octal
// digits) and the single-rune escapes listed in doubleEscape. Invalid
// sequences are recorded as errors and parsing of the string continues.
func (pn *Primary) doubleQuotedInner(ps *parser) {
	var buf bytes.Buffer
	// The collected text is stored on every way out of the function.
	defer func() { pn.Value = buf.String() }()
	for {
		switch r := ps.next(); r {
		case eof:
			ps.error(errStringUnterminated)
			return
		case '"':
			return
		case '\\':
			switch r := ps.next(); r {
			case 'c', '^': // control sequence
				r := ps.next()
				if r < 0x3F || r > 0x5F {
					// Step back so that the error is recorded before the
					// offending rune, then consume the rune again.
					ps.backup()
					ps.error(errInvalidEscapeControl)
					ps.next()
				}
				// Note that a byte is written even after an error.
				if byte(r) == '?' { // special-case: \c? => del
					buf.WriteByte(byte(0x7F))
				} else {
					buf.WriteByte(byte(r - 0x40))
				}
			case 'x', 'u', 'U': // two, four, or eight hex digits
				var n int
				switch r {
				case 'x':
					n = 2
				case 'u':
					n = 4
				case 'U':
					n = 8
				}
				var rr rune
				for i := 0; i < n; i++ {
					d, ok := hexToDigit(ps.next())
					if !ok {
						// Leave the offending rune unconsumed and stop reading
						// digits; the value accumulated so far is still
						// written below.
						ps.backup()
						ps.error(errInvalidEscapeHex)
						break
					}
					rr = rr*16 + d
				}
				// \x produces a single byte; \u and \U produce the encoding
				// of a rune.
				if r == 'x' {
					buf.WriteByte(byte(rr))
				} else {
					buf.WriteRune(rr)
				}
			case '0', '1', '2', '3', '4', '5', '6', '7': // three octal digits
				// The first digit has been consumed already; two more follow.
				rr := r - '0'
				for i := 0; i < 2; i++ {
					r := ps.next()
					if r < '0' || r > '7' {
						// Leave the offending rune unconsumed and stop reading
						// digits.
						ps.backup()
						ps.error(errInvalidEscapeOct)
						break
					}
					rr = rr*8 + (r - '0')
				}
				if rr <= math.MaxUint8 {
					buf.WriteByte(byte(rr))
				} else {
					// The value can only overflow when all three digits were
					// read, so the last four bytes are the backslash and the
					// digits.
					r := diag.Ranging{From: ps.pos - 4, To: ps.pos}
					ps.errorp(r, errInvalidEscapeOctOverflow)
				}
			default:
				if rr, ok := doubleEscape[r]; ok {
					buf.WriteRune(rr)
				} else {
					// Step back so that the error is recorded before the
					// offending rune, then consume the rune again.
					ps.backup()
					ps.error(errInvalidEscape)
					ps.next()
				}
			}
		default:
			buf.WriteRune(r)
		}
	}
}
   685  
// doubleEscape is a table for the simple double-quote escape sequences. It
// maps the rune following the backslash to the rune it stands for.
var doubleEscape = map[rune]rune{
	// same as golang
	'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r',
	't': '\t', 'v': '\v', '\\': '\\', '"': '"',
	// additional
	'e': '\033',
}
   694  
// doubleUnescape is the inverse of doubleEscape: it maps a rune to the rune
// that follows the backslash in its escape sequence. It is filled in by init.
var doubleUnescape = map[rune]rune{}

// init builds doubleUnescape by inverting doubleEscape.
func init() {
	for k, v := range doubleEscape {
		doubleUnescape[v] = k
	}
}
   702  
   703  func hexToDigit(r rune) (rune, bool) {
   704  	switch {
   705  	case '0' <= r && r <= '9':
   706  		return r - '0', true
   707  	case 'a' <= r && r <= 'f':
   708  		return r - 'a' + 10, true
   709  	case 'A' <= r && r <= 'F':
   710  		return r - 'A' + 10, true
   711  	default:
   712  		return -1, false
   713  	}
   714  }
   715  
// variable parses a variable use. After the leading '$', the name is either a
// single-quoted string, a double-quoted string, or a run of runes allowed in
// variable names, optionally starting with '@'.
func (pn *Primary) variable(ps *parser) {
	pn.Type = Variable
	// Consume the '$'.
	ps.next()
	switch r := ps.next(); r {
	case eof:
		// Step back so that the error is recorded before the end of input,
		// then move forward again.
		ps.backup()
		ps.error(errShouldBeVariableName)
		ps.next()
	case '\'':
		pn.singleQuotedInner(ps)
	case '"':
		pn.doubleQuotedInner(ps)
	default:
		// The value is everything after the '$' up to where parsing stops; it
		// is taken when the function returns so that it covers the runes
		// consumed below.
		defer func() { pn.Value = ps.src[pn.From+1 : ps.pos] }()
		if !allowedInVariableName(r) && r != '@' {
			// The rune cannot start a name: leave it unconsumed and record
			// the error.
			ps.backup()
			ps.error(errShouldBeVariableName)
		}
		for allowedInVariableName(ps.peek()) {
			ps.next()
		}
	}
}
   739  
   740  // The following are allowed in variable names:
   741  // * Anything beyond ASCII that is printable
   742  // * Letters and numbers
   743  // * The symbols "-_:~"
   744  func allowedInVariableName(r rune) bool {
   745  	return (r >= 0x80 && unicode.IsPrint(r)) ||
   746  		('0' <= r && r <= '9') ||
   747  		('a' <= r && r <= 'z') ||
   748  		('A' <= r && r <= 'Z') ||
   749  		r == '-' || r == '_' || r == ':' || r == '~'
   750  }
   751  
   752  func (pn *Primary) starWildcard(ps *parser) {
   753  	pn.Type = Wildcard
   754  	for ps.peek() == '*' {
   755  		ps.next()
   756  	}
   757  	pn.Value = ps.src[pn.From:ps.pos]
   758  }
   759  
   760  func (pn *Primary) questionWildcard(ps *parser) {
   761  	pn.Type = Wildcard
   762  	if ps.peek() == '?' {
   763  		ps.next()
   764  	}
   765  	pn.Value = ps.src[pn.From:ps.pos]
   766  }
   767  
   768  func (pn *Primary) exitusCapture(ps *parser) {
   769  	ps.next()
   770  	ps.next()
   771  	addSep(pn, ps)
   772  
   773  	pn.Type = ExceptionCapture
   774  
   775  	ps.parse(&Chunk{}).addAs(&pn.Chunk, pn)
   776  
   777  	if !parseSep(pn, ps, ')') {
   778  		ps.error(errShouldBeRParen)
   779  	}
   780  }
   781  
   782  func (pn *Primary) outputCapture(ps *parser) {
   783  	pn.Type = OutputCapture
   784  	parseSep(pn, ps, '(')
   785  
   786  	ps.parse(&Chunk{}).addAs(&pn.Chunk, pn)
   787  
   788  	if !parseSep(pn, ps, ')') {
   789  		ps.error(errShouldBeRParen)
   790  	}
   791  }
   792  
// List = '[' { Space | '\n' } { Compound { Space | '\n' } } ']'
// Map  = '[' { Space | '\n' } '&' { Space | '\n' } ']'
//      = '[' { Space | '\n' } { MapPair { Space | '\n' } } ']'

// lbracket parses a primary that starts with '[', which is either a List or a
// Map. The result is a Map when it contains at least one map pair or a lone
// '&', and a List otherwise. Mixing list elements and map pairs is recorded
// as an error, as is a missing ']'.
func (pn *Primary) lbracket(ps *parser) {
	parseSep(pn, ps, '[')
	parseSpacesAndNewlines(pn, ps)

	loneAmpersand := false
items:
	for {
		r := ps.peek()
		switch {
		case r == '&':
			// Look one rune past the '&' to tell a map pair from a lone '&'.
			ps.next()
			hasMapPair := startsCompound(ps.peek(), LHSExpr)
			if !hasMapPair {
				// A lone '&' ends the items; it is recorded as a separator.
				loneAmpersand = true
				addSep(pn, ps)
				parseSpacesAndNewlines(pn, ps)
				break items
			}
			// Step back so that MapPair sees its own '&'.
			ps.backup()
			ps.parse(&MapPair{}).addTo(&pn.MapPairs, pn)
		case startsCompound(r, NormalExpr):
			ps.parse(&Compound{}).addTo(&pn.Elements, pn)
		default:
			break items
		}
		parseSpacesAndNewlines(pn, ps)
	}

	if !parseSep(pn, ps, ']') {
		ps.error(errShouldBeRBracket)
	}
	if loneAmpersand || len(pn.MapPairs) > 0 {
		if len(pn.Elements) > 0 {
			// TODO(xiaq): Add correct position information.
			ps.error(errBothElementsAndPairs)
		}
		pn.Type = Map
	} else {
		pn.Type = List
	}
}
   839  
   840  // lambda parses a lambda expression. The opening brace has been seen.
   841  func (pn *Primary) lambda(ps *parser) {
   842  	pn.Type = Lambda
   843  	parseSpacesAndNewlines(pn, ps)
   844  	if parseSep(pn, ps, '|') {
   845  		parseSpacesAndNewlines(pn, ps)
   846  	items:
   847  		for {
   848  			r := ps.peek()
   849  			switch {
   850  			case r == '&':
   851  				ps.parse(&MapPair{}).addTo(&pn.MapPairs, pn)
   852  			case startsCompound(r, NormalExpr):
   853  				ps.parse(&Compound{}).addTo(&pn.Elements, pn)
   854  			default:
   855  				break items
   856  			}
   857  			parseSpacesAndNewlines(pn, ps)
   858  		}
   859  		if !parseSep(pn, ps, '|') {
   860  			ps.error(errShouldBePipe)
   861  		}
   862  	}
   863  	ps.parse(&Chunk{}).addAs(&pn.Chunk, pn)
   864  	if !parseSep(pn, ps, '}') {
   865  		ps.error(errShouldBeRBrace)
   866  	}
   867  }
   868  
   869  // Braced = '{' Compound { BracedSep Compounds } '}'
   870  // BracedSep = { Space | '\n' } [ ',' ] { Space | '\n' }
   871  func (pn *Primary) lbrace(ps *parser) {
   872  	parseSep(pn, ps, '{')
   873  
   874  	if r := ps.peek(); r == ';' || r == '\r' || r == '\n' || r == '|' || IsInlineWhitespace(r) {
   875  		pn.lambda(ps)
   876  		return
   877  	}
   878  
   879  	pn.Type = Braced
   880  
   881  	// TODO(xiaq): The compound can be empty, which allows us to parse {,foo}.
   882  	// Allowing compounds to be empty can be fragile in other cases.
   883  	ps.parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&pn.Braced, pn)
   884  
   885  	for isBracedSep(ps.peek()) {
   886  		parseSpacesAndNewlines(pn, ps)
   887  		// optional, so ignore the return value
   888  		parseSep(pn, ps, ',')
   889  		parseSpacesAndNewlines(pn, ps)
   890  
   891  		ps.parse(&Compound{ExprCtx: BracedElemExpr}).addTo(&pn.Braced, pn)
   892  	}
   893  	if !parseSep(pn, ps, '}') {
   894  		ps.error(errShouldBeBraceSepOrRBracket)
   895  	}
   896  }
   897  
   898  func isBracedSep(r rune) bool {
   899  	return r == ',' || IsWhitespace(r)
   900  }
   901  
   902  func (pn *Primary) bareword(ps *parser) {
   903  	pn.Type = Bareword
   904  	defer func() { pn.Value = ps.src[pn.From:ps.pos] }()
   905  	for allowedInBareword(ps.peek(), pn.ExprCtx) {
   906  		ps.next()
   907  	}
   908  }
   909  
   910  // allowedInBareword returns where a rune is allowed in barewords in the given
   911  // expression context. The special strictExpr context queries whether the rune
   912  // is allowed in all contexts.
   913  //
   914  // The following are allowed in barewords:
   915  //
   916  // * Anything allowed in variable names
   917  // * The symbols "./\@%+!"
   918  // * The symbol "=", if ctx != lhsExpr && ctx != strictExpr
   919  // * The symbol ",", if ctx != bracedExpr && ctx != strictExpr
   920  // * The symbols "<>*^", if ctx = commandExpr
   921  //
   922  // The seemingly weird inclusion of \ is for easier path manipulation in
   923  // Windows.
   924  func allowedInBareword(r rune, ctx ExprCtx) bool {
   925  	return allowedInVariableName(r) || r == '.' || r == '/' ||
   926  		r == '\\' || r == '@' || r == '%' || r == '+' || r == '!' ||
   927  		(ctx != LHSExpr && ctx != strictExpr && r == '=') ||
   928  		(ctx != BracedElemExpr && ctx != strictExpr && r == ',') ||
   929  		(ctx == CmdExpr && (r == '<' || r == '>' || r == '*' || r == '^'))
   930  }
   931  
   932  func startsPrimary(r rune, ctx ExprCtx) bool {
   933  	return r == '\'' || r == '"' || r == '$' || allowedInBareword(r, ctx) ||
   934  		r == '?' || r == '*' || r == '(' || r == '[' || r == '{'
   935  }
   936  
// MapPair = '&' Compound [ '=' { Space | '\n' } Compound ]
type MapPair struct {
	node
	// Key is the compound after the '&', parsed in LHSExpr context. Value is
	// the compound after the '='; it is nil when there is no '='.
	Key, Value *Compound
}
   942  
   943  func (mpn *MapPair) parse(ps *parser) {
   944  	parseSep(mpn, ps, '&')
   945  
   946  	ps.parse(&Compound{ExprCtx: LHSExpr}).addAs(&mpn.Key, mpn)
   947  	if len(mpn.Key.Indexings) == 0 {
   948  		ps.error(errShouldBeCompound)
   949  	}
   950  
   951  	if parseSep(mpn, ps, '=') {
   952  		parseSpacesAndNewlines(mpn, ps)
   953  		// Parse value part. It can be empty.
   954  		ps.parse(&Compound{}).addAs(&mpn.Value, mpn)
   955  	}
   956  }
   957  
// Sep is the catch-all node type for leaf nodes that lack internal structure
// and semantics, and serve solely syntactic purposes. The parsing of
// separators depends on the parent node; as such Sep lacks a genuine parse
// method.
type Sep struct {
	node
}
   965  
   966  // NewSep makes a new Sep.
   967  func NewSep(src string, begin, end int) *Sep {
   968  	return &Sep{node: node{diag.Ranging{From: begin, To: end}, src[begin:end], nil, nil}}
   969  }
   970  
// parse does nothing. Sep nodes are created directly with NewSep rather than
// being parsed.
func (*Sep) parse(*parser) {
	// A no-op, only to satisfy the Node interface.
}
   974  
   975  func addSep(n Node, ps *parser) {
   976  	var begin int
   977  	ch := Children(n)
   978  	if len(ch) > 0 {
   979  		begin = ch[len(ch)-1].Range().To
   980  	} else {
   981  		begin = n.Range().From
   982  	}
   983  	if begin < ps.pos {
   984  		addChild(n, NewSep(ps.src, begin, ps.pos))
   985  	}
   986  }
   987  
   988  func parseSep(n Node, ps *parser, sep rune) bool {
   989  	if ps.peek() == sep {
   990  		ps.next()
   991  		addSep(n, ps)
   992  		return true
   993  	}
   994  	return false
   995  }
   996  
// parseSpaces consumes inline whitespace, comments and line continuations,
// and records them as a Sep child of n. Newlines are not consumed.
func parseSpaces(n Node, ps *parser) {
	parseSpacesInner(n, ps, false)
}
  1000  
// parseSpacesAndNewlines consumes whitespace including newlines, comments and
// line continuations, and records them as a Sep child of n.
func parseSpacesAndNewlines(n Node, ps *parser) {
	parseSpacesInner(n, ps, true)
}
  1004  
// parseSpacesInner consumes a run of inline whitespace, comments and line
// continuations, and records the consumed text as a Sep child of n. When
// newlines is true, carriage returns and newlines are consumed as well.
func parseSpacesInner(n Node, ps *parser, newlines bool) {
spaces:
	for {
		r := ps.peek()
		switch {
		case IsInlineWhitespace(r):
			ps.next()
		case newlines && IsWhitespace(r):
			ps.next()
		case r == '#':
			// Comment is like inline whitespace as long as we don't include the
			// trailing newline.
			ps.next()
			for {
				r := ps.peek()
				if r == eof || r == '\r' || r == '\n' {
					break
				}
				ps.next()
			}
		case r == '^':
			// Line continuation is like inline whitespace.
			ps.next()
			switch ps.peek() {
			case '\r':
				// Accept both "\r" and "\r\n" after the '^'.
				ps.next()
				if ps.peek() == '\n' {
					ps.next()
				}
			case '\n':
				ps.next()
			case eof:
				ps.error(errShouldBeNewline)
			default:
				// Not a line continuation after all: leave the '^'
				// unconsumed and stop.
				ps.backup()
				break spaces
			}
		default:
			break spaces
		}
	}
	addSep(n, ps)
}
  1048  
  1049  // IsInlineWhitespace reports whether r is an inline whitespace character.
  1050  // Currently this includes space (Unicode 0x20) and tab (Unicode 0x9).
  1051  func IsInlineWhitespace(r rune) bool {
  1052  	return r == ' ' || r == '\t'
  1053  }
  1054  
// IsWhitespace reports whether r is a whitespace. Currently this includes
// inline whitespace characters, carriage return (Unicode 0xd) and newline
// (Unicode 0xa).
func IsWhitespace(r rune) bool {
	return IsInlineWhitespace(r) || r == '\r' || r == '\n'
}
  1060  
// addChild adds ch as a child of p and sets p as the parent of ch.
func addChild(p Node, ch Node) {
	p.n().addChild(ch)
	ch.n().parent = p
}