github.com/xaverkapeller/go-gitignore@v0.0.0-20201129201858-74ef740b8b77/parser.go

package gitignore

import (
	"io"
)

// Parser is the interface for parsing .gitignore files and extracting the set
// of patterns specified in the .gitignore file.
type Parser interface {
	// Parse returns all well-formed .gitignore Patterns contained within the
	// parser stream. Parsing will terminate at the end of the stream, or if
	// the parser error handler returns false.
	Parse() []Pattern

	// Next returns the next well-formed .gitignore Pattern from the parser
	// stream. If an error is encountered, and the error handler is either
	// not defined, or returns true, Next will skip to the end of the current
	// line and attempt to parse the next Pattern. If the error handler
	// returns false, or the parser reaches the end of the stream, Next
	// returns nil.
	Next() Pattern

	// Position returns the current position of the parser in the input stream.
	Position() Position
} // Parser{}

// parser is the implementation of the .gitignore parser
type parser struct {
	_lexer Lexer
	_undo  []*Token
	_error func(Error) bool
} // parser{}

// NewParser returns a new Parser instance for the given stream r.
// If err is not nil, it will be called for every error encountered during
// parsing. Parsing will terminate at the end of the stream, or if err
// returns false.
func NewParser(r io.Reader, err func(Error) bool) Parser {
	return &parser{_lexer: NewLexer(r), _error: err}
} // NewParser()
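
// parseAll is a hypothetical helper, not part of the original file: a minimal
// sketch of how NewParser is intended to be used. The handler collects every
// Error and returns true so that parsing continues past malformed lines.
func parseAll(r io.Reader) ([]Pattern, []Error) {
	_errs := make([]Error, 0)
	_parser := NewParser(r, func(e Error) bool {
		_errs = append(_errs, e)
		return true // keep parsing after an error
	})

	// Parse drains the stream and returns every well-formed Pattern
	return _parser.Parse(), _errs
}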

// Parse returns all well-formed .gitignore Patterns contained within the
// parser stream. Parsing will terminate at the end of the stream, or if
// the parser error handler returns false.
func (p *parser) Parse() []Pattern {
	// keep parsing until there are no more patterns
	_patterns := make([]Pattern, 0)
	for {
		_pattern := p.Next()
		if _pattern == nil {
			return _patterns
		}
		_patterns = append(_patterns, _pattern)
	}
} // Parse()

// Next returns the next well-formed .gitignore Pattern from the parser stream.
// If an error is encountered, and the error handler is either not defined, or
// returns true, Next will skip to the end of the current line and attempt to
// parse the next Pattern. If the error handler returns false, or the parser
// reaches the end of the stream, Next returns nil.
func (p *parser) Next() Pattern {
	// keep searching until we find the next pattern, or until we
	// reach the end of the file
	for {
		_token, _err := p.next()
		if _err != nil {
			if !p.errors(_err) {
				return nil
			}

			// we got an error from the lexer, so skip the remainder
			// of this line and try again from the next line
			for _err != nil {
				_err = p.skip()
				if _err != nil {
					if !p.errors(_err) {
						return nil
					}
				}
			}
			continue
		}

		switch _token.Type {
		// we're at the end of the file
		case EOF:
			return nil

		// we have a blank line or comment
		case EOL:
			continue
		case COMMENT:
			continue

		// otherwise, attempt to build the next pattern
		default:
			_pattern, _err := p.build(_token)
			if _err != nil {
				if !p.errors(_err) {
					return nil
				}

				// we encountered an error parsing the retrieved tokens
				//      - skip to the end of the line
				for _err != nil {
					_err = p.skip()
					if _err != nil {
						if !p.errors(_err) {
							return nil
						}
					}
				}

				// skip to the next token
				continue
			} else if _pattern != nil {
				return _pattern
			}
		}
	}
} // Next()
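
// streamPatterns is a hypothetical helper, not part of the original file: a
// sketch of consuming Patterns one at a time with Next rather than all at
// once with Parse, which lets the caller stop early. A nil error handler
// means every parse error is skipped (see errors below).
func streamPatterns(r io.Reader, visit func(Pattern) bool) {
	_parser := NewParser(r, nil)
	for {
		_pattern := _parser.Next()
		if _pattern == nil {
			return // end of stream
		}
		if !visit(_pattern) {
			return // caller asked to stop
		}
	}
}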

// Position returns the current position of the parser in the input stream.
func (p *parser) Position() Position {
	// if we have any previously read tokens, then the token at
	// the end of the "undo" list (most recently "undone") gives the
	// position of the parser
	_length := len(p._undo)
	if _length != 0 {
		return p._undo[_length-1].Position
	}

	// otherwise, return the position of the lexer
	return p._lexer.Position()
} // Position()

//
// private methods
//

// build attempts to build a well-formed .gitignore Pattern starting from the
// given Token t. An Error will be returned if the sequence of tokens returned
// by the Lexer does not represent a valid Pattern.
func (p *parser) build(t *Token) (Pattern, Error) {
	// attempt to create a valid pattern
	switch t.Type {
	// we have a negated pattern
	case NEGATION:
		return p.negation(t)

	// attempt to build a path specification
	default:
		return p.path(t)
	}
} // build()

// negation attempts to build a well-formed negated .gitignore Pattern starting
// from the negation Token t. As with build, negation returns an Error if the
// sequence of tokens returned by the Lexer does not represent a valid Pattern.
func (p *parser) negation(t *Token) (Pattern, Error) {
	// a negation appears before a path specification, so
	// skip the negation token
	_next, _err := p.next()
	if _err != nil {
		return nil, _err
	}

	// extract the sequence of tokens for this path
	_tokens, _err := p.sequence(_next)
	if _err != nil {
		return nil, _err
	}

	// include the "negation" token at the front of the sequence
	_tokens = append([]*Token{t}, _tokens...)

	// return the Pattern instance
	return NewPattern(_tokens), nil
} // negation()
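
// Editorial note: for a line such as "!build/", the lexer would emit roughly
// the sequence NEGATION, PATTERN("build"), SEPARATOR, EOL (the exact token
// contents depend on the Lexer); negation consumes the NEGATION token, parses
// the rest via sequence, then re-attaches the NEGATION token at the front.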

// path attempts to build a well-formed .gitignore Pattern representing a path
// specification, starting with the Token t. If the sequence of tokens returned
// by the Lexer does not represent a valid Pattern, path returns an Error.
// Trailing whitespace is dropped from the sequence of pattern tokens.
func (p *parser) path(t *Token) (Pattern, Error) {
	// extract the sequence of tokens for this path
	_tokens, _err := p.sequence(t)
	if _err != nil {
		return nil, _err
	}

	// remove trailing whitespace tokens
	_length := len(_tokens)
	for _length > 0 {
		// if we have a non-whitespace token, we can stop
		_length--
		if _tokens[_length].Type != WHITESPACE {
			break
		}

		// otherwise, truncate the token list
		_tokens = _tokens[:_length]
	}

	// return the Pattern instance
	return NewPattern(_tokens), nil
} // path()
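
// Editorial note: dropping trailing WHITESPACE tokens mirrors git's own
// handling of .gitignore lines, where trailing spaces are ignored unless
// escaped with a backslash; a line such as "build/  " therefore yields the
// same Pattern as "build/".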

// sequence attempts to extract a well-formed Token sequence from the Lexer
// representing a .gitignore Pattern. sequence returns an Error if the
// retrieved sequence of tokens does not represent a valid Pattern.
func (p *parser) sequence(t *Token) ([]*Token, Error) {
	// extract the sequence of tokens for a valid path
	//      - this excludes the negation token, which is handled as
	//        a special case before sequence() is called
	switch t.Type {
	// the path starts with a separator
	case SEPARATOR:
		return p.separator(t)

	// the path starts with the "any" pattern ("**")
	case ANY:
		return p.any(t)

	// the path starts with whitespace or a pattern (which may
	// itself contain wildcards)
	case WHITESPACE:
		fallthrough
	case PATTERN:
		return p.pattern(t)
	}

	// otherwise, we have an invalid specification
	p.undo(t)
	return nil, p.err(InvalidPatternError)
} // sequence()
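
// Editorial note: taken together, sequence, separator, any and pattern form a
// small recursive-descent parser. Informally, the accepted token grammar is
// roughly:
//
//	sequence  ::= separator | any | pattern
//	separator ::= SEPARATOR [ any | pattern | separator ]
//	any       ::= ANY [ separator | WHITESPACE-to-EOL ]
//	pattern   ::= (WHITESPACE | PATTERN) [ separator | pattern ]
//
// with each production also accepting EOL or EOF to terminate the sequence.
// An invalid lookahead token is returned to the stream via undo before an
// InvalidPatternError is raised.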

// separator attempts to retrieve a valid sequence of tokens that may appear
// after the path separator '/' Token t. An Error is returned if the sequence
// of tokens is not valid, or if there is an error extracting tokens from the
// input stream.
func (p *parser) separator(t *Token) ([]*Token, Error) {
	// build a list of tokens that may appear after a separator
	_tokens := []*Token{t}
	_token, _err := p.next()
	if _err != nil {
		return _tokens, _err
	}

	// what tokens are we allowed to have follow a separator?
	switch _token.Type {
	// a separator can be followed by a pattern or
	// an "any" pattern (i.e. "**")
	case ANY:
		_next, _err := p.any(_token)
		return append(_tokens, _next...), _err

	case WHITESPACE:
		fallthrough
	case PATTERN:
		_next, _err := p.pattern(_token)
		return append(_tokens, _next...), _err

	// if we encounter end of line or file we are done
	case EOL:
		fallthrough
	case EOF:
		return _tokens, nil

	// a separator can be followed by another separator
	//      - it's not ideal, and not very useful, but it's interpreted
	//        as a single separator
	//      - we could clean it up here, but instead we pass
	//        everything down to the matching later on
	case SEPARATOR:
		_next, _err := p.separator(_token)
		return append(_tokens, _next...), _err
	}

	// any other token is invalid
	p.undo(_token)
	return _tokens, p.err(InvalidPatternError)
} // separator()
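
// Editorial note: the SEPARATOR case above means a line such as "foo//bar"
// parses successfully with both SEPARATOR tokens kept in the sequence;
// collapsing the run into a single separator is deferred to the matching
// code rather than being done here.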

// any attempts to retrieve a valid sequence of tokens that may appear
// after the any '**' Token t. An Error is returned if the sequence of
// tokens is not valid, or if there is an error extracting tokens from the
// input stream.
func (p *parser) any(t *Token) ([]*Token, Error) {
	// build the list of tokens that may appear after "any" (i.e. "**")
	_tokens := []*Token{t}
	_token, _err := p.next()
	if _err != nil {
		return _tokens, _err
	}

	// what tokens are we allowed to have follow an "any" symbol?
	switch _token.Type {
	// an "any" token may only be followed by a separator
	case SEPARATOR:
		_next, _err := p.separator(_token)
		return append(_tokens, _next...), _err

	// whitespace is acceptable if it takes us to the end of the line
	case WHITESPACE:
		return _tokens, p.eol()

	// if we encounter end of line or file we are done
	case EOL:
		fallthrough
	case EOF:
		return _tokens, nil
	}

	// any other token is invalid
	p.undo(_token)
	return _tokens, p.err(InvalidPatternError)
} // any()

// pattern attempts to retrieve a valid sequence of tokens that may appear
// after the path pattern Token t. An Error is returned if the sequence of
// tokens is not valid, or if there is an error extracting tokens from the
// input stream.
func (p *parser) pattern(t *Token) ([]*Token, Error) {
	// build the list of tokens that may appear after a pattern
	_tokens := []*Token{t}
	_token, _err := p.next()
	if _err != nil {
		return _tokens, _err
	}

	// what tokens are we allowed to have follow a pattern?
	var _next []*Token
	switch _token.Type {
	case SEPARATOR:
		_next, _err = p.separator(_token)
		return append(_tokens, _next...), _err

	case WHITESPACE:
		fallthrough
	case PATTERN:
		_next, _err = p.pattern(_token)
		return append(_tokens, _next...), _err

	// if we encounter end of line or file we are done
	case EOL:
		fallthrough
	case EOF:
		return _tokens, nil
	}

	// any other token is invalid
	p.undo(_token)
	return _tokens, p.err(InvalidPatternError)
} // pattern()

// eol attempts to consume the next Lexer token to read the end of line or end
// of file. If an EOL or EOF is not reached, eol returns an Error.
func (p *parser) eol() Error {
	// are we at the end of the line?
	_token, _err := p.next()
	if _err != nil {
		return _err
	}

	// have we encountered whitespace only?
	switch _token.Type {
	// if we're at the end of the line or file, we're done
	case EOL:
		fallthrough
	case EOF:
		p.undo(_token)
		return nil
	}

	// otherwise, we have an invalid pattern
	p.undo(_token)
	return p.err(InvalidPatternError)
} // eol()

// next returns the next token from the Lexer, or an error if there is a
// problem reading from the input stream.
func (p *parser) next() (*Token, Error) {
	// do we have any previously read tokens?
	_length := len(p._undo)
	if _length > 0 {
		_token := p._undo[_length-1]
		p._undo = p._undo[:_length-1]
		return _token, nil
	}

	// otherwise, attempt to retrieve the next token from the lexer
	return p._lexer.Next()
} // next()

// skip reads Tokens from the input until the end of line or end of file is
// reached. If there is a problem reading tokens, an Error is returned.
func (p *parser) skip() Error {
	// skip to the next end of line or end of file token
	for {
		_token, _err := p.next()
		if _err != nil {
			return _err
		}

		// if we have an end of line or file token, then we can stop
		switch _token.Type {
		case EOL:
			fallthrough
		case EOF:
			return nil
		}
	}
} // skip()

// undo returns the given Token t to the parser input stream to be retrieved
// again on a subsequent call to next.
func (p *parser) undo(t *Token) {
	// add this token to the list of previously read tokens
	//      - initialise the undo list if required
	if p._undo == nil {
		p._undo = make([]*Token, 0, 1)
	}
	p._undo = append(p._undo, t)
} // undo()
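
// Editorial note: together, next and undo give the parser token pushback.
// In the grammar above each decision point reads at most one token ahead
// before either consuming it or returning it here, so in practice the undo
// slice holds at most a single token at any time.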

// err returns an Error for the error e, capturing the current parser Position.
func (p *parser) err(e error) Error {
	// convert the error to include the parser position
	return NewError(e, p.Position())
} // err()

// errors returns the response from the parser error handler to the Error e. If
// no error handler has been configured for this parser, errors returns true.
func (p *parser) errors(e Error) bool {
	// do we have an error handler?
	if p._error == nil {
		return true
	}

	// pass the error through to the error handler
	//      - if this returns false, parsing will stop
	return p._error(e)
} // errors()