github.com/muesli/go-gitignore@v0.0.0-20200714020803-ff91c85188b2/parser.go (about) 1 package gitignore 2 3 import ( 4 "io" 5 ) 6 7 // Parser is the interface for parsing .gitignore files and extracting the set 8 // of patterns specified in the .gitignore file. 9 type Parser interface { 10 // Parse returns all well-formed .gitignore Patterns contained within the 11 // parser stream. Parsing will terminate at the end of the stream, or if 12 // the parser error handler returns false. 13 Parse() []Pattern 14 15 // Next returns the next well-formed .gitignore Pattern from the parser 16 // stream. If an error is encountered, and the error handler is either 17 // not defined, or returns true, Next will skip to the end of the current 18 // line and attempt to parse the next Pattern. If the error handler 19 // returns false, or the parser reaches the end of the stream, Next 20 // returns nil. 21 Next() Pattern 22 23 // Position returns the current position of the parser in the input stream. 24 Position() Position 25 } // Parser{} 26 27 // parser is the implementation of the .gitignore parser 28 type parser struct { 29 _lexer Lexer 30 _undo []*Token 31 _error func(Error) bool 32 } // parser{} 33 34 // NewParser returns a new Parser instance for the given stream r. 35 // If err is not nil, it will be called for every error encountered during 36 // parsing. Parsing will terminate at the end of the stream, or if err 37 // returns false. 38 func NewParser(r io.Reader, err func(Error) bool) Parser { 39 return &parser{_lexer: NewLexer(r), _error: err} 40 } // NewParser() 41 42 // Parse returns all well-formed .gitignore Patterns contained within the 43 // parser stream. Parsing will terminate at the end of the stream, or if 44 // the parser error handler returns false. 45 func (p *parser) Parse() []Pattern { 46 // keep parsing until there's no more patterns 47 _patterns := make([]Pattern, 0) 48 for { 49 _pattern := p.Next() 50 if _pattern == nil { 51 return _patterns 52 } 53 _patterns = append(_patterns, _pattern) 54 } 55 } // Parse() 56 57 // Next returns the next well-formed .gitignore Pattern from the parser stream. 58 // If an error is encountered, and the error handler is either not defined, or 59 // returns true, Next will skip to the end of the current line and attempt to 60 // parse the next Pattern. If the error handler returns false, or the parser 61 // reaches the end of the stream, Next returns nil. 62 func (p *parser) Next() Pattern { 63 // keep searching until we find the next pattern, or until we 64 // reach the end of the file 65 for { 66 _token, _err := p.next() 67 if _err != nil { 68 if !p.errors(_err) { 69 return nil 70 } 71 72 // we got an error from the lexer, so skip the remainder 73 // of this line and try again from the next line 74 for _err != nil { 75 _err = p.skip() 76 if _err != nil { 77 if !p.errors(_err) { 78 return nil 79 } 80 } 81 } 82 continue 83 } 84 85 switch _token.Type { 86 // we're at the end of the file 87 case EOF: 88 return nil 89 90 // we have a blank line or comment 91 case EOL: 92 continue 93 case COMMENT: 94 continue 95 96 // otherwise, attempt to build the next pattern 97 default: 98 _pattern, _err := p.build(_token) 99 if _err != nil { 100 if !p.errors(_err) { 101 return nil 102 } 103 104 // we encountered an error parsing the retrieved tokens 105 // - skip to the end of the line 106 for _err != nil { 107 _err = p.skip() 108 if _err != nil { 109 if !p.errors(_err) { 110 return nil 111 } 112 } 113 } 114 115 // skip to the next token 116 continue 117 } else if _pattern != nil { 118 return _pattern 119 } 120 } 121 } 122 } // Next() 123 124 // Position returns the current position of the parser in the input stream. 125 func (p *parser) Position() Position { 126 // if we have any previously read tokens, then the token at 127 // the end of the "undo" list (most recently "undone") gives the 128 // position of the parser 129 _length := len(p._undo) 130 if _length != 0 { 131 return p._undo[_length-1].Position 132 } 133 134 // otherwise, return the position of the lexer 135 return p._lexer.Position() 136 } // Position() 137 138 // 139 // private methods 140 // 141 142 // build attempts to build a well-formed .gitignore Pattern starting from the 143 // given Token t. An Error will be returned if the sequence of tokens returned 144 // by the Lexer does not represent a valid Pattern. 145 func (p *parser) build(t *Token) (Pattern, Error) { 146 // attempt to create a valid pattern 147 switch t.Type { 148 // we have a negated pattern 149 case NEGATION: 150 return p.negation(t) 151 152 // attempt to build a path specification 153 default: 154 return p.path(t) 155 } 156 } // build() 157 158 // negation attempts to build a well-formed negated .gitignore Pattern starting 159 // from the negation Token t. As with build, negation returns an Error if the 160 // sequence of tokens returned by the Lexer does not represent a valid Pattern. 161 func (p *parser) negation(t *Token) (Pattern, Error) { 162 // a negation appears before a path specification, so 163 // skip the negation token 164 _next, _err := p.next() 165 if _err != nil { 166 return nil, _err 167 } 168 169 // extract the sequence of tokens for this path 170 _tokens, _err := p.sequence(_next) 171 if _err != nil { 172 return nil, _err 173 } 174 175 // include the "negation" token at the front of the sequence 176 _tokens = append([]*Token{t}, _tokens...) 177 178 // return the Pattern instance 179 return NewPattern(_tokens), nil 180 } // negation() 181 182 // path attempts to build a well-formed .gitignore Pattern representing a path 183 // specification, starting with the Token t. If the sequence of tokens returned 184 // by the Lexer does not represent a valid Pattern, path returns an Error. 185 // Trailing whitespace is dropped from the sequence of pattern tokens. 186 func (p *parser) path(t *Token) (Pattern, Error) { 187 // extract the sequence of tokens for this path 188 _tokens, _err := p.sequence(t) 189 if _err != nil { 190 return nil, _err 191 } 192 193 // remove trailing whitespace tokens 194 _length := len(_tokens) 195 for _length > 0 { 196 // if we have a non-whitespace token, we can stop 197 _length-- 198 if _tokens[_length].Type != WHITESPACE { 199 break 200 } 201 202 // otherwise, truncate the token list 203 _tokens = _tokens[:_length] 204 } 205 206 // return the Pattern instance 207 return NewPattern(_tokens), nil 208 } // path() 209 210 // sequence attempts to extract a well-formed Token sequence from the Lexer 211 // representing a .gitignore Pattern. sequence returns an Error if the 212 // retrieved sequence of tokens does not represent a valid Pattern. 213 func (p *parser) sequence(t *Token) ([]*Token, Error) { 214 // extract the sequence of tokens for a valid path 215 // - this excludes the negation token, which is handled as 216 // a special case before sequence() is called 217 switch t.Type { 218 // the path starts with a separator 219 case SEPARATOR: 220 return p.separator(t) 221 222 // the path starts with the "any" pattern ("**") 223 case ANY: 224 return p.any(t) 225 226 // the path starts with whitespace, wildcard or a pattern 227 case WHITESPACE: 228 fallthrough 229 case PATTERN: 230 return p.pattern(t) 231 } 232 233 // otherwise, we have an invalid specification 234 p.undo(t) 235 return nil, p.err(InvalidPatternError) 236 } // sequence() 237 238 // separator attempts to retrieve a valid sequence of tokens that may appear 239 // after the path separator '/' Token t. An Error is returned if the sequence if 240 // tokens is not valid, or if there is an error extracting tokens from the 241 // input stream. 242 func (p *parser) separator(t *Token) ([]*Token, Error) { 243 // build a list of tokens that may appear after a separator 244 _tokens := []*Token{t} 245 _token, _err := p.next() 246 if _err != nil { 247 return _tokens, _err 248 } 249 250 // what tokens are we allowed to have follow a separator? 251 switch _token.Type { 252 // a separator can be followed by a pattern or 253 // an "any" pattern (i.e. "**") 254 case ANY: 255 _next, _err := p.any(_token) 256 return append(_tokens, _next...), _err 257 258 case WHITESPACE: 259 fallthrough 260 case PATTERN: 261 _next, _err := p.pattern(_token) 262 return append(_tokens, _next...), _err 263 264 // if we encounter end of line or file we are done 265 case EOL: 266 fallthrough 267 case EOF: 268 return _tokens, nil 269 270 // a separator can be followed by another separator 271 // - it's not ideal, and not very useful, but it's interpreted 272 // as a single separator 273 // - we could clean it up here, but instead we pass 274 // everything down to the matching later on 275 case SEPARATOR: 276 _next, _err := p.separator(_token) 277 return append(_tokens, _next...), _err 278 } 279 280 // any other token is invalid 281 p.undo(_token) 282 return _tokens, p.err(InvalidPatternError) 283 } // separator() 284 285 // any attempts to retrieve a valid sequence of tokens that may appear 286 // after the any '**' Token t. An Error is returned if the sequence if 287 // tokens is not valid, or if there is an error extracting tokens from the 288 // input stream. 289 func (p *parser) any(t *Token) ([]*Token, Error) { 290 // build the list of tokens that may appear after "any" (i.e. "**") 291 _tokens := []*Token{t} 292 _token, _err := p.next() 293 if _err != nil { 294 return _tokens, _err 295 } 296 297 // what tokens are we allowed to have follow an "any" symbol? 298 switch _token.Type { 299 // an "any" token may only be followed by a separator 300 case SEPARATOR: 301 _next, _err := p.separator(_token) 302 return append(_tokens, _next...), _err 303 304 // whitespace is acceptable if it takes us to the end of the line 305 case WHITESPACE: 306 return _tokens, p.eol() 307 308 // if we encounter end of line or file we are done 309 case EOL: 310 fallthrough 311 case EOF: 312 return _tokens, nil 313 } 314 315 // any other token is invalid 316 p.undo(_token) 317 return _tokens, p.err(InvalidPatternError) 318 } // any() 319 320 // pattern attempts to retrieve a valid sequence of tokens that may appear 321 // after the path pattern Token t. An Error is returned if the sequence if 322 // tokens is not valid, or if there is an error extracting tokens from the 323 // input stream. 324 func (p *parser) pattern(t *Token) ([]*Token, Error) { 325 // build the list of tokens that may appear after a pattern 326 _tokens := []*Token{t} 327 _token, _err := p.next() 328 if _err != nil { 329 return _tokens, _err 330 } 331 332 // what tokens are we allowed to have follow a pattern? 333 var _next []*Token 334 switch _token.Type { 335 case SEPARATOR: 336 _next, _err = p.separator(_token) 337 return append(_tokens, _next...), _err 338 339 case WHITESPACE: 340 fallthrough 341 case PATTERN: 342 _next, _err = p.pattern(_token) 343 return append(_tokens, _next...), _err 344 345 // if we encounter end of line or file we are done 346 case EOL: 347 fallthrough 348 case EOF: 349 return _tokens, nil 350 } 351 352 // any other token is invalid 353 p.undo(_token) 354 return _tokens, p.err(InvalidPatternError) 355 } // pattern() 356 357 // eol attempts to consume the next Lexer token to read the end of line or end 358 // of file. If a EOL or EOF is not reached , eol will return an error. 359 func (p *parser) eol() Error { 360 // are we at the end of the line? 361 _token, _err := p.next() 362 if _err != nil { 363 return _err 364 } 365 366 // have we encountered whitespace only? 367 switch _token.Type { 368 // if we're at the end of the line or file, we're done 369 case EOL: 370 fallthrough 371 case EOF: 372 p.undo(_token) 373 return nil 374 } 375 376 // otherwise, we have an invalid pattern 377 p.undo(_token) 378 return p.err(InvalidPatternError) 379 } // eol() 380 381 // next returns the next token from the Lexer, or an error if there is a 382 // problem reading from the input stream. 383 func (p *parser) next() (*Token, Error) { 384 // do we have any previously read tokens? 385 _length := len(p._undo) 386 if _length > 0 { 387 _token := p._undo[_length-1] 388 p._undo = p._undo[:_length-1] 389 return _token, nil 390 } 391 392 // otherwise, attempt to retrieve the next token from the lexer 393 return p._lexer.Next() 394 } // next() 395 396 // skip reads Tokens from the input until the end of line or end of file is 397 // reached. If there is a problem reading tokens, an Error is returned. 398 func (p *parser) skip() Error { 399 // skip to the next end of line or end of file token 400 for { 401 _token, _err := p.next() 402 if _err != nil { 403 return _err 404 } 405 406 // if we have an end of line or file token, then we can stop 407 switch _token.Type { 408 case EOL: 409 fallthrough 410 case EOF: 411 return nil 412 } 413 } 414 } // skip() 415 416 // undo returns the given Token t to the parser input stream to be retrieved 417 // again on a subsequent call to next. 418 func (p *parser) undo(t *Token) { 419 // add this token to the list of previously read tokens 420 // - initialise the undo list if required 421 if p._undo == nil { 422 p._undo = make([]*Token, 0, 1) 423 } 424 p._undo = append(p._undo, t) 425 } // undo() 426 427 // err returns an Error for the error e, capturing the current parser Position. 428 func (p *parser) err(e error) Error { 429 // convert the error to include the parser position 430 return NewError(e, p.Position()) 431 } // err() 432 433 // errors returns the response from the parser error handler to the Error e. If 434 // no error handler has been configured for this parser, errors returns true. 435 func (p *parser) errors(e Error) bool { 436 // do we have an error handler? 437 if p._error == nil { 438 return true 439 } 440 441 // pass the error through to the error handler 442 // - if this returns false, parsing will stop 443 return p._error(e) 444 } // errors()