github.com/boyter/gocodewalker@v1.3.2/go-gitignore/lexer.go (about) 1 // SPDX-License-Identifier: MIT 2 3 package gitignore 4 5 import ( 6 "bufio" 7 "io" 8 ) 9 10 // 11 // inspired by https://blog.gopheracademy.com/advent-2014/parsers-lexers/ 12 // 13 14 // lexer is the implementation of the .gitignore lexical analyser 15 type lexer struct { 16 _r *bufio.Reader 17 _unread []rune 18 _offset int 19 _line int 20 _column int 21 _previous []int 22 } // lexer{} 23 24 // Lexer is the interface to the lexical analyser for .gitignore files 25 type Lexer interface { 26 // Next returns the next Token from the Lexer reader. If an error is 27 // encountered, it will be returned as an Error instance, detailing the 28 // error and its position within the stream. 29 Next() (*Token, Error) 30 31 // Position returns the current position of the Lexer. 32 Position() Position 33 34 // String returns the string representation of the current position of the 35 // Lexer. 36 String() string 37 } 38 39 // NewLexer returns a Lexer instance for the io.Reader r. 40 func NewLexer(r io.Reader) Lexer { 41 return &lexer{_r: bufio.NewReader(r), _line: 1, _column: 1} 42 } // NewLexer() 43 44 // Next returns the next Token from the Lexer reader. If an error is 45 // encountered, it will be returned as an Error instance, detailing the error 46 // and its position within the stream. 47 func (l *lexer) Next() (*Token, Error) { 48 // are we at the beginning of the line? 49 _beginning := l.beginning() 50 51 // read the next rune 52 _r, _err := l.read() 53 if _err != nil { 54 return nil, _err 55 } 56 57 switch _r { 58 // end of file 59 case _EOF: 60 return l.token(EOF, nil, nil) 61 62 // whitespace ' ', '\t' 63 case _SPACE: 64 fallthrough 65 case _TAB: 66 l.unread(_r) 67 _rtn, _err := l.whitespace() 68 return l.token(WHITESPACE, _rtn, _err) 69 70 // end of line '\n' or '\r\n' 71 case _CR: 72 fallthrough 73 case _NEWLINE: 74 l.unread(_r) 75 _rtn, _err := l.eol() 76 return l.token(EOL, _rtn, _err) 77 78 // separator '/' 79 case _SEPARATOR: 80 return l.token(SEPARATOR, []rune{_r}, nil) 81 82 // '*' or any '**' 83 case _WILDCARD: 84 // is the wildcard followed by another wildcard? 85 // - does this represent the "any" token (i.e. "**") 86 _next, _err := l.peek() 87 if _err != nil { 88 return nil, _err 89 } else if _next == _WILDCARD { 90 // we know read() will succeed here since we used peek() above 91 _, _ = l.read() 92 return l.token(ANY, []rune{_WILDCARD, _WILDCARD}, nil) 93 } 94 95 // we have a single wildcard, so treat this as a pattern 96 l.unread(_r) 97 _rtn, _err := l.pattern() 98 return l.token(PATTERN, _rtn, _err) 99 100 // comment '#' 101 case _COMMENT: 102 l.unread(_r) 103 104 // if we are at the start of the line, then we treat this as a comment 105 if _beginning { 106 _rtn, _err := l.comment() 107 return l.token(COMMENT, _rtn, _err) 108 } 109 110 // otherwise, we regard this as a pattern 111 _rtn, _err := l.pattern() 112 return l.token(PATTERN, _rtn, _err) 113 114 // negation '!' 115 case _NEGATION: 116 if _beginning { 117 return l.token(NEGATION, []rune{_r}, nil) 118 } 119 fallthrough 120 121 // pattern 122 default: 123 l.unread(_r) 124 _rtn, _err := l.pattern() 125 return l.token(PATTERN, _rtn, _err) 126 } 127 } // Next() 128 129 // Position returns the current position of the Lexer. 130 func (l *lexer) Position() Position { 131 return Position{"", l._line, l._column, l._offset} 132 } // Position() 133 134 // String returns the string representation of the current position of the 135 // Lexer. 136 func (l *lexer) String() string { 137 return l.Position().String() 138 } // String() 139 140 // 141 // private methods 142 // 143 144 // read the next rune from the stream. Return an Error if there is a problem 145 // reading from the stream. If the end of stream is reached, return the EOF 146 // Token. 147 func (l *lexer) read() (rune, Error) { 148 var _r rune 149 var _err error 150 151 // do we have any unread runes to read? 152 _length := len(l._unread) 153 if _length > 0 { 154 _r = l._unread[_length-1] 155 l._unread = l._unread[:_length-1] 156 157 // otherwise, attempt to read a new rune 158 } else { 159 _r, _, _err = l._r.ReadRune() 160 if _err == io.EOF { 161 return _EOF, nil 162 } 163 } 164 165 // increment the offset and column counts 166 l._offset++ 167 l._column++ 168 169 return _r, l.err(_err) 170 } // read() 171 172 // unread returns the given runes to the stream, making them eligible to be 173 // read again. The runes are returned in the order given, so the last rune 174 // specified will be the next rune read from the stream. 175 func (l *lexer) unread(r ...rune) { 176 // ignore EOF runes 177 _r := make([]rune, 0) 178 for _, _rune := range r { 179 if _rune != _EOF { 180 _r = append(_r, _rune) 181 } 182 } 183 184 // initialise the unread rune list if necessary 185 if l._unread == nil { 186 l._unread = make([]rune, 0) 187 } 188 if len(_r) != 0 { 189 l._unread = append(l._unread, _r...) 190 } 191 192 // decrement the offset and column counts 193 // - we have to take care of column being 0 194 // - at present we can only unwind across a single line boundary 195 _length := len(_r) 196 for ; _length > 0; _length-- { 197 l._offset-- 198 if l._column == 1 { 199 _length := len(l._previous) 200 if _length > 0 { 201 l._column = l._previous[_length-1] 202 l._previous = l._previous[:_length-1] 203 l._line-- 204 } 205 } else { 206 l._column-- 207 } 208 } 209 } // unread() 210 211 // peek returns the next rune in the stream without consuming it (i.e. it will 212 // be returned by the next call to read or peek). peek will return an error if 213 // there is a problem reading from the stream. 214 func (l *lexer) peek() (rune, Error) { 215 // read the next rune 216 _r, _err := l.read() 217 if _err != nil { 218 return _r, _err 219 } 220 221 // unread & return the rune 222 l.unread(_r) 223 return _r, _err 224 } // peek() 225 226 // newline adjusts the positional counters when an end of line is reached 227 func (l *lexer) newline() { 228 // adjust the counters for the new line 229 if l._previous == nil { 230 l._previous = make([]int, 0) 231 } 232 l._previous = append(l._previous, l._column) 233 l._column = 1 234 l._line++ 235 } // newline() 236 237 // comment reads all runes until a newline or end of file is reached. An 238 // error is returned if an error is encountered reading from the stream. 239 func (l *lexer) comment() ([]rune, Error) { 240 _comment := make([]rune, 0) 241 242 // read until we reach end of line or end of file 243 // - as we are in a comment, we ignore escape characters 244 for { 245 _next, _err := l.read() 246 if _err != nil { 247 return _comment, _err 248 } 249 250 // read until we have end of line or end of file 251 switch _next { 252 case _CR: 253 fallthrough 254 case _NEWLINE: 255 fallthrough 256 case _EOF: 257 // return the read run to the stream and stop 258 l.unread(_next) 259 return _comment, nil 260 } 261 262 // otherwise, add this run to the comment 263 _comment = append(_comment, _next) 264 } 265 } // comment() 266 267 // escape attempts to read an escape sequence (e.g. '\ ') form the input 268 // stream. An error will be returned if there is an error reading from the 269 // stream. escape returns just the escape rune if the following rune is either 270 // end of line or end of file (since .gitignore files do not support line 271 // continuations). 272 func (l *lexer) escape() ([]rune, Error) { 273 // attempt to process the escape sequence 274 _peek, _err := l.peek() 275 if _err != nil { 276 return nil, _err 277 } 278 279 // what is the next rune after the escape? 280 switch _peek { 281 // are we at the end of the line or file? 282 // - we return just the escape rune 283 case _CR: 284 fallthrough 285 case _NEWLINE: 286 fallthrough 287 case _EOF: 288 return []rune{_ESCAPE}, nil 289 } 290 291 // otherwise, return the escape and the next rune 292 // - we know read() will succeed here since we used peek() above 293 _, _ = l.read() 294 return []rune{_ESCAPE, _peek}, nil 295 } // escape() 296 297 // eol returns all runes from the current position to the end of the line. An 298 // error is returned if there is a problem reading from the stream, or if a 299 // carriage return character '\r' is encountered that is not followed by a 300 // newline '\n'. 301 func (l *lexer) eol() ([]rune, Error) { 302 // read the to the end of the line 303 // - we should only be called here when we encounter an end of line 304 // sequence 305 _line := make([]rune, 0, 1) 306 307 // loop until there's nothing more to do 308 for { 309 _next, _err := l.read() 310 if _err != nil { 311 return _line, _err 312 } 313 314 // read until we have a newline or we're at end of file 315 switch _next { 316 // end of file 317 case _EOF: 318 return _line, nil 319 320 // carriage return - we expect to see a newline next 321 case _CR: 322 _line = append(_line, _next) 323 _next, _err = l.read() 324 if _err != nil { 325 return _line, _err 326 } else if _next != _NEWLINE { 327 l.unread(_next) 328 return _line, l.err(CarriageReturnError) 329 } 330 fallthrough 331 332 // newline 333 case _NEWLINE: 334 _line = append(_line, _next) 335 return _line, nil 336 } 337 } 338 } // eol() 339 340 // whitespace returns all whitespace (i.e. ' ' and '\t') runes in a sequence, 341 // or an error if there is a problem reading the next runes. 342 func (l *lexer) whitespace() ([]rune, Error) { 343 // read until we hit the first non-whitespace rune 344 _ws := make([]rune, 0, 1) 345 346 // loop until there's nothing more to do 347 for { 348 _next, _err := l.read() 349 if _err != nil { 350 return _ws, _err 351 } 352 353 // what is this next rune? 354 switch _next { 355 // space or tab is consumed 356 case _SPACE: 357 fallthrough 358 case _TAB: 359 break 360 361 // non-whitespace rune 362 default: 363 // return the rune to the buffer and we're done 364 l.unread(_next) 365 return _ws, nil 366 } 367 368 // add this rune to the whitespace 369 _ws = append(_ws, _next) 370 } 371 } // whitespace() 372 373 // pattern returns all runes representing a file or path pattern, delimited 374 // either by unescaped whitespace, a path separator '/' or enf of file. An 375 // error is returned if a problem is encountered reading from the stream. 376 func (l *lexer) pattern() ([]rune, Error) { 377 // read until we hit the first whitespace/end of line/eof rune 378 _pattern := make([]rune, 0, 1) 379 380 // loop until there's nothing more to do 381 for { 382 _r, _err := l.read() 383 if _err != nil { 384 return _pattern, _err 385 } 386 387 // what is the next rune? 388 switch _r { 389 // whitespace, newline, end of file, separator 390 // - this is the end of the pattern 391 case _SPACE: 392 fallthrough 393 case _TAB: 394 fallthrough 395 case _CR: 396 fallthrough 397 case _NEWLINE: 398 fallthrough 399 case _SEPARATOR: 400 fallthrough 401 case _EOF: 402 // return what we have 403 l.unread(_r) 404 return _pattern, nil 405 406 // a wildcard is the end of the pattern if it is part of any '**' 407 case _WILDCARD: 408 _next, _err := l.peek() 409 if _err != nil { 410 return _pattern, _err 411 } else if _next == _WILDCARD { 412 l.unread(_r) 413 return _pattern, _err 414 } else { 415 _pattern = append(_pattern, _r) 416 } 417 418 // escape sequence - consume the next rune 419 case _ESCAPE: 420 _escape, _err := l.escape() 421 if _err != nil { 422 return _pattern, _err 423 } 424 425 // add the escape sequence as part of the pattern 426 _pattern = append(_pattern, _escape...) 427 428 // any other character, we add to the pattern 429 default: 430 _pattern = append(_pattern, _r) 431 } 432 } 433 } // pattern() 434 435 // token returns a Token instance of the given type_ represented by word runes. 436 func (l *lexer) token(type_ TokenType, word []rune, e Error) (*Token, Error) { 437 // if we have an error, then we return a BAD token 438 if e != nil { 439 type_ = BAD 440 } 441 442 // extract the lexer position 443 // - the column is taken from the current column position 444 // minus the length of the consumed "word" 445 _word := len(word) 446 _column := l._column - _word 447 _offset := l._offset - _word 448 position := Position{"", l._line, _column, _offset} 449 450 // if this is a newline token, we adjust the line & column counts 451 if type_ == EOL { 452 l.newline() 453 } 454 455 // return the Token 456 return NewToken(type_, word, position), e 457 } // token() 458 459 // err returns an Error encapsulating the error e and the current Lexer 460 // position. 461 func (l *lexer) err(e error) Error { 462 // do we have an error? 463 if e == nil { 464 return nil 465 } else { 466 return NewError(e, l.Position()) 467 } 468 } // err() 469 470 // beginning returns true if the Lexer is at the start of a new line. 471 func (l *lexer) beginning() bool { 472 return l._column == 1 473 } // beginning() 474 475 // ensure the lexer conforms to the lexer interface 476 var _ Lexer = &lexer{}