github.com/ianlewis/go-gitignore@v0.1.1-0.20231110021210-4a0f15cbd56f/pattern.go (about)

     1  // Copyright 2016 Denormal Limited
     2  // Copyright 2023 Google LLC
     3  //
     4  // Licensed under the Apache License, Version 2.0 (the "License");
     5  // you may not use this file except in compliance with the License.
     6  // You may obtain a copy of the License at
     7  //
     8  //      http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package gitignore
    17  
    18  import (
    19  	"path/filepath"
    20  	"strings"
    21  
    22  	"github.com/danwakefield/fnmatch"
    23  )
    24  
    25  // Pattern represents per-line patterns within a .gitignore file
    26  type Pattern interface {
    27  	Match
    28  
    29  	// Match returns true if the given path matches the name pattern. If the
    30  	// pattern is meant for directories only, and the path is not a directory,
    31  	// Match will return false. The matching is performed by fnmatch(). It
    32  	// is assumed path is relative to the base path of the owning GitIgnore.
    33  	Match(string, bool) bool
    34  }
    35  
    36  // pattern is the base implementation of a .gitignore pattern
    37  type pattern struct {
    38  	_negated   bool
    39  	_anchored  bool
    40  	_directory bool
    41  	_string    string
    42  	_fnmatch   string
    43  	_position  Position
    44  } // pattern()
    45  
    46  // name represents patterns matching a file or path name (i.e. the last
    47  // component of a path)
    48  type name struct {
    49  	pattern
    50  } // name{}
    51  
    52  // path represents a pattern that contains at least one path separator within
    53  // the pattern (i.e. not at the start or end of the pattern)
    54  type path struct {
    55  	pattern
    56  	_depth int
    57  } // path{}
    58  
    59  // any represents a pattern that contains at least one "any" token "**"
    60  // allowing for recursive matching.
    61  type any struct {
    62  	pattern
    63  	_tokens []*Token
    64  } // any{}
    65  
    66  // NewPattern returns a Pattern from the ordered slice of Tokens. The tokens are
    67  // assumed to represent a well-formed .gitignore pattern. A Pattern may be
    68  // negated, anchored to the start of the path (relative to the base directory
    69  // of tie containing .gitignore), or match directories only.
    70  func NewPattern(tokens []*Token) Pattern {
    71  	// if we have no tokens there is no pattern
    72  	if len(tokens) == 0 {
    73  		return nil
    74  	}
    75  
    76  	// extract the pattern position from first token
    77  	_position := tokens[0].Position
    78  	_string := tokenset(tokens).String()
    79  
    80  	// is this a negated pattern?
    81  	_negated := false
    82  	if tokens[0].Type == NEGATION {
    83  		_negated = true
    84  		tokens = tokens[1:]
    85  	}
    86  
    87  	// is this pattern anchored to the start of the path?
    88  	_anchored := false
    89  	if tokens[0].Type == SEPARATOR {
    90  		_anchored = true
    91  		tokens = tokens[1:]
    92  	}
    93  
    94  	// is this pattern for directories only?
    95  	_directory := false
    96  	_last := len(tokens) - 1
    97  	if tokens[_last].Type == SEPARATOR {
    98  		_directory = true
    99  		tokens = tokens[:_last]
   100  	}
   101  
   102  	// build the pattern expression
   103  	_fnmatch := tokenset(tokens).String()
   104  	_pattern := &pattern{
   105  		_negated:   _negated,
   106  		_anchored:  _anchored,
   107  		_position:  _position,
   108  		_directory: _directory,
   109  		_string:    _string,
   110  		_fnmatch:   _fnmatch,
   111  	}
   112  	return _pattern.compile(tokens)
   113  } // NewPattern()
   114  
   115  // compile generates a specific Pattern (i.e. name, path or any)
   116  // represented by the list of tokens.
   117  func (p *pattern) compile(tokens []*Token) Pattern {
   118  	// what tokens do we have in this pattern?
   119  	//      - ANY token means we can match to any depth
   120  	//      - SEPARATOR means we have path rather than file matching
   121  	_separator := false
   122  	for _, _token := range tokens {
   123  		switch _token.Type {
   124  		case ANY:
   125  			return p.any(tokens)
   126  		case SEPARATOR:
   127  			_separator = true
   128  		}
   129  	}
   130  
   131  	// should we perform path or name/file matching?
   132  	if _separator {
   133  		return p.path(tokens)
   134  	} else {
   135  		return p.name(tokens)
   136  	}
   137  } // compile()
   138  
   139  // Ignore returns true if the pattern describes files or paths that should be
   140  // ignored.
   141  func (p *pattern) Ignore() bool { return !p._negated }
   142  
   143  // Include returns true if the pattern describes files or paths that should be
   144  // included (i.e. not ignored)
   145  func (p *pattern) Include() bool { return p._negated }
   146  
   147  // Position returns the position of the first token of this pattern.
   148  func (p *pattern) Position() Position { return p._position }
   149  
   150  // String returns the string representation of the pattern.
   151  func (p *pattern) String() string { return p._string }
   152  
   153  //
   154  // name patterns
   155  //      - designed to match trailing file/directory names only
   156  //
   157  
   158  // name returns a Pattern designed to match file or directory names, with no
   159  // path elements.
   160  func (p *pattern) name(tokens []*Token) Pattern {
   161  	return &name{*p}
   162  } // name()
   163  
   164  // Match returns true if the given path matches the name pattern. If the
   165  // pattern is meant for directories only, and the path is not a directory,
   166  // Match will return false. The matching is performed by fnmatch(). It
   167  // is assumed path is relative to the base path of the owning GitIgnore.
   168  func (n *name) Match(path string, isdir bool) bool {
   169  	// are we expecting a directory?
   170  	if n._directory && !isdir {
   171  		return false
   172  	}
   173  
   174  	// should we match the whole path, or just the last component?
   175  	if n._anchored {
   176  		return fnmatch.Match(n._fnmatch, path, 0)
   177  	} else {
   178  		_, _base := filepath.Split(path)
   179  		return fnmatch.Match(n._fnmatch, _base, 0)
   180  	}
   181  } // Match()
   182  
   183  //
   184  // path patterns
   185  //      - designed to match complete or partial paths (not just filenames)
   186  //
   187  
   188  // path returns a Pattern designed to match paths that include at least one
   189  // path separator '/' neither at the end nor the start of the pattern.
   190  func (p *pattern) path(tokens []*Token) Pattern {
   191  	// how many directory components are we expecting?
   192  	_depth := 0
   193  	for _, _token := range tokens {
   194  		if _token.Type == SEPARATOR {
   195  			_depth++
   196  		}
   197  	}
   198  
   199  	// return the pattern instance
   200  	return &path{pattern: *p, _depth: _depth}
   201  } // path()
   202  
   203  // Match returns true if the given path matches the path pattern. If the
   204  // pattern is meant for directories only, and the path is not a directory,
   205  // Match will return false. The matching is performed by fnmatch()
   206  // with flags set to FNM_PATHNAME. It is assumed path is relative to the
   207  // base path of the owning GitIgnore.
   208  func (p *path) Match(path string, isdir bool) bool {
   209  	// are we expecting a directory
   210  	if p._directory && !isdir {
   211  		return false
   212  	}
   213  
   214  	if fnmatch.Match(p._fnmatch, path, fnmatch.FNM_PATHNAME) {
   215  		return true
   216  	} else if p._anchored {
   217  		return false
   218  	}
   219  
   220  	// match against the trailing path elements
   221  	return fnmatch.Match(p._fnmatch, path, fnmatch.FNM_PATHNAME)
   222  } // Match()
   223  
   224  //
   225  // "any" patterns
   226  //
   227  
   228  // any returns a Pattern designed to match paths that include at least one
   229  // any pattern '**', specifying recursive matching.
   230  func (p *pattern) any(tokens []*Token) Pattern {
   231  	// consider only the non-SEPARATOR tokens, as these will be matched
   232  	// against the path components
   233  	_tokens := make([]*Token, 0)
   234  	for _, _token := range tokens {
   235  		if _token.Type != SEPARATOR {
   236  			_tokens = append(_tokens, _token)
   237  		}
   238  	}
   239  
   240  	return &any{*p, _tokens}
   241  } // any()
   242  
   243  // Match returns true if the given path matches the any pattern. If the
   244  // pattern is meant for directories only, and the path is not a directory,
   245  // Match will return false. The matching is performed by recursively applying
   246  // fnmatch() with flags set to FNM_PATHNAME. It is assumed path is relative to
   247  // the base path of the owning GitIgnore.
   248  func (a *any) Match(path string, isdir bool) bool {
   249  	// are we expecting a directory?
   250  	if a._directory && !isdir {
   251  		return false
   252  	}
   253  
   254  	// split the path into components
   255  	_parts := strings.Split(path, string(_SEPARATOR))
   256  
   257  	// attempt to match the parts against the pattern tokens
   258  	return a.match(_parts, a._tokens)
   259  } // Match()
   260  
   261  // match performs the recursive matching for 'any' patterns. An 'any'
   262  // token '**' may match any path component, or no path component.
   263  func (a *any) match(path []string, tokens []*Token) bool {
   264  	// if we have no more tokens, then we have matched this path
   265  	// if there are also no more path elements, otherwise there's no match
   266  	if len(tokens) == 0 {
   267  		return len(path) == 0
   268  	}
   269  
   270  	// what token are we trying to match?
   271  	_token := tokens[0]
   272  	switch _token.Type {
   273  	case ANY:
   274  		if len(path) == 0 {
   275  			return a.match(path, tokens[1:])
   276  		} else {
   277  			return a.match(path, tokens[1:]) || a.match(path[1:], tokens)
   278  		}
   279  
   280  	default:
   281  		// if we have a non-ANY token, then we must have a non-empty path
   282  		if len(path) != 0 {
   283  			// if the current path element matches this token,
   284  			// we match if the remainder of the path matches the
   285  			// remaining tokens
   286  			if fnmatch.Match(_token.Token(), path[0], fnmatch.FNM_PATHNAME) {
   287  				return a.match(path[1:], tokens[1:])
   288  			}
   289  		}
   290  	}
   291  
   292  	// if we are here, then we have no match
   293  	return false
   294  } // match()
   295  
   296  // ensure the patterns confirm to the Pattern interface
   297  var (
   298  	_ Pattern = &name{}
   299  	_ Pattern = &path{}
   300  	_ Pattern = &any{}
   301  )