github.com/AndrewDeryabin/doublestar/v4@v4.0.0-20230123132908-d9476b7d41be/match.go (about)

     1  package doublestar
     2  
     3  import (
     4  	"path/filepath"
     5  	"unicode/utf8"
     6  )
     7  
     8  // Match reports whether name matches the shell pattern.
     9  // The pattern syntax is:
    10  //
    11  //  pattern:
    12  //    { term }
    13  //  term:
    14  //    '*'         matches any sequence of non-path-separators
    15  //    '/**/'      matches zero or more directories
    16  //    '?'         matches any single non-path-separator character
    17  //    '[' [ '^' '!' ] { character-range } ']'
    18  //                character class (must be non-empty)
    19  //                starting with `^` or `!` negates the class
    20  //    '{' { term } [ ',' { term } ... ] '}'
    21  //                alternatives
    22  //    c           matches character c (c != '*', '?', '\\', '[')
    23  //    '\\' c      matches character c
    24  //
    25  //  character-range:
    26  //    c           matches character c (c != '\\', '-', ']')
    27  //    '\\' c      matches character c
    28  //    lo '-' hi   matches character c for lo <= c <= hi
    29  //
    30  // Match returns true if `name` matches the file name `pattern`. `name` and
    31  // `pattern` are split on forward slash (`/`) characters and may be relative or
    32  // absolute.
    33  //
    34  // Match requires pattern to match all of name, not just a substring.
    35  // The only possible returned error is ErrBadPattern, when pattern
    36  // is malformed.
    37  //
    38  // A doublestar (`**`) should appear surrounded by path separators such as
    39  // `/**/`.  A mid-pattern doublestar (`**`) behaves like bash's globstar
    40  // option: a pattern such as `path/to/**.txt` would return the same results as
    41  // `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
    42  //
    43  // Note: this is meant as a drop-in replacement for path.Match() which
    44  // always uses '/' as the path separator. If you want to support systems
    45  // which use a different path separator (such as Windows), what you want
    46  // is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
    47  // pattern and name and then use this function.
    48  //
    49  // Note: users should _not_ count on the returned error,
    50  // doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
    51  //
    52  func Match(pattern, name string) (bool, error) {
    53  	return matchWithSeparator(pattern, name, '/', true)
    54  }
    55  
    56  // PathMatch returns true if `name` matches the file name `pattern`. The
    57  // difference between Match and PathMatch is that PathMatch will automatically
    58  // use your system's path separator to split `name` and `pattern`. On systems
    59  // where the path separator is `'\'`, escaping will be disabled.
    60  //
    61  // Note: this is meant as a drop-in replacement for filepath.Match(). It
    62  // assumes that both `pattern` and `name` are using the system's path
    63  // separator. If you can't be sure of that, use filepath.ToSlash() on both
    64  // `pattern` and `name`, and then use the Match() function instead.
    65  //
    66  func PathMatch(pattern, name string) (bool, error) {
    67  	return matchWithSeparator(pattern, name, filepath.Separator, true)
    68  }
    69  
    70  // MatchWithSeparator returns true if `name` matches the file name `pattern`
    71  // using the specified rune as separator.
    72  func MatchWithSeparator(pattern, name string, separator rune) (bool, error) {
    73  	return matchWithSeparator(pattern, name, separator, true)
    74  }
    75  
    76  func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
    77  	return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
    78  }
    79  
// doMatchWithSeparator is the matching engine behind matchWithSeparator. It
// walks `pattern` and `name` side by side, starting at the byte offsets
// patIdx and nameIdx, and reports whether the rest of `name` is matched by
// the rest of `pattern`.
//
// The four *Backtrack parameters are byte offsets to resume from when a
// comparison fails; a negative value means that no such point has been
// recorded. The star pair is recorded when a `*` is consumed, the doublestar
// pair when a `**` followed by the separator is consumed at the start of a
// segment. They are parameters rather than locals so that the `{` case can
// recurse on a rewritten pattern while keeping the points recorded so far.
//
// When the match fails and validate is true, the unconsumed remainder of the
// pattern is checked with ValidateWithSeparator, and ErrBadPattern is
// returned if it is malformed. ErrBadPattern is also returned directly for
// unterminated or empty character classes, unterminated `{`, and a trailing
// escape character.
//
// NOTE(review): startOfSegment is initialised to true on every entry,
// including the recursive calls made for `{...}` alternatives, where patIdx
// may point into the middle of a path segment. A pattern such as `a{**/b}`
// may therefore treat `**` differently from the equivalent `a**/b` - confirm
// this is intended.
func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
	patLen := len(pattern)
	nameLen := len(name)
	startOfSegment := true
MATCH:
	for nameIdx < nameLen {
		if patIdx < patLen {
			// every `break` inside this switch leaves the switch only and
			// lands in the backtracking logic that follows it
			switch pattern[patIdx] {
			case '*':
				if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
					// doublestar - must begin with a path separator, otherwise we'll
					// treat it like a single star like bash
					patIdx++
					if startOfSegment {
						if patIdx >= patLen {
							// pattern ends in `/**`: return true
							return true, nil
						}

						// doublestar must also end with a path separator, otherwise we're
						// just going to treat the doublestar as a single star like bash
						patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
						if patRune == separator {
							patIdx += patRuneLen

							// record where to resume after `**/`, and drop any
							// single-star backtrack point recorded earlier
							doublestarPatternBacktrack = patIdx
							doublestarNameBacktrack = nameIdx
							starPatternBacktrack = -1
							starNameBacktrack = -1
							continue
						}
					}
				}
				startOfSegment = false

				// single star (or a doublestar treated as one): first try to match
				// zero characters, remembering where to resume if that fails
				starPatternBacktrack = patIdx
				starNameBacktrack = nameIdx
				continue

			case '?':
				startOfSegment = false
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if nameRune == separator {
					// `?` cannot match the separator
					break
				}

				patIdx++
				nameIdx += nameRuneLen
				continue

			case '[':
				startOfSegment = false
				if patIdx++; patIdx >= patLen {
					// class didn't end
					return false, ErrBadPattern
				}
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])

				// this `matched` shadows the named result for the rest of the case
				matched := false
				negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
				if negate {
					patIdx++
				}

				if patIdx >= patLen || pattern[patIdx] == ']' {
					// class didn't end or empty character class
					return false, ErrBadPattern
				}

				// `last` holds the previous class member so that a following `-`
				// can form a range; utf8.MaxRune means "no previous member"
				// NOTE(review): backslash escapes inside the class are honoured
				// regardless of `separator`, unlike the '\\' case below - confirm
				last := utf8.MaxRune
				for patIdx < patLen && pattern[patIdx] != ']' {
					patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
					patIdx += patRuneLen

					// match a range
					if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
						if pattern[patIdx] == '\\' {
							// next character is escaped
							patIdx++
						}
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen

						if last <= nameRune && nameRune <= patRune {
							matched = true
							break
						}

						// didn't match range - reset `last`
						last = utf8.MaxRune
						continue
					}

					// not a range - check if the next rune is escaped
					if patRune == '\\' {
						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
						patIdx += patRuneLen
					}

					// check if the rune matches
					if patRune == nameRune {
						matched = true
						break
					}

					// no matches yet
					last = patRune
				}

				if matched == negate {
					// failed to match - if we reached the end of the pattern, that means
					// we never found a closing `]`
					if patIdx >= patLen {
						return false, ErrBadPattern
					}
					break
				}

				// the class matched; skip whatever is left of it, up to and
				// including the closing `]`
				closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
				if closingIdx == -1 {
					// no closing `]`
					return false, ErrBadPattern
				}

				patIdx += closingIdx + 1
				nameIdx += nameRuneLen
				continue

			case '{':
				startOfSegment = false
				beforeIdx := patIdx
				patIdx++
				closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
				if closingIdx == -1 {
					// no closing `}`
					return false, ErrBadPattern
				}
				closingIdx += patIdx

				// try each alternative in turn: splice it into the pattern in place
				// of the whole `{...}` group and recurse from the same positions,
				// stopping at the first match or error
				for {
					commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
					if commaIdx == -1 {
						break
					}
					commaIdx += patIdx

					result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
					if result || err != nil {
						return result, err
					}

					patIdx = commaIdx + 1
				}
				// last (or only) alternative: its result is the final answer
				return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)

			case '\\':
				// when the separator itself is a backslash, escaping is disabled
				// and the backslash is compared literally by the default case
				if separator != '\\' {
					// next rune is "escaped" in the pattern - literal match
					if patIdx++; patIdx >= patLen {
						// pattern ended
						return false, ErrBadPattern
					}
				}
				fallthrough

			default:
				patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				if patRune != nameRune {
					if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
						// if this rune was meant to be escaped, we need to move patIdx
						// back to the backslash before backtracking or validating below
						patIdx--
					}
					break
				}

				patIdx += patRuneLen
				nameIdx += nameRuneLen
				// a matched separator means the next term starts a new segment
				startOfSegment = patRune == separator
				continue
			}
		}

		// reaching this point means the current term failed to match, or the
		// pattern ran out before the name did
		if starPatternBacktrack >= 0 {
			// `*` backtrack, but only if the `name` rune isn't the separator
			nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
			if nameRune != separator {
				// let the star swallow one more rune of `name` and retry
				starNameBacktrack += nameRuneLen
				patIdx = starPatternBacktrack
				nameIdx = starNameBacktrack
				startOfSegment = false
				continue
			}
		}

		if doublestarPatternBacktrack >= 0 {
			// `**` backtrack, advance `name` past next separator
			nameIdx = doublestarNameBacktrack
			for nameIdx < nameLen {
				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
				nameIdx += nameRuneLen
				if nameRune == separator {
					doublestarNameBacktrack = nameIdx
					patIdx = doublestarPatternBacktrack
					startOfSegment = true
					continue MATCH
				}
			}
		}

		// nothing left to backtrack to: the match has failed; optionally check
		// that the part of the pattern we never looked at is well formed
		if validate && patIdx < patLen && !ValidateWithSeparator(pattern[patIdx:], separator) {
			return false, ErrBadPattern
		}
		return false, nil
	}

	// NOTE(review): the loop above is only left through a return or once
	// nameIdx >= nameLen, so this branch looks unreachable - confirm
	if nameIdx < nameLen {
		// we reached the end of `pattern` before the end of `name`
		return false, nil
	}

	// we've reached the end of `name`; we've successfully matched if we've also
	// reached the end of `pattern`, or if the rest of `pattern` can match a
	// zero-length string
	return isZeroLengthPattern(pattern[patIdx:], separator)
}
   308  
   309  func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) {
   310  	// `/**` is a special case - a pattern such as `path/to/a/**` *should* match
   311  	// `path/to/a` because `a` might be a directory
   312  	if pattern == "" || pattern == "*" || pattern == "**" || pattern == string(separator)+"**" {
   313  		return true, nil
   314  	}
   315  
   316  	if pattern[0] == '{' {
   317  		closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
   318  		if closingIdx == -1 {
   319  			// no closing '}'
   320  			return false, ErrBadPattern
   321  		}
   322  		closingIdx += 1
   323  
   324  		patIdx := 1
   325  		for {
   326  			commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
   327  			if commaIdx == -1 {
   328  				break
   329  			}
   330  			commaIdx += patIdx
   331  
   332  			ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator)
   333  			if ret || err != nil {
   334  				return
   335  			}
   336  
   337  			patIdx = commaIdx + 1
   338  		}
   339  		return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator)
   340  	}
   341  
   342  	// no luck - validate the rest of the pattern
   343  	if !ValidateWithSeparator(pattern, separator) {
   344  		return false, ErrBadPattern
   345  	}
   346  	return false, nil
   347  }
   348  
   349  // Finds the index of the first unescaped byte `c`, or negative 1.
   350  func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
   351  	l := len(s)
   352  	for i := 0; i < l; i++ {
   353  		if allowEscaping && s[i] == '\\' {
   354  			// skip next byte
   355  			i++
   356  		} else if s[i] == c {
   357  			return i
   358  		}
   359  	}
   360  	return -1
   361  }
   362  
   363  // Assuming the byte before the beginning of `s` is an opening `{`, this
   364  // function will find the index of the matching `}`. That is, it'll skip over
   365  // any nested `{}` and account for escaping
   366  func indexMatchedClosingAlt(s string, allowEscaping bool) int {
   367  	alts := 1
   368  	l := len(s)
   369  	for i := 0; i < l; i++ {
   370  		if allowEscaping && s[i] == '\\' {
   371  			// skip next byte
   372  			i++
   373  		} else if s[i] == '{' {
   374  			alts++
   375  		} else if s[i] == '}' {
   376  			if alts--; alts == 0 {
   377  				return i
   378  			}
   379  		}
   380  	}
   381  	return -1
   382  }