github.com/windmilleng/wat@v0.0.2-0.20180626175338-9349b638e250/data/pathutil/matcher.go (about)

     1  // Helpers for matching both DB paths and File paths.
     2  package pathutil
     3  
     4  import (
     5  	"fmt"
     6  	"sort"
     7  	"strings"
     8  	"unicode"
     9  )
    10  
    11  // A Matcher is a limited propositional logic engine for choosing a subset of files
    12  // in a file tree.
    13  //
    14  // By design, we don't try to implement a logic engine that allows arbitrarily
    15  // complex boolean formulas. For example, our current logic engine does not
    16  // support the matcher
    17  //
    18  // (foo/** && ((NOT foo/bar/**) || foo/bar/baz/**))
    19  //
    20  // Right now, we try only to support matchers in a normal form:
    21  //
    22  // (A or B) and (not C) and (not D)
    23  //
    24  // or equivalently
    25  //
    26  // (A or B) and not (C or D)
    27  //
    28  // This is not super formal right now. One of the sad limitations of this engine
    29  // is that there are cases where we can express a boolean formula but not its inverse.
    30  // For example, (foo/** && (NOT foo/bar/**)) is expressible but its inverse is not.
type Matcher interface {
	// Match reports whether the path s is selected by this Matcher.
	Match(s string) bool
	// ToPatterns returns the pattern strings describing this Matcher.
	// Negated patterns carry a leading "!".
	ToPatterns() []string

	// True if we are certain this Matcher won't match anything.
	Empty() bool

	// True if we are certain this Matcher matches everything.
	All() bool

	// Whether this is a well-formed Matcher. Verify that we only accept matchers written in normal form:
	// (A or B or C) and (not D) and (not E)
	IsNormal() bool

	// If this matcher will only match a discrete set of files, return the file paths.
	// The implementations in this file return nil when that is not the case.
	AsFileSet() []string

	// Create a new matcher that matches prefix/{originalMatch}.
	// i.e., if m.Match('a') is true, then m.Subdir('b').Match('b/a') is true.
	Subdir(prefix string) Matcher

	// Create a new matcher that matches children of the original match pattern.
	// i.e., if m.Match('b/a') is true, then m.Child('b').Match('a') is true.
	Child(prefix string) Matcher
}
    56  
// invertMatcher inverts a matcher: its Match result is the negation of the
// wrapped matcher's Match result.
type invertMatcher struct {
	matcher Matcher // the matcher being negated
}
    61  
    62  func InvertMatcher(m Matcher) (Matcher, error) {
    63  	if m.Empty() {
    64  		return NewAllMatcher(), nil
    65  	} else if m.All() {
    66  		return NewEmptyMatcher(), nil
    67  	} else if listMatcher, ok := m.(listMatcher); ok {
    68  		// DeMorgan's rule:
    69  		// not (A or B) = not A and not B
    70  		// not (A and B) = not A or not B
    71  		// But not all inverted matchers can be written in normal form,
    72  		// so we need to make sure that the result is normal.
    73  		matchers := listMatcher.matchers
    74  		inverted := make([]Matcher, len(matchers))
    75  		for i, m := range matchers {
    76  			im, err := InvertMatcher(m)
    77  			if err != nil {
    78  				return nil, err
    79  			}
    80  			inverted[i] = im
    81  		}
    82  		result := newListMatcher(!listMatcher.conjunction, inverted)
    83  		if !result.IsNormal() {
    84  			return nil, fmt.Errorf("Inverted matcher cannot be written in normal form: %v", m.ToPatterns())
    85  		}
    86  		return result, nil
    87  	} else if invertedMatcher, ok := m.(invertMatcher); ok {
    88  		return invertedMatcher.matcher, nil
    89  	}
    90  	return invertMatcher{matcher: m}, nil
    91  }
    92  
    93  func (m invertMatcher) ToPatterns() []string {
    94  	patterns := m.matcher.ToPatterns()
    95  	for i, p := range patterns {
    96  		if isInverted(p) {
    97  			patterns[i] = p[1:]
    98  		} else {
    99  			patterns[i] = "!" + p
   100  		}
   101  	}
   102  	return patterns
   103  }
   104  
   105  func (m invertMatcher) Match(path string) bool { return !m.matcher.Match(path) }
   106  func (m invertMatcher) Empty() bool            { return m.matcher.Empty() }
   107  func (m invertMatcher) All() bool              { return m.matcher.All() }
   108  func (m invertMatcher) AsFileSet() []string    { return nil }
   109  
   110  func (m invertMatcher) IsNormal() bool {
   111  	_, isList := m.matcher.(listMatcher)
   112  	return !isList && m.matcher.IsNormal()
   113  }
   114  
   115  func (m invertMatcher) Subdir(prefix string) Matcher {
   116  	i, err := InvertMatcher(m.matcher.Subdir(prefix))
   117  	if err != nil {
   118  		// This shouldn't be possible, because we know the inner matcher is invertible.
   119  		panic(err)
   120  	}
   121  	return i
   122  }
   123  
   124  func (m invertMatcher) Child(prefix string) Matcher {
   125  	i, err := InvertMatcher(m.matcher.Child(prefix))
   126  	if err != nil {
   127  		// This shouldn't be possible, because we know the inner matcher is invertible.
   128  		panic(err)
   129  	}
   130  	return i
   131  }
   132  
// ANDs/ORs a bunch of matchers together.
//
// A list with no matchers is meaningful: an empty OR matches nothing and an
// empty AND matches everything (see NewEmptyMatcher and NewAllMatcher).
type listMatcher struct {
	conjunction bool // If true, this is an AND. Otherwise it's an OR.
	matchers    []Matcher // the operands being combined
}
   138  
   139  func newListMatcher(conjunction bool, matchers []Matcher) Matcher {
   140  	simplified := make([]Matcher, 0, len(matchers))
   141  	for _, m := range matchers {
   142  		if conjunction {
   143  			if m.Empty() {
   144  				return m
   145  			} else if m.All() {
   146  				continue
   147  			}
   148  		} else {
   149  			if m.Empty() {
   150  				continue
   151  			} else if m.All() {
   152  				return m
   153  			}
   154  		}
   155  		simplified = append(simplified, m)
   156  	}
   157  	if len(simplified) == 1 {
   158  		return simplified[0]
   159  	}
   160  	return listMatcher{conjunction: conjunction, matchers: simplified}
   161  }
   162  
   163  func newDisjunctionMatcher(matchers []Matcher) Matcher {
   164  	return newListMatcher(false, matchers)
   165  }
   166  
   167  func newConjunctionMatcher(matchers []Matcher) Matcher {
   168  	return newListMatcher(true, matchers)
   169  }
   170  
   171  func (d listMatcher) ToPatterns() []string {
   172  	if d.All() {
   173  		return []string{"**"}
   174  	}
   175  
   176  	result := make([]string, 0, len(d.matchers))
   177  	for _, matcher := range d.matchers {
   178  		result = append(result, matcher.ToPatterns()...)
   179  	}
   180  	return result
   181  }
   182  
   183  func (d listMatcher) Match(s string) bool {
   184  	if d.conjunction {
   185  		for _, matcher := range d.matchers {
   186  			ok := matcher.Match(s)
   187  			if !ok {
   188  				return false
   189  			}
   190  		}
   191  		return true
   192  	} else {
   193  		for _, matcher := range d.matchers {
   194  			ok := matcher.Match(s)
   195  			if ok {
   196  				return true
   197  			}
   198  		}
   199  		return false
   200  	}
   201  }
   202  
   203  func (d listMatcher) Empty() bool {
   204  	if d.conjunction {
   205  		for _, matcher := range d.matchers {
   206  			ok := matcher.Empty()
   207  			if ok {
   208  				return true
   209  			}
   210  		}
   211  		return false
   212  	} else {
   213  		for _, matcher := range d.matchers {
   214  			ok := matcher.Empty()
   215  			if !ok {
   216  				return false
   217  			}
   218  		}
   219  		return true
   220  	}
   221  }
   222  
   223  func (d listMatcher) All() bool {
   224  	if d.conjunction {
   225  		for _, matcher := range d.matchers {
   226  			ok := matcher.All()
   227  			if !ok {
   228  				return false
   229  			}
   230  		}
   231  		return true
   232  	} else {
   233  		for _, matcher := range d.matchers {
   234  			ok := matcher.All()
   235  			if ok {
   236  				return true
   237  			}
   238  		}
   239  		return false
   240  	}
   241  }
   242  
   243  func (d listMatcher) IsNormal() bool {
   244  	for _, m := range d.matchers {
   245  		if !m.IsNormal() {
   246  			return false
   247  		}
   248  
   249  		// Conjunctions may have inner lists, but they must be disjunctions.
   250  		// Disjunctions may not have inner lists.
   251  		innerList, isInnerList := m.(listMatcher)
   252  		if isInnerList && !(d.conjunction && !innerList.conjunction) {
   253  			return false
   254  		}
   255  
   256  		// Disjunctions may not have inner inversions
   257  		if !d.conjunction {
   258  			_, isInversion := m.(invertMatcher)
   259  			if isInversion {
   260  				return false
   261  			}
   262  		}
   263  	}
   264  	return true
   265  }
   266  
   267  func (d listMatcher) AsFileSet() []string {
   268  	if d.conjunction {
   269  		return nil
   270  	}
   271  	result := []string{}
   272  	for _, m := range d.matchers {
   273  		fileSet := m.AsFileSet()
   274  		if fileSet == nil {
   275  			return nil
   276  		}
   277  		result = append(result, fileSet...)
   278  	}
   279  	return result
   280  }
   281  
   282  func (d listMatcher) Subdir(prefix string) Matcher {
   283  	matchers := make([]Matcher, len(d.matchers))
   284  	for i, m := range d.matchers {
   285  		matchers[i] = m.Subdir(prefix)
   286  	}
   287  	return newListMatcher(d.conjunction, matchers)
   288  }
   289  
   290  func (d listMatcher) Child(prefix string) Matcher {
   291  	matchers := make([]Matcher, len(d.matchers))
   292  	for i, m := range d.matchers {
   293  		matchers[i] = m.Child(prefix)
   294  	}
   295  	return newListMatcher(d.conjunction, matchers)
   296  }
   297  
// Matches a single file. A path matches only when it is exactly equal to
// the stored file path.
type fileMatcher struct {
	util PathUtil // path helper used by Subdir and Child to rewrite the file path
	file string // the one path this matcher accepts
}
   303  
// filePrefix marks a pattern string as naming a single exact file rather
// than a glob pattern.
const filePrefix = "file://"
   305  
   306  func (m fileMatcher) ToPatterns() []string {
   307  	return []string{filePrefix + m.file}
   308  }
   309  
   310  func (m fileMatcher) Match(path string) bool {
   311  	return m.file == path
   312  }
   313  
   314  func (m fileMatcher) Empty() bool {
   315  	return false
   316  }
   317  
   318  func (m fileMatcher) All() bool {
   319  	return false
   320  }
   321  
   322  func (m fileMatcher) IsNormal() bool {
   323  	return true
   324  }
   325  
   326  func (m fileMatcher) AsFileSet() []string {
   327  	return []string{m.file}
   328  }
   329  
   330  func (m fileMatcher) Subdir(prefix string) Matcher {
   331  	return fileMatcher{
   332  		util: m.util,
   333  		file: m.util.Join(prefix, m.file),
   334  	}
   335  }
   336  
   337  func (m fileMatcher) Child(prefix string) Matcher {
   338  	child, ok := Child(m.util, prefix, m.file)
   339  	if !ok {
   340  		return NewEmptyMatcher()
   341  	}
   342  	return fileMatcher{
   343  		util: m.util,
   344  		file: child,
   345  	}
   346  }
   347  
// Matches file paths.
//
// Pattern semantics attempt to match `ls`. All patterns are taken
// relative to the root of the current directory.
//
// Uses ** globs for recursive matches.
//
// Implemented with golang's path.Match on each part of the path.
//
// Examples:
// 'foo' will match 'foo', but not 'foo/bar'
// 'foo/bar' will match 'foo/bar' but not 'foo/bar/baz' or 'baz/foo/bar'
// '*.txt' will match 'foo.txt' and 'bar.baz.txt' but not 'foo/bar.txt'
// '*/foo.txt' will match 'a/foo.txt' but not 'foo.txt' or 'a/b/foo.txt'
// '**/*.txt' will match 'foo.txt' and 'a/b/c/foo.txt'
type patternMatcher struct {
	util    PathUtil // path helper used to split paths and match single segments
	pattern string // the glob pattern, relative to the root
}
   367  
   368  func (m patternMatcher) ToPatterns() []string {
   369  	return []string{m.pattern}
   370  }
   371  
   372  func (m patternMatcher) Match(path string) bool {
   373  	return m.matchRecur(m.pattern, path)
   374  }
   375  
   376  func (m patternMatcher) matchRecur(pattern string, path string) bool {
   377  	// Base case #1: the pattern and path are both exhausted.
   378  	if (pattern == "" || pattern == "**") && path == "" {
   379  		return true
   380  	}
   381  
   382  	if pattern == "" {
   383  		return false
   384  	}
   385  
   386  	// Base case #2: the path has been exhausted but there's still pattern
   387  	// left to match.
   388  	if path == "" {
   389  		return false
   390  	}
   391  
   392  	pFirst, pRest := SplitFirst(m.util, pattern)
   393  	first, rest := SplitFirst(m.util, path)
   394  	if pFirst == "**" {
   395  		// The double star case is special.
   396  		// First recur on the case where the double star matches nothing.
   397  		match := m.matchRecur(pRest, first)
   398  		if match {
   399  			return true
   400  		}
   401  
   402  		// If that doesn't match, recur on the case where the double star
   403  		// matches the first part of the path.
   404  		// Note that this is potentially exponential, and a "optimized" algorithm
   405  		// would use a dynamic programming approach, but this is ok
   406  		// for most cases.
   407  		return m.matchRecur(pattern, rest)
   408  	}
   409  
   410  	// Normal patterns only match one part of the path.
   411  	match, err := m.util.Match(pFirst, first)
   412  	if err != nil {
   413  		// The pattern should have been validated up-front.
   414  		panic(err)
   415  	}
   416  
   417  	if !match {
   418  		return false
   419  	}
   420  
   421  	// Recur on the next part of both the pattern and the path.
   422  	return m.matchRecur(pRest, rest)
   423  }
   424  
   425  func (m patternMatcher) Empty() bool {
   426  	return false
   427  }
   428  
   429  func (m patternMatcher) All() bool {
   430  	return false
   431  }
   432  
   433  func (m patternMatcher) IsNormal() bool {
   434  	return true
   435  }
   436  
   437  func (m patternMatcher) AsFileSet() []string {
   438  	return nil
   439  }
   440  
   441  func (m patternMatcher) Subdir(prefix string) Matcher {
   442  	return &patternMatcher{
   443  		util:    m.util,
   444  		pattern: m.util.Join(prefix, m.pattern),
   445  	}
   446  }
   447  
   448  func (m patternMatcher) Child(prefix string) Matcher {
   449  	child, ok := childPattern(m.util, prefix, m.pattern)
   450  	if !ok {
   451  		return NewEmptyMatcher()
   452  	}
   453  	result, err := NewMatcherFromPattern(m.util, child)
   454  	if err != nil {
   455  		panic(fmt.Sprintf("Child(%v, %s) produced invalid pattern: %q", m.ToPatterns(), prefix, child))
   456  	}
   457  	return result
   458  }
   459  
   460  // Matches nothing.
   461  func NewEmptyMatcher() Matcher {
   462  	return listMatcher{conjunction: false, matchers: []Matcher{}}
   463  }
   464  
   465  // Matches everything.
   466  func NewAllMatcher() Matcher {
   467  	return listMatcher{conjunction: true, matchers: []Matcher{}}
   468  }
   469  
   470  // Matches a single file only
   471  func NewFileMatcher(util PathUtil, file string) (Matcher, error) {
   472  	if file == "" {
   473  		return nil, fmt.Errorf("NewFileMatcher: no file specified")
   474  	}
   475  	return fileMatcher{util: util, file: file}, nil
   476  }
   477  
   478  func NewFilesMatcher(util PathUtil, files []string) (Matcher, error) {
   479  	matchers := make([]Matcher, 0, len(files))
   480  	for _, f := range files {
   481  		m, err := NewFileMatcher(util, f)
   482  		if err != nil {
   483  			return nil, err
   484  		}
   485  		matchers = append(matchers, m)
   486  	}
   487  	return newDisjunctionMatcher(matchers), nil
   488  }
   489  
   490  func NewMatcherFromPattern(util PathUtil, pattern string) (Matcher, error) {
   491  	if strings.IndexFunc(pattern, unicode.IsSpace) != -1 {
   492  		return nil, fmt.Errorf("Path patterns may not contain whitespace: %q", pattern)
   493  	}
   494  
   495  	if strings.HasPrefix(pattern, "/") {
   496  		return nil, fmt.Errorf("Path patterns may not start with a leading slash: %q", pattern)
   497  	}
   498  
   499  	if isInverted(pattern) {
   500  		inner, err := NewMatcherFromPattern(util, pattern[1:])
   501  		if err != nil {
   502  			return nil, err
   503  		}
   504  		return InvertMatcher(inner)
   505  	}
   506  
   507  	if strings.Index(pattern, filePrefix) == 0 {
   508  		return NewFileMatcher(util, pattern[len(filePrefix):])
   509  	}
   510  
   511  	if pattern == "**" {
   512  		return NewAllMatcher(), nil
   513  	}
   514  
   515  	// Validate the match pattern.
   516  	// The only possible error from filepatch.Match is ErrBadPattern.
   517  	_, err := util.Match(pattern, "")
   518  	if err != nil {
   519  		return nil, fmt.Errorf("Bad match pattern %q: %v", pattern, err)
   520  	}
   521  
   522  	return &patternMatcher{
   523  		util:    util,
   524  		pattern: pattern,
   525  	}, nil
   526  }
   527  
   528  // When we have positive and negative patterns in the same pattern set,
   529  // we treat them as a conjunction of all the positive forms, then disjunction on
   530  // all the negative forms.
   531  //
   532  // For example, the pattern set [A, B, !C, !D] is interpreted as
   533  // (A or B) and (not C) and (not D)
   534  // We consider this Normal Form.
   535  //
   536  // We try to enforce that all matchers are in normal form, and reject matchers that are not.
   537  func NewMatcherFromPatterns(util PathUtil, patterns []string) (Matcher, error) {
   538  	positivePatterns := make([]string, 0, len(patterns))
   539  	negativePatterns := make([]string, 0, len(patterns))
   540  	for _, pattern := range patterns {
   541  		if isInverted(pattern) {
   542  			negativePatterns = append(negativePatterns, pattern)
   543  		} else {
   544  			positivePatterns = append(positivePatterns, pattern)
   545  		}
   546  	}
   547  
   548  	positivePatterns, negativePatterns = simplifyPatterns(util, positivePatterns, negativePatterns)
   549  
   550  	matchers := make([]Matcher, len(positivePatterns))
   551  	for i, pattern := range positivePatterns {
   552  		m, err := NewMatcherFromPattern(util, pattern)
   553  		if err != nil {
   554  			return nil, err
   555  		}
   556  		matchers[i] = m
   557  	}
   558  
   559  	invMatchers := make([]Matcher, len(negativePatterns))
   560  	for i, pattern := range negativePatterns {
   561  		m, err := NewMatcherFromPattern(util, pattern)
   562  		if err != nil {
   563  			return nil, err
   564  		}
   565  		invMatchers[i] = m
   566  	}
   567  
   568  	if len(matchers) != 0 {
   569  		return newConjunctionMatcher(
   570  				append([]Matcher{newDisjunctionMatcher(matchers)}, invMatchers...)),
   571  			nil
   572  	} else {
   573  		return newConjunctionMatcher(invMatchers), nil
   574  	}
   575  }
   576  
   577  func isInverted(p string) bool {
   578  	return len(p) != 0 && p[0] == '!'
   579  }
   580  
   581  func MatchersEqual(a, b Matcher) bool {
   582  	aPatterns := a.ToPatterns()
   583  	bPatterns := b.ToPatterns()
   584  	if len(aPatterns) != len(bPatterns) {
   585  		return false
   586  	}
   587  
   588  	sort.Strings(aPatterns)
   589  	sort.Strings(bPatterns)
   590  	for i, aPattern := range aPatterns {
   591  		bPattern := bPatterns[i]
   592  		if aPattern != bPattern {
   593  			return false
   594  		}
   595  	}
   596  	return true
   597  }
   598  
   599  // Helper function to check if two positive patterns are orthogonal.
   600  // By "orthogonal", we mean that there does not exist a path that can satisfy both.
   601  func arePatternsOrthogonal(util PathUtil, p1, p2 string) bool {
   602  	// This is a very simple algorithm that goes through each
   603  	// path segment and see if they don't match.
   604  	//
   605  	// For example,
   606  	// a/b/*
   607  	// a/c/*
   608  	// are not equal when we compare "b" and "c", so they are orthogonal.
   609  	//
   610  	// If we see any stars, or if we're out of path segments, we end immediately.
   611  	p1First, p1Rest := SplitFirst(util, p1)
   612  	if p1Rest == "" || strings.ContainsRune(p1First, '*') {
   613  		return false
   614  	}
   615  
   616  	p2First, p2Rest := SplitFirst(util, p2)
   617  	if p2Rest == "" || strings.ContainsRune(p2First, '*') {
   618  		return false
   619  	}
   620  
   621  	if p1First != p2First {
   622  		return true
   623  	}
   624  	return arePatternsOrthogonal(util, p1Rest, p2Rest)
   625  }
   626  
   627  // Helper to filter out negative patterns that are orthogonal
   628  // to the positive patterns. As an example, if we have:
   629  // ["*.txt", "!*.py"]
   630  // we can skip the *.py.
   631  //
   632  // This is both an optimization and needed for correctness,
   633  // because ["*.txt"] is invertible in our matcher engine
   634  // but ["*.txt", "!*.py"] is not.
   635  func simplifyPatterns(util PathUtil, positivePatterns, negativePatterns []string) ([]string, []string) {
   636  	if len(positivePatterns) > 0 && len(negativePatterns) > 0 {
   637  		simplifiedNegativePatterns := make([]string, 0, len(negativePatterns))
   638  		for _, negPattern := range negativePatterns {
   639  			p := negPattern[1:] // remove the "!"
   640  			for _, posPattern := range positivePatterns {
   641  				if !arePatternsOrthogonal(util, p, posPattern) {
   642  					simplifiedNegativePatterns = append(simplifiedNegativePatterns, negPattern)
   643  					break
   644  				}
   645  			}
   646  		}
   647  		return positivePatterns, simplifiedNegativePatterns
   648  	}
   649  	return positivePatterns, negativePatterns
   650  }