github.com/windmilleng/wat@v0.0.2-0.20180626175338-9349b638e250/data/pathutil/matcher.go (about) 1 // Helpers for matching both DB paths and File paths. 2 package pathutil 3 4 import ( 5 "fmt" 6 "sort" 7 "strings" 8 "unicode" 9 ) 10 11 // A Matcher is a limited propositional logic engine for choosing a subset of files 12 // in a file tree. 13 // 14 // By design, we don't try to implement a logic engine that allows arbitrarily 15 // complex boolean formulas. For example, our current logic engine does not 16 // support the matcher 17 // 18 // (foo/** && ((NOT foo/bar/**) || foo/bar/baz/**)) 19 // 20 // Right now, we try only to support matchers in a normal form: 21 // 22 // (A or B) and (not C) and (not D) 23 // 24 // or equivalently 25 // 26 // (A or B) and not (C or D) 27 // 28 // This is not super formal right now. One of the sad limitations of this engine 29 // is that there are cases where we can express a boolean formula but not its inverse. 30 // For example, (foo/** && (NOT foo/bar/**)) is expressible but its inverse is not. 31 type Matcher interface { 32 Match(s string) bool 33 ToPatterns() []string 34 35 // True if are certain this Matcher won't match anything. 36 Empty() bool 37 38 // True if are certain this Matcher matches everything 39 All() bool 40 41 // Whether this is a well-formed Matcher. Verify that we only accept matchers written in normal form: 42 // (A or B or C) and (not D) and (not E) 43 IsNormal() bool 44 45 // If this matcher will only match a discrete set of files, return the file path. 46 AsFileSet() []string 47 48 // Create a new matcher that matches prefix/{originalMatch}. 49 // i.e., if m.Matches('a') is true, then m.Subdir('b').Matches('b/a') is true. 50 Subdir(prefix string) Matcher 51 52 // Create a new matcher that matches children of the original match pattern. 53 // i.e., if m.Matches('b/a') is true, then m.Child('b').Matches('a') is true. 54 Child(prefix string) Matcher 55 } 56 57 // Inverts a matcher 58 type invertMatcher struct { 59 matcher Matcher 60 } 61 62 func InvertMatcher(m Matcher) (Matcher, error) { 63 if m.Empty() { 64 return NewAllMatcher(), nil 65 } else if m.All() { 66 return NewEmptyMatcher(), nil 67 } else if listMatcher, ok := m.(listMatcher); ok { 68 // DeMorgan's rule: 69 // not (A or B) = not A and not B 70 // not (A and B) = not A or not B 71 // But not all inverted matchers can be written in normal form, 72 // so we need to make sure that the result is normal. 73 matchers := listMatcher.matchers 74 inverted := make([]Matcher, len(matchers)) 75 for i, m := range matchers { 76 im, err := InvertMatcher(m) 77 if err != nil { 78 return nil, err 79 } 80 inverted[i] = im 81 } 82 result := newListMatcher(!listMatcher.conjunction, inverted) 83 if !result.IsNormal() { 84 return nil, fmt.Errorf("Inverted matcher cannot be written in normal form: %v", m.ToPatterns()) 85 } 86 return result, nil 87 } else if invertedMatcher, ok := m.(invertMatcher); ok { 88 return invertedMatcher.matcher, nil 89 } 90 return invertMatcher{matcher: m}, nil 91 } 92 93 func (m invertMatcher) ToPatterns() []string { 94 patterns := m.matcher.ToPatterns() 95 for i, p := range patterns { 96 if isInverted(p) { 97 patterns[i] = p[1:] 98 } else { 99 patterns[i] = "!" + p 100 } 101 } 102 return patterns 103 } 104 105 func (m invertMatcher) Match(path string) bool { return !m.matcher.Match(path) } 106 func (m invertMatcher) Empty() bool { return m.matcher.Empty() } 107 func (m invertMatcher) All() bool { return m.matcher.All() } 108 func (m invertMatcher) AsFileSet() []string { return nil } 109 110 func (m invertMatcher) IsNormal() bool { 111 _, isList := m.matcher.(listMatcher) 112 return !isList && m.matcher.IsNormal() 113 } 114 115 func (m invertMatcher) Subdir(prefix string) Matcher { 116 i, err := InvertMatcher(m.matcher.Subdir(prefix)) 117 if err != nil { 118 // This shouldn't be possible, because we know the inner matcher is invertible. 119 panic(err) 120 } 121 return i 122 } 123 124 func (m invertMatcher) Child(prefix string) Matcher { 125 i, err := InvertMatcher(m.matcher.Child(prefix)) 126 if err != nil { 127 // This shouldn't be possible, because we know the inner matcher is invertible. 128 panic(err) 129 } 130 return i 131 } 132 133 // ANDs/ORs a bunch of matchers together. 134 type listMatcher struct { 135 conjunction bool // If true, this is an AND. Otherwise it's an OR. 136 matchers []Matcher 137 } 138 139 func newListMatcher(conjunction bool, matchers []Matcher) Matcher { 140 simplified := make([]Matcher, 0, len(matchers)) 141 for _, m := range matchers { 142 if conjunction { 143 if m.Empty() { 144 return m 145 } else if m.All() { 146 continue 147 } 148 } else { 149 if m.Empty() { 150 continue 151 } else if m.All() { 152 return m 153 } 154 } 155 simplified = append(simplified, m) 156 } 157 if len(simplified) == 1 { 158 return simplified[0] 159 } 160 return listMatcher{conjunction: conjunction, matchers: simplified} 161 } 162 163 func newDisjunctionMatcher(matchers []Matcher) Matcher { 164 return newListMatcher(false, matchers) 165 } 166 167 func newConjunctionMatcher(matchers []Matcher) Matcher { 168 return newListMatcher(true, matchers) 169 } 170 171 func (d listMatcher) ToPatterns() []string { 172 if d.All() { 173 return []string{"**"} 174 } 175 176 result := make([]string, 0, len(d.matchers)) 177 for _, matcher := range d.matchers { 178 result = append(result, matcher.ToPatterns()...) 179 } 180 return result 181 } 182 183 func (d listMatcher) Match(s string) bool { 184 if d.conjunction { 185 for _, matcher := range d.matchers { 186 ok := matcher.Match(s) 187 if !ok { 188 return false 189 } 190 } 191 return true 192 } else { 193 for _, matcher := range d.matchers { 194 ok := matcher.Match(s) 195 if ok { 196 return true 197 } 198 } 199 return false 200 } 201 } 202 203 func (d listMatcher) Empty() bool { 204 if d.conjunction { 205 for _, matcher := range d.matchers { 206 ok := matcher.Empty() 207 if ok { 208 return true 209 } 210 } 211 return false 212 } else { 213 for _, matcher := range d.matchers { 214 ok := matcher.Empty() 215 if !ok { 216 return false 217 } 218 } 219 return true 220 } 221 } 222 223 func (d listMatcher) All() bool { 224 if d.conjunction { 225 for _, matcher := range d.matchers { 226 ok := matcher.All() 227 if !ok { 228 return false 229 } 230 } 231 return true 232 } else { 233 for _, matcher := range d.matchers { 234 ok := matcher.All() 235 if ok { 236 return true 237 } 238 } 239 return false 240 } 241 } 242 243 func (d listMatcher) IsNormal() bool { 244 for _, m := range d.matchers { 245 if !m.IsNormal() { 246 return false 247 } 248 249 // Conjunctions may have inner lists, but they must be disjunctions. 250 // Disjunctions may not have inner lists. 251 innerList, isInnerList := m.(listMatcher) 252 if isInnerList && !(d.conjunction && !innerList.conjunction) { 253 return false 254 } 255 256 // Disjunctions may not have inner inversions 257 if !d.conjunction { 258 _, isInversion := m.(invertMatcher) 259 if isInversion { 260 return false 261 } 262 } 263 } 264 return true 265 } 266 267 func (d listMatcher) AsFileSet() []string { 268 if d.conjunction { 269 return nil 270 } 271 result := []string{} 272 for _, m := range d.matchers { 273 fileSet := m.AsFileSet() 274 if fileSet == nil { 275 return nil 276 } 277 result = append(result, fileSet...) 278 } 279 return result 280 } 281 282 func (d listMatcher) Subdir(prefix string) Matcher { 283 matchers := make([]Matcher, len(d.matchers)) 284 for i, m := range d.matchers { 285 matchers[i] = m.Subdir(prefix) 286 } 287 return newListMatcher(d.conjunction, matchers) 288 } 289 290 func (d listMatcher) Child(prefix string) Matcher { 291 matchers := make([]Matcher, len(d.matchers)) 292 for i, m := range d.matchers { 293 matchers[i] = m.Child(prefix) 294 } 295 return newListMatcher(d.conjunction, matchers) 296 } 297 298 // Matches a single file. 299 type fileMatcher struct { 300 util PathUtil 301 file string 302 } 303 304 const filePrefix = "file://" 305 306 func (m fileMatcher) ToPatterns() []string { 307 return []string{filePrefix + m.file} 308 } 309 310 func (m fileMatcher) Match(path string) bool { 311 return m.file == path 312 } 313 314 func (m fileMatcher) Empty() bool { 315 return false 316 } 317 318 func (m fileMatcher) All() bool { 319 return false 320 } 321 322 func (m fileMatcher) IsNormal() bool { 323 return true 324 } 325 326 func (m fileMatcher) AsFileSet() []string { 327 return []string{m.file} 328 } 329 330 func (m fileMatcher) Subdir(prefix string) Matcher { 331 return fileMatcher{ 332 util: m.util, 333 file: m.util.Join(prefix, m.file), 334 } 335 } 336 337 func (m fileMatcher) Child(prefix string) Matcher { 338 child, ok := Child(m.util, prefix, m.file) 339 if !ok { 340 return NewEmptyMatcher() 341 } 342 return fileMatcher{ 343 util: m.util, 344 file: child, 345 } 346 } 347 348 // Matches file paths. 349 // 350 // Pattern semantics attempt to match `ls`. All patterns are taken 351 // relative to the root of the current directory. 352 // 353 // Uses ** globs for recursive matches. 354 // 355 // Implemented with golang's path.Match on each part of the path. 356 // 357 // Examples: 358 // 'foo' will match 'foo', but not 'foo/bar' 359 // 'foo/bar' will match 'foo/bar/baz' but not 'baz/foo/bar' 360 // '*.txt' will match 'foo.txt' and 'bar.baz.txt' but not 'foo/bar.txt' 361 // '*/foo.txt' will match 'a/foo.txt' but not 'foo.txt' or 'a/b/foo.txt' 362 // **/*.txt will match foo.txt and a/b/c/foo.txt 363 type patternMatcher struct { 364 util PathUtil 365 pattern string 366 } 367 368 func (m patternMatcher) ToPatterns() []string { 369 return []string{m.pattern} 370 } 371 372 func (m patternMatcher) Match(path string) bool { 373 return m.matchRecur(m.pattern, path) 374 } 375 376 func (m patternMatcher) matchRecur(pattern string, path string) bool { 377 // Base case #1: the pattern and path are both exhausted. 378 if (pattern == "" || pattern == "**") && path == "" { 379 return true 380 } 381 382 if pattern == "" { 383 return false 384 } 385 386 // Base case #2: the path has been exhausted but there's still pattern 387 // left to match. 388 if path == "" { 389 return false 390 } 391 392 pFirst, pRest := SplitFirst(m.util, pattern) 393 first, rest := SplitFirst(m.util, path) 394 if pFirst == "**" { 395 // The double star case is special. 396 // First recur on the case where the double star matches nothing. 397 match := m.matchRecur(pRest, first) 398 if match { 399 return true 400 } 401 402 // If that doesn't match, recur on the case where the double star 403 // matches the first part of the path. 404 // Note that this is potentially exponential, and a "optimized" algorithm 405 // would use a dynamic programming approach, but this is ok 406 // for most cases. 407 return m.matchRecur(pattern, rest) 408 } 409 410 // Normal patterns only match one part of the path. 411 match, err := m.util.Match(pFirst, first) 412 if err != nil { 413 // The pattern should have been validated up-front. 414 panic(err) 415 } 416 417 if !match { 418 return false 419 } 420 421 // Recur on the next part of both the pattern and the path. 422 return m.matchRecur(pRest, rest) 423 } 424 425 func (m patternMatcher) Empty() bool { 426 return false 427 } 428 429 func (m patternMatcher) All() bool { 430 return false 431 } 432 433 func (m patternMatcher) IsNormal() bool { 434 return true 435 } 436 437 func (m patternMatcher) AsFileSet() []string { 438 return nil 439 } 440 441 func (m patternMatcher) Subdir(prefix string) Matcher { 442 return &patternMatcher{ 443 util: m.util, 444 pattern: m.util.Join(prefix, m.pattern), 445 } 446 } 447 448 func (m patternMatcher) Child(prefix string) Matcher { 449 child, ok := childPattern(m.util, prefix, m.pattern) 450 if !ok { 451 return NewEmptyMatcher() 452 } 453 result, err := NewMatcherFromPattern(m.util, child) 454 if err != nil { 455 panic(fmt.Sprintf("Child(%v, %s) produced invalid pattern: %q", m.ToPatterns(), prefix, child)) 456 } 457 return result 458 } 459 460 // Matches nothing. 461 func NewEmptyMatcher() Matcher { 462 return listMatcher{conjunction: false, matchers: []Matcher{}} 463 } 464 465 // Matches everything. 466 func NewAllMatcher() Matcher { 467 return listMatcher{conjunction: true, matchers: []Matcher{}} 468 } 469 470 // Matches a single file only 471 func NewFileMatcher(util PathUtil, file string) (Matcher, error) { 472 if file == "" { 473 return nil, fmt.Errorf("NewFileMatcher: no file specified") 474 } 475 return fileMatcher{util: util, file: file}, nil 476 } 477 478 func NewFilesMatcher(util PathUtil, files []string) (Matcher, error) { 479 matchers := make([]Matcher, 0, len(files)) 480 for _, f := range files { 481 m, err := NewFileMatcher(util, f) 482 if err != nil { 483 return nil, err 484 } 485 matchers = append(matchers, m) 486 } 487 return newDisjunctionMatcher(matchers), nil 488 } 489 490 func NewMatcherFromPattern(util PathUtil, pattern string) (Matcher, error) { 491 if strings.IndexFunc(pattern, unicode.IsSpace) != -1 { 492 return nil, fmt.Errorf("Path patterns may not contain whitespace: %q", pattern) 493 } 494 495 if strings.HasPrefix(pattern, "/") { 496 return nil, fmt.Errorf("Path patterns may not start with a leading slash: %q", pattern) 497 } 498 499 if isInverted(pattern) { 500 inner, err := NewMatcherFromPattern(util, pattern[1:]) 501 if err != nil { 502 return nil, err 503 } 504 return InvertMatcher(inner) 505 } 506 507 if strings.Index(pattern, filePrefix) == 0 { 508 return NewFileMatcher(util, pattern[len(filePrefix):]) 509 } 510 511 if pattern == "**" { 512 return NewAllMatcher(), nil 513 } 514 515 // Validate the match pattern. 516 // The only possible error from filepatch.Match is ErrBadPattern. 517 _, err := util.Match(pattern, "") 518 if err != nil { 519 return nil, fmt.Errorf("Bad match pattern %q: %v", pattern, err) 520 } 521 522 return &patternMatcher{ 523 util: util, 524 pattern: pattern, 525 }, nil 526 } 527 528 // When we have positive and negative patterns in the same pattern set, 529 // we treat them as a conjunction of all the positive forms, then disjunction on 530 // all the negative forms. 531 // 532 // For example, the pattern set [A, B, !C, !D] is interpreted as 533 // (A or B) and (not C) and (not D) 534 // We consider this Normal Form. 535 // 536 // We try to enforce that all matchers are in normal form, and reject matchers that are not. 537 func NewMatcherFromPatterns(util PathUtil, patterns []string) (Matcher, error) { 538 positivePatterns := make([]string, 0, len(patterns)) 539 negativePatterns := make([]string, 0, len(patterns)) 540 for _, pattern := range patterns { 541 if isInverted(pattern) { 542 negativePatterns = append(negativePatterns, pattern) 543 } else { 544 positivePatterns = append(positivePatterns, pattern) 545 } 546 } 547 548 positivePatterns, negativePatterns = simplifyPatterns(util, positivePatterns, negativePatterns) 549 550 matchers := make([]Matcher, len(positivePatterns)) 551 for i, pattern := range positivePatterns { 552 m, err := NewMatcherFromPattern(util, pattern) 553 if err != nil { 554 return nil, err 555 } 556 matchers[i] = m 557 } 558 559 invMatchers := make([]Matcher, len(negativePatterns)) 560 for i, pattern := range negativePatterns { 561 m, err := NewMatcherFromPattern(util, pattern) 562 if err != nil { 563 return nil, err 564 } 565 invMatchers[i] = m 566 } 567 568 if len(matchers) != 0 { 569 return newConjunctionMatcher( 570 append([]Matcher{newDisjunctionMatcher(matchers)}, invMatchers...)), 571 nil 572 } else { 573 return newConjunctionMatcher(invMatchers), nil 574 } 575 } 576 577 func isInverted(p string) bool { 578 return len(p) != 0 && p[0] == '!' 579 } 580 581 func MatchersEqual(a, b Matcher) bool { 582 aPatterns := a.ToPatterns() 583 bPatterns := b.ToPatterns() 584 if len(aPatterns) != len(bPatterns) { 585 return false 586 } 587 588 sort.Strings(aPatterns) 589 sort.Strings(bPatterns) 590 for i, aPattern := range aPatterns { 591 bPattern := bPatterns[i] 592 if aPattern != bPattern { 593 return false 594 } 595 } 596 return true 597 } 598 599 // Helper function to check if two positive patterns are orthogonal. 600 // By "orthogonal", we mean that there does not exist a path that can satisfy both. 601 func arePatternsOrthogonal(util PathUtil, p1, p2 string) bool { 602 // This is a very simple algorithm that goes through each 603 // path segment and see if they don't match. 604 // 605 // For example, 606 // a/b/* 607 // a/c/* 608 // are not equal when we compare "b" and "c", so they are orthogonal. 609 // 610 // If we see any stars, or if we're out of path segments, we end immediately. 611 p1First, p1Rest := SplitFirst(util, p1) 612 if p1Rest == "" || strings.ContainsRune(p1First, '*') { 613 return false 614 } 615 616 p2First, p2Rest := SplitFirst(util, p2) 617 if p2Rest == "" || strings.ContainsRune(p2First, '*') { 618 return false 619 } 620 621 if p1First != p2First { 622 return true 623 } 624 return arePatternsOrthogonal(util, p1Rest, p2Rest) 625 } 626 627 // Helper to filter out negative patterns that are orthogonal 628 // to the positive patterns. As an example, if we have: 629 // ["*.txt", "!*.py"] 630 // we can skip the *.py. 631 // 632 // This is both an optimization and needed for correctness, 633 // because ["*.txt"] is invertible in our matcher engine 634 // but ["*.txt", "!*.py"] is not. 635 func simplifyPatterns(util PathUtil, positivePatterns, negativePatterns []string) ([]string, []string) { 636 if len(positivePatterns) > 0 && len(negativePatterns) > 0 { 637 simplifiedNegativePatterns := make([]string, 0, len(negativePatterns)) 638 for _, negPattern := range negativePatterns { 639 p := negPattern[1:] // remove the "!" 640 for _, posPattern := range positivePatterns { 641 if !arePatternsOrthogonal(util, p, posPattern) { 642 simplifiedNegativePatterns = append(simplifiedNegativePatterns, negPattern) 643 break 644 } 645 } 646 } 647 return positivePatterns, simplifiedNegativePatterns 648 } 649 return positivePatterns, negativePatterns 650 }