github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/fs/filter/filter.go (about) 1 // Package filter controls the filtering of files 2 package filter 3 4 import ( 5 "bufio" 6 "context" 7 "fmt" 8 "log" 9 "os" 10 "path" 11 "regexp" 12 "strings" 13 "time" 14 15 "github.com/ncw/rclone/fs" 16 "github.com/pkg/errors" 17 "golang.org/x/sync/errgroup" 18 ) 19 20 // Active is the globally active filter 21 var Active = mustNewFilter(nil) 22 23 // rule is one filter rule 24 type rule struct { 25 Include bool 26 Regexp *regexp.Regexp 27 boundedRecursion bool 28 } 29 30 // Match returns true if rule matches path 31 func (r *rule) Match(path string) bool { 32 return r.Regexp.MatchString(path) 33 } 34 35 // String the rule 36 func (r *rule) String() string { 37 c := "-" 38 if r.Include { 39 c = "+" 40 } 41 return fmt.Sprintf("%s %s", c, r.Regexp.String()) 42 } 43 44 // rules is a slice of rules 45 type rules struct { 46 rules []rule 47 existing map[string]struct{} 48 } 49 50 // add adds a rule if it doesn't exist already 51 func (rs *rules) add(Include bool, re *regexp.Regexp, boundedRecursion bool) { 52 if rs.existing == nil { 53 rs.existing = make(map[string]struct{}) 54 } 55 newRule := rule{ 56 Include: Include, 57 Regexp: re, 58 boundedRecursion: boundedRecursion, 59 } 60 newRuleString := newRule.String() 61 if _, ok := rs.existing[newRuleString]; ok { 62 return // rule already exists 63 } 64 rs.rules = append(rs.rules, newRule) 65 rs.existing[newRuleString] = struct{}{} 66 } 67 68 // clear clears all the rules 69 func (rs *rules) clear() { 70 rs.rules = nil 71 rs.existing = nil 72 } 73 74 // len returns the number of rules 75 func (rs *rules) len() int { 76 return len(rs.rules) 77 } 78 79 // boundedRecursion returns true if the set of filters would only 80 // need bounded recursion to evaluate 81 func (rs *rules) boundedRecursion() bool { 82 var ( 83 excludeAll = false 84 boundedRecursion = true 85 ) 86 for _, rule := range rs.rules { 87 if rule.Include { 88 boundedRecursion = boundedRecursion && rule.boundedRecursion 89 } else if rule.Regexp.String() == `^.*$` { 90 excludeAll = true 91 } 92 } 93 return excludeAll && boundedRecursion 94 } 95 96 // FilesMap describes the map of files to transfer 97 type FilesMap map[string]struct{} 98 99 // Opt configures the filter 100 type Opt struct { 101 DeleteExcluded bool 102 FilterRule []string 103 FilterFrom []string 104 ExcludeRule []string 105 ExcludeFrom []string 106 ExcludeFile string 107 IncludeRule []string 108 IncludeFrom []string 109 FilesFrom []string 110 MinAge fs.Duration 111 MaxAge fs.Duration 112 MinSize fs.SizeSuffix 113 MaxSize fs.SizeSuffix 114 IgnoreCase bool 115 } 116 117 // DefaultOpt is the default config for the filter 118 var DefaultOpt = Opt{ 119 MinAge: fs.DurationOff, 120 MaxAge: fs.DurationOff, 121 MinSize: fs.SizeSuffix(-1), 122 MaxSize: fs.SizeSuffix(-1), 123 } 124 125 // Filter describes any filtering in operation 126 type Filter struct { 127 Opt Opt 128 ModTimeFrom time.Time 129 ModTimeTo time.Time 130 fileRules rules 131 dirRules rules 132 files FilesMap // files if filesFrom 133 dirs FilesMap // dirs from filesFrom 134 } 135 136 // NewFilter parses the command line options and creates a Filter 137 // object. If opt is nil, then DefaultOpt will be used 138 func NewFilter(opt *Opt) (f *Filter, err error) { 139 f = &Filter{} 140 141 // Make a copy of the options 142 if opt != nil { 143 f.Opt = *opt 144 } else { 145 f.Opt = DefaultOpt 146 } 147 148 // Filter flags 149 if f.Opt.MinAge.IsSet() { 150 f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge)) 151 fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo) 152 } 153 if f.Opt.MaxAge.IsSet() { 154 f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge)) 155 if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) { 156 log.Fatal("filter: --min-age can't be larger than --max-age") 157 } 158 fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom) 159 } 160 161 addImplicitExclude := false 162 foundExcludeRule := false 163 164 for _, rule := range f.Opt.IncludeRule { 165 err = f.Add(true, rule) 166 if err != nil { 167 return nil, err 168 } 169 addImplicitExclude = true 170 } 171 for _, rule := range f.Opt.IncludeFrom { 172 err := forEachLine(rule, func(line string) error { 173 return f.Add(true, line) 174 }) 175 if err != nil { 176 return nil, err 177 } 178 addImplicitExclude = true 179 } 180 for _, rule := range f.Opt.ExcludeRule { 181 err = f.Add(false, rule) 182 if err != nil { 183 return nil, err 184 } 185 foundExcludeRule = true 186 } 187 for _, rule := range f.Opt.ExcludeFrom { 188 err := forEachLine(rule, func(line string) error { 189 return f.Add(false, line) 190 }) 191 if err != nil { 192 return nil, err 193 } 194 foundExcludeRule = true 195 } 196 197 if addImplicitExclude && foundExcludeRule { 198 fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate") 199 } 200 201 for _, rule := range f.Opt.FilterRule { 202 err = f.AddRule(rule) 203 if err != nil { 204 return nil, err 205 } 206 } 207 for _, rule := range f.Opt.FilterFrom { 208 err := forEachLine(rule, f.AddRule) 209 if err != nil { 210 return nil, err 211 } 212 } 213 for _, rule := range f.Opt.FilesFrom { 214 f.initAddFile() // init to show --files-from set even if no files within 215 err := forEachLine(rule, func(line string) error { 216 return f.AddFile(line) 217 }) 218 if err != nil { 219 return nil, err 220 } 221 } 222 if addImplicitExclude { 223 err = f.Add(false, "/**") 224 if err != nil { 225 return nil, err 226 } 227 } 228 if fs.Config.Dump&fs.DumpFilters != 0 { 229 fmt.Println("--- start filters ---") 230 fmt.Println(f.DumpFilters()) 231 fmt.Println("--- end filters ---") 232 } 233 return f, nil 234 } 235 236 func mustNewFilter(opt *Opt) *Filter { 237 f, err := NewFilter(opt) 238 if err != nil { 239 panic(err) 240 } 241 return f 242 } 243 244 // addDirGlobs adds directory globs from the file glob passed in 245 func (f *Filter) addDirGlobs(Include bool, glob string) error { 246 for _, dirGlob := range globToDirGlobs(glob) { 247 // Don't add "/" as we always include the root 248 if dirGlob == "/" { 249 continue 250 } 251 dirRe, err := globToRegexp(dirGlob, f.Opt.IgnoreCase) 252 if err != nil { 253 return err 254 } 255 boundedRecursion := globBoundedRecursion(dirGlob) 256 f.dirRules.add(Include, dirRe, boundedRecursion) 257 } 258 return nil 259 } 260 261 // Add adds a filter rule with include or exclude status indicated 262 func (f *Filter) Add(Include bool, glob string) error { 263 isDirRule := strings.HasSuffix(glob, "/") 264 isFileRule := !isDirRule 265 if strings.Contains(glob, "**") { 266 isDirRule, isFileRule = true, true 267 } 268 re, err := globToRegexp(glob, f.Opt.IgnoreCase) 269 if err != nil { 270 return err 271 } 272 boundedRecursion := globBoundedRecursion(glob) 273 if isFileRule { 274 f.fileRules.add(Include, re, boundedRecursion) 275 // If include rule work out what directories are needed to scan 276 // if exclude rule, we can't rule anything out 277 // Unless it is `*` which matches everything 278 // NB ** and /** are DirRules 279 if Include || glob == "*" { 280 err = f.addDirGlobs(Include, glob) 281 if err != nil { 282 return err 283 } 284 } 285 } 286 if isDirRule { 287 f.dirRules.add(Include, re, boundedRecursion) 288 } 289 return nil 290 } 291 292 // AddRule adds a filter rule with include/exclude indicated by the prefix 293 // 294 // These are 295 // 296 // + glob 297 // - glob 298 // ! 299 // 300 // '+' includes the glob, '-' excludes it and '!' resets the filter list 301 // 302 // Line comments may be introduced with '#' or ';' 303 func (f *Filter) AddRule(rule string) error { 304 switch { 305 case rule == "!": 306 f.Clear() 307 return nil 308 case strings.HasPrefix(rule, "- "): 309 return f.Add(false, rule[2:]) 310 case strings.HasPrefix(rule, "+ "): 311 return f.Add(true, rule[2:]) 312 } 313 return errors.Errorf("malformed rule %q", rule) 314 } 315 316 // initAddFile creates f.files and f.dirs 317 func (f *Filter) initAddFile() { 318 if f.files == nil { 319 f.files = make(FilesMap) 320 f.dirs = make(FilesMap) 321 } 322 } 323 324 // AddFile adds a single file to the files from list 325 func (f *Filter) AddFile(file string) error { 326 f.initAddFile() 327 file = strings.Trim(file, "/") 328 f.files[file] = struct{}{} 329 // Put all the parent directories into f.dirs 330 for { 331 file = path.Dir(file) 332 if file == "." { 333 break 334 } 335 if _, found := f.dirs[file]; found { 336 break 337 } 338 f.dirs[file] = struct{}{} 339 } 340 return nil 341 } 342 343 // Files returns all the files from the `--files-from` list 344 // 345 // It may be nil if the list is empty 346 func (f *Filter) Files() FilesMap { 347 return f.files 348 } 349 350 // Clear clears all the filter rules 351 func (f *Filter) Clear() { 352 f.fileRules.clear() 353 f.dirRules.clear() 354 } 355 356 // InActive returns false if any filters are active 357 func (f *Filter) InActive() bool { 358 return (f.files == nil && 359 f.ModTimeFrom.IsZero() && 360 f.ModTimeTo.IsZero() && 361 f.Opt.MinSize < 0 && 362 f.Opt.MaxSize < 0 && 363 f.fileRules.len() == 0 && 364 f.dirRules.len() == 0 && 365 len(f.Opt.ExcludeFile) == 0) 366 } 367 368 // BoundedRecursion returns true if the filter can be evaluated with 369 // bounded recursion only. 370 func (f *Filter) BoundedRecursion() bool { 371 return f.fileRules.boundedRecursion() 372 } 373 374 // includeRemote returns whether this remote passes the filter rules. 375 func (f *Filter) includeRemote(remote string) bool { 376 for _, rule := range f.fileRules.rules { 377 if rule.Match(remote) { 378 return rule.Include 379 } 380 } 381 return true 382 } 383 384 // ListContainsExcludeFile checks if exclude file is present in the list. 385 func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool { 386 if len(f.Opt.ExcludeFile) == 0 { 387 return false 388 } 389 for _, entry := range entries { 390 obj, ok := entry.(fs.Object) 391 if ok { 392 basename := path.Base(obj.Remote()) 393 if basename == f.Opt.ExcludeFile { 394 return true 395 } 396 } 397 } 398 return false 399 } 400 401 // IncludeDirectory returns a function which checks whether this 402 // directory should be included in the sync or not. 403 func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (bool, error) { 404 return func(remote string) (bool, error) { 405 remote = strings.Trim(remote, "/") 406 // first check if we need to remove directory based on 407 // the exclude file 408 excl, err := f.DirContainsExcludeFile(ctx, fs, remote) 409 if err != nil { 410 return false, err 411 } 412 if excl { 413 return false, nil 414 } 415 416 // filesFrom takes precedence 417 if f.files != nil { 418 _, include := f.dirs[remote] 419 return include, nil 420 } 421 remote += "/" 422 for _, rule := range f.dirRules.rules { 423 if rule.Match(remote) { 424 return rule.Include, nil 425 } 426 } 427 428 return true, nil 429 } 430 } 431 432 // DirContainsExcludeFile checks if exclude file is present in a 433 // directroy. If fs is nil, it works properly if ExcludeFile is an 434 // empty string (for testing). 435 func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remote string) (bool, error) { 436 if len(f.Opt.ExcludeFile) > 0 { 437 exists, err := fs.FileExists(ctx, fremote, path.Join(remote, f.Opt.ExcludeFile)) 438 if err != nil { 439 return false, err 440 } 441 if exists { 442 return true, nil 443 } 444 } 445 return false, nil 446 } 447 448 // Include returns whether this object should be included into the 449 // sync or not 450 func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { 451 // filesFrom takes precedence 452 if f.files != nil { 453 _, include := f.files[remote] 454 return include 455 } 456 if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) { 457 return false 458 } 459 if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) { 460 return false 461 } 462 if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) { 463 return false 464 } 465 if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { 466 return false 467 } 468 return f.includeRemote(remote) 469 } 470 471 // IncludeObject returns whether this object should be included into 472 // the sync or not. This is a convenience function to avoid calling 473 // o.ModTime(), which is an expensive operation. 474 func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool { 475 var modTime time.Time 476 477 if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() { 478 modTime = o.ModTime(ctx) 479 } else { 480 modTime = time.Unix(0, 0) 481 } 482 483 return f.Include(o.Remote(), o.Size(), modTime) 484 } 485 486 // forEachLine calls fn on every line in the file pointed to by path 487 // 488 // It ignores empty lines and lines starting with '#' or ';' 489 func forEachLine(path string, fn func(string) error) (err error) { 490 in, err := os.Open(path) 491 if err != nil { 492 return err 493 } 494 defer fs.CheckClose(in, &err) 495 scanner := bufio.NewScanner(in) 496 for scanner.Scan() { 497 line := scanner.Text() 498 line = strings.TrimSpace(line) 499 if len(line) == 0 || line[0] == '#' || line[0] == ';' { 500 continue 501 } 502 err := fn(line) 503 if err != nil { 504 return err 505 } 506 } 507 return scanner.Err() 508 } 509 510 // DumpFilters dumps the filters in textual form, 1 per line 511 func (f *Filter) DumpFilters() string { 512 rules := []string{} 513 if !f.ModTimeFrom.IsZero() { 514 rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String())) 515 } 516 if !f.ModTimeTo.IsZero() { 517 rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String())) 518 } 519 rules = append(rules, "--- File filter rules ---") 520 for _, rule := range f.fileRules.rules { 521 rules = append(rules, rule.String()) 522 } 523 rules = append(rules, "--- Directory filter rules ---") 524 for _, dirRule := range f.dirRules.rules { 525 rules = append(rules, dirRule.String()) 526 } 527 return strings.Join(rules, "\n") 528 } 529 530 // HaveFilesFrom returns true if --files-from has been supplied 531 func (f *Filter) HaveFilesFrom() bool { 532 return f.files != nil 533 } 534 535 var errFilesFromNotSet = errors.New("--files-from not set so can't use Filter.ListR") 536 537 // MakeListR makes function to return all the files set using --files-from 538 func (f *Filter) MakeListR(ctx context.Context, NewObject func(ctx context.Context, remote string) (fs.Object, error)) fs.ListRFn { 539 return func(ctx context.Context, dir string, callback fs.ListRCallback) error { 540 if !f.HaveFilesFrom() { 541 return errFilesFromNotSet 542 } 543 var ( 544 remotes = make(chan string, fs.Config.Checkers) 545 g errgroup.Group 546 ) 547 for i := 0; i < fs.Config.Checkers; i++ { 548 g.Go(func() (err error) { 549 var entries = make(fs.DirEntries, 1) 550 for remote := range remotes { 551 entries[0], err = NewObject(ctx, remote) 552 if err == fs.ErrorObjectNotFound { 553 // Skip files that are not found 554 } else if err != nil { 555 return err 556 } else { 557 err = callback(entries) 558 if err != nil { 559 return err 560 } 561 } 562 } 563 return nil 564 }) 565 } 566 for remote := range f.files { 567 remotes <- remote 568 } 569 close(remotes) 570 return g.Wait() 571 } 572 }