github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/filter/filter.go (about) 1 // Package filter controls the filtering of files 2 package filter 3 4 import ( 5 "bufio" 6 "context" 7 "fmt" 8 "log" 9 "os" 10 "path" 11 "regexp" 12 "strings" 13 "time" 14 15 "github.com/pkg/errors" 16 "github.com/rclone/rclone/fs" 17 "golang.org/x/sync/errgroup" 18 ) 19 20 // Active is the globally active filter 21 var Active = mustNewFilter(nil) 22 23 // rule is one filter rule 24 type rule struct { 25 Include bool 26 Regexp *regexp.Regexp 27 } 28 29 // Match returns true if rule matches path 30 func (r *rule) Match(path string) bool { 31 return r.Regexp.MatchString(path) 32 } 33 34 // String the rule 35 func (r *rule) String() string { 36 c := "-" 37 if r.Include { 38 c = "+" 39 } 40 return fmt.Sprintf("%s %s", c, r.Regexp.String()) 41 } 42 43 // rules is a slice of rules 44 type rules struct { 45 rules []rule 46 existing map[string]struct{} 47 } 48 49 // add adds a rule if it doesn't exist already 50 func (rs *rules) add(Include bool, re *regexp.Regexp) { 51 if rs.existing == nil { 52 rs.existing = make(map[string]struct{}) 53 } 54 newRule := rule{ 55 Include: Include, 56 Regexp: re, 57 } 58 newRuleString := newRule.String() 59 if _, ok := rs.existing[newRuleString]; ok { 60 return // rule already exists 61 } 62 rs.rules = append(rs.rules, newRule) 63 rs.existing[newRuleString] = struct{}{} 64 } 65 66 // clear clears all the rules 67 func (rs *rules) clear() { 68 rs.rules = nil 69 rs.existing = nil 70 } 71 72 // len returns the number of rules 73 func (rs *rules) len() int { 74 return len(rs.rules) 75 } 76 77 // FilesMap describes the map of files to transfer 78 type FilesMap map[string]struct{} 79 80 // Opt configures the filter 81 type Opt struct { 82 DeleteExcluded bool 83 FilterRule []string 84 FilterFrom []string 85 ExcludeRule []string 86 ExcludeFrom []string 87 ExcludeFile string 88 IncludeRule []string 89 IncludeFrom []string 90 FilesFrom []string 91 FilesFromRaw []string 92 MinAge fs.Duration 93 MaxAge fs.Duration 94 MinSize fs.SizeSuffix 95 MaxSize fs.SizeSuffix 96 IgnoreCase bool 97 } 98 99 // DefaultOpt is the default config for the filter 100 var DefaultOpt = Opt{ 101 MinAge: fs.DurationOff, 102 MaxAge: fs.DurationOff, 103 MinSize: fs.SizeSuffix(-1), 104 MaxSize: fs.SizeSuffix(-1), 105 } 106 107 // Filter describes any filtering in operation 108 type Filter struct { 109 Opt Opt 110 ModTimeFrom time.Time 111 ModTimeTo time.Time 112 fileRules rules 113 dirRules rules 114 files FilesMap // files if filesFrom 115 dirs FilesMap // dirs from filesFrom 116 } 117 118 // NewFilter parses the command line options and creates a Filter 119 // object. If opt is nil, then DefaultOpt will be used 120 func NewFilter(opt *Opt) (f *Filter, err error) { 121 f = &Filter{} 122 123 // Make a copy of the options 124 if opt != nil { 125 f.Opt = *opt 126 } else { 127 f.Opt = DefaultOpt 128 } 129 130 // Filter flags 131 if f.Opt.MinAge.IsSet() { 132 f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge)) 133 fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo) 134 } 135 if f.Opt.MaxAge.IsSet() { 136 f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge)) 137 if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) { 138 log.Fatal("filter: --min-age can't be larger than --max-age") 139 } 140 fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom) 141 } 142 143 addImplicitExclude := false 144 foundExcludeRule := false 145 146 for _, rule := range f.Opt.IncludeRule { 147 err = f.Add(true, rule) 148 if err != nil { 149 return nil, err 150 } 151 addImplicitExclude = true 152 } 153 for _, rule := range f.Opt.IncludeFrom { 154 err := forEachLine(rule, false, func(line string) error { 155 return f.Add(true, line) 156 }) 157 if err != nil { 158 return nil, err 159 } 160 addImplicitExclude = true 161 } 162 for _, rule := range f.Opt.ExcludeRule { 163 err = f.Add(false, rule) 164 if err != nil { 165 return nil, err 166 } 167 foundExcludeRule = true 168 } 169 for _, rule := range f.Opt.ExcludeFrom { 170 err := forEachLine(rule, false, func(line string) error { 171 return f.Add(false, line) 172 }) 173 if err != nil { 174 return nil, err 175 } 176 foundExcludeRule = true 177 } 178 179 if addImplicitExclude && foundExcludeRule { 180 fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate") 181 } 182 183 for _, rule := range f.Opt.FilterRule { 184 err = f.AddRule(rule) 185 if err != nil { 186 return nil, err 187 } 188 } 189 for _, rule := range f.Opt.FilterFrom { 190 err := forEachLine(rule, false, f.AddRule) 191 if err != nil { 192 return nil, err 193 } 194 } 195 196 inActive := f.InActive() 197 198 for _, rule := range f.Opt.FilesFrom { 199 if !inActive { 200 return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone or with --files-from-raw") 201 } 202 f.initAddFile() // init to show --files-from set even if no files within 203 err := forEachLine(rule, false, func(line string) error { 204 return f.AddFile(line) 205 }) 206 if err != nil { 207 return nil, err 208 } 209 } 210 211 for _, rule := range f.Opt.FilesFromRaw { 212 // --files-from-raw can be used with --files-from, hence we do 213 // not need to get the value of f.InActive again 214 if !inActive { 215 return nil, fmt.Errorf("The usage of --files-from-raw overrides all other filters, it should be used alone or with --files-from") 216 } 217 f.initAddFile() // init to show --files-from set even if no files within 218 err := forEachLine(rule, true, func(line string) error { 219 return f.AddFile(line) 220 }) 221 if err != nil { 222 return nil, err 223 } 224 } 225 226 if addImplicitExclude { 227 err = f.Add(false, "/**") 228 if err != nil { 229 return nil, err 230 } 231 } 232 if fs.Config.Dump&fs.DumpFilters != 0 { 233 fmt.Println("--- start filters ---") 234 fmt.Println(f.DumpFilters()) 235 fmt.Println("--- end filters ---") 236 } 237 return f, nil 238 } 239 240 func mustNewFilter(opt *Opt) *Filter { 241 f, err := NewFilter(opt) 242 if err != nil { 243 panic(err) 244 } 245 return f 246 } 247 248 // addDirGlobs adds directory globs from the file glob passed in 249 func (f *Filter) addDirGlobs(Include bool, glob string) error { 250 for _, dirGlob := range globToDirGlobs(glob) { 251 // Don't add "/" as we always include the root 252 if dirGlob == "/" { 253 continue 254 } 255 dirRe, err := globToRegexp(dirGlob, f.Opt.IgnoreCase) 256 if err != nil { 257 return err 258 } 259 f.dirRules.add(Include, dirRe) 260 } 261 return nil 262 } 263 264 // Add adds a filter rule with include or exclude status indicated 265 func (f *Filter) Add(Include bool, glob string) error { 266 isDirRule := strings.HasSuffix(glob, "/") 267 isFileRule := !isDirRule 268 if strings.Contains(glob, "**") { 269 isDirRule, isFileRule = true, true 270 } 271 re, err := globToRegexp(glob, f.Opt.IgnoreCase) 272 if err != nil { 273 return err 274 } 275 if isFileRule { 276 f.fileRules.add(Include, re) 277 // If include rule work out what directories are needed to scan 278 // if exclude rule, we can't rule anything out 279 // Unless it is `*` which matches everything 280 // NB ** and /** are DirRules 281 if Include || glob == "*" { 282 err = f.addDirGlobs(Include, glob) 283 if err != nil { 284 return err 285 } 286 } 287 } 288 if isDirRule { 289 f.dirRules.add(Include, re) 290 } 291 return nil 292 } 293 294 // AddRule adds a filter rule with include/exclude indicated by the prefix 295 // 296 // These are 297 // 298 // + glob 299 // - glob 300 // ! 301 // 302 // '+' includes the glob, '-' excludes it and '!' resets the filter list 303 // 304 // Line comments may be introduced with '#' or ';' 305 func (f *Filter) AddRule(rule string) error { 306 switch { 307 case rule == "!": 308 f.Clear() 309 return nil 310 case strings.HasPrefix(rule, "- "): 311 return f.Add(false, rule[2:]) 312 case strings.HasPrefix(rule, "+ "): 313 return f.Add(true, rule[2:]) 314 } 315 return errors.Errorf("malformed rule %q", rule) 316 } 317 318 // initAddFile creates f.files and f.dirs 319 func (f *Filter) initAddFile() { 320 if f.files == nil { 321 f.files = make(FilesMap) 322 f.dirs = make(FilesMap) 323 } 324 } 325 326 // AddFile adds a single file to the files from list 327 func (f *Filter) AddFile(file string) error { 328 f.initAddFile() 329 file = strings.Trim(file, "/") 330 f.files[file] = struct{}{} 331 // Put all the parent directories into f.dirs 332 for { 333 file = path.Dir(file) 334 if file == "." { 335 break 336 } 337 if _, found := f.dirs[file]; found { 338 break 339 } 340 f.dirs[file] = struct{}{} 341 } 342 return nil 343 } 344 345 // Files returns all the files from the `--files-from` list 346 // 347 // It may be nil if the list is empty 348 func (f *Filter) Files() FilesMap { 349 return f.files 350 } 351 352 // Clear clears all the filter rules 353 func (f *Filter) Clear() { 354 f.fileRules.clear() 355 f.dirRules.clear() 356 } 357 358 // InActive returns false if any filters are active 359 func (f *Filter) InActive() bool { 360 return (f.files == nil && 361 f.ModTimeFrom.IsZero() && 362 f.ModTimeTo.IsZero() && 363 f.Opt.MinSize < 0 && 364 f.Opt.MaxSize < 0 && 365 f.fileRules.len() == 0 && 366 f.dirRules.len() == 0 && 367 len(f.Opt.ExcludeFile) == 0) 368 } 369 370 // includeRemote returns whether this remote passes the filter rules. 371 func (f *Filter) includeRemote(remote string) bool { 372 for _, rule := range f.fileRules.rules { 373 if rule.Match(remote) { 374 return rule.Include 375 } 376 } 377 return true 378 } 379 380 // ListContainsExcludeFile checks if exclude file is present in the list. 381 func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool { 382 if len(f.Opt.ExcludeFile) == 0 { 383 return false 384 } 385 for _, entry := range entries { 386 obj, ok := entry.(fs.Object) 387 if ok { 388 basename := path.Base(obj.Remote()) 389 if basename == f.Opt.ExcludeFile { 390 return true 391 } 392 } 393 } 394 return false 395 } 396 397 // IncludeDirectory returns a function which checks whether this 398 // directory should be included in the sync or not. 399 func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (bool, error) { 400 return func(remote string) (bool, error) { 401 remote = strings.Trim(remote, "/") 402 // first check if we need to remove directory based on 403 // the exclude file 404 excl, err := f.DirContainsExcludeFile(ctx, fs, remote) 405 if err != nil { 406 return false, err 407 } 408 if excl { 409 return false, nil 410 } 411 412 // filesFrom takes precedence 413 if f.files != nil { 414 _, include := f.dirs[remote] 415 return include, nil 416 } 417 remote += "/" 418 for _, rule := range f.dirRules.rules { 419 if rule.Match(remote) { 420 return rule.Include, nil 421 } 422 } 423 424 return true, nil 425 } 426 } 427 428 // DirContainsExcludeFile checks if exclude file is present in a 429 // directory. If fs is nil, it works properly if ExcludeFile is an 430 // empty string (for testing). 431 func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remote string) (bool, error) { 432 if len(f.Opt.ExcludeFile) > 0 { 433 exists, err := fs.FileExists(ctx, fremote, path.Join(remote, f.Opt.ExcludeFile)) 434 if err != nil { 435 return false, err 436 } 437 if exists { 438 return true, nil 439 } 440 } 441 return false, nil 442 } 443 444 // Include returns whether this object should be included into the 445 // sync or not 446 func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { 447 // filesFrom takes precedence 448 if f.files != nil { 449 _, include := f.files[remote] 450 return include 451 } 452 if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) { 453 return false 454 } 455 if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) { 456 return false 457 } 458 if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) { 459 return false 460 } 461 if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { 462 return false 463 } 464 return f.includeRemote(remote) 465 } 466 467 // IncludeObject returns whether this object should be included into 468 // the sync or not. This is a convenience function to avoid calling 469 // o.ModTime(), which is an expensive operation. 470 func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool { 471 var modTime time.Time 472 473 if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() { 474 modTime = o.ModTime(ctx) 475 } else { 476 modTime = time.Unix(0, 0) 477 } 478 479 return f.Include(o.Remote(), o.Size(), modTime) 480 } 481 482 // forEachLine calls fn on every line in the file pointed to by path 483 // 484 // It ignores empty lines and lines starting with '#' or ';' if raw is false 485 func forEachLine(path string, raw bool, fn func(string) error) (err error) { 486 var scanner *bufio.Scanner 487 if path == "-" { 488 scanner = bufio.NewScanner(os.Stdin) 489 } else { 490 in, err := os.Open(path) 491 if err != nil { 492 return err 493 } 494 scanner = bufio.NewScanner(in) 495 defer fs.CheckClose(in, &err) 496 } 497 for scanner.Scan() { 498 line := scanner.Text() 499 if !raw { 500 line = strings.TrimSpace(line) 501 if len(line) == 0 || line[0] == '#' || line[0] == ';' { 502 continue 503 } 504 } 505 err := fn(line) 506 if err != nil { 507 return err 508 } 509 } 510 return scanner.Err() 511 } 512 513 // DumpFilters dumps the filters in textual form, 1 per line 514 func (f *Filter) DumpFilters() string { 515 rules := []string{} 516 if !f.ModTimeFrom.IsZero() { 517 rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String())) 518 } 519 if !f.ModTimeTo.IsZero() { 520 rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String())) 521 } 522 rules = append(rules, "--- File filter rules ---") 523 for _, rule := range f.fileRules.rules { 524 rules = append(rules, rule.String()) 525 } 526 rules = append(rules, "--- Directory filter rules ---") 527 for _, dirRule := range f.dirRules.rules { 528 rules = append(rules, dirRule.String()) 529 } 530 return strings.Join(rules, "\n") 531 } 532 533 // HaveFilesFrom returns true if --files-from has been supplied 534 func (f *Filter) HaveFilesFrom() bool { 535 return f.files != nil 536 } 537 538 var errFilesFromNotSet = errors.New("--files-from not set so can't use Filter.ListR") 539 540 // MakeListR makes function to return all the files set using --files-from 541 func (f *Filter) MakeListR(ctx context.Context, NewObject func(ctx context.Context, remote string) (fs.Object, error)) fs.ListRFn { 542 return func(ctx context.Context, dir string, callback fs.ListRCallback) error { 543 if !f.HaveFilesFrom() { 544 return errFilesFromNotSet 545 } 546 var ( 547 remotes = make(chan string, fs.Config.Checkers) 548 g errgroup.Group 549 ) 550 for i := 0; i < fs.Config.Checkers; i++ { 551 g.Go(func() (err error) { 552 var entries = make(fs.DirEntries, 1) 553 for remote := range remotes { 554 entries[0], err = NewObject(ctx, remote) 555 if err == fs.ErrorObjectNotFound { 556 // Skip files that are not found 557 } else if err != nil { 558 return err 559 } else { 560 err = callback(entries) 561 if err != nil { 562 return err 563 } 564 } 565 } 566 return nil 567 }) 568 } 569 for remote := range f.files { 570 remotes <- remote 571 } 572 close(remotes) 573 return g.Wait() 574 } 575 } 576 577 // UsesDirectoryFilters returns true if the filter uses directory 578 // filters and false if it doesn't. 579 // 580 // This is used in deciding whether to walk directories or use ListR 581 func (f *Filter) UsesDirectoryFilters() bool { 582 if len(f.dirRules.rules) == 0 { 583 return false 584 } 585 rule := f.dirRules.rules[0] 586 re := rule.Regexp.String() 587 if rule.Include == true && re == "^.*$" { 588 return false 589 } 590 return true 591 }