github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/fs/filter/filter.go (about) 1 // Package filter controls the filtering of files 2 package filter 3 4 import ( 5 "bufio" 6 "context" 7 "fmt" 8 "log" 9 "os" 10 "path" 11 "regexp" 12 "strings" 13 "time" 14 15 "github.com/pkg/errors" 16 "github.com/rclone/rclone/fs" 17 "golang.org/x/sync/errgroup" 18 ) 19 20 // Active is the globally active filter 21 var Active = mustNewFilter(nil) 22 23 // rule is one filter rule 24 type rule struct { 25 Include bool 26 Regexp *regexp.Regexp 27 } 28 29 // Match returns true if rule matches path 30 func (r *rule) Match(path string) bool { 31 return r.Regexp.MatchString(path) 32 } 33 34 // String the rule 35 func (r *rule) String() string { 36 c := "-" 37 if r.Include { 38 c = "+" 39 } 40 return fmt.Sprintf("%s %s", c, r.Regexp.String()) 41 } 42 43 // rules is a slice of rules 44 type rules struct { 45 rules []rule 46 existing map[string]struct{} 47 } 48 49 // add adds a rule if it doesn't exist already 50 func (rs *rules) add(Include bool, re *regexp.Regexp) { 51 if rs.existing == nil { 52 rs.existing = make(map[string]struct{}) 53 } 54 newRule := rule{ 55 Include: Include, 56 Regexp: re, 57 } 58 newRuleString := newRule.String() 59 if _, ok := rs.existing[newRuleString]; ok { 60 return // rule already exists 61 } 62 rs.rules = append(rs.rules, newRule) 63 rs.existing[newRuleString] = struct{}{} 64 } 65 66 // clear clears all the rules 67 func (rs *rules) clear() { 68 rs.rules = nil 69 rs.existing = nil 70 } 71 72 // len returns the number of rules 73 func (rs *rules) len() int { 74 return len(rs.rules) 75 } 76 77 // FilesMap describes the map of files to transfer 78 type FilesMap map[string]struct{} 79 80 // Opt configures the filter 81 type Opt struct { 82 DeleteExcluded bool 83 FilterRule []string 84 FilterFrom []string 85 ExcludeRule []string 86 ExcludeFrom []string 87 ExcludeFile string 88 IncludeRule []string 89 IncludeFrom []string 90 FilesFrom []string 91 MinAge fs.Duration 92 MaxAge fs.Duration 93 MinSize fs.SizeSuffix 94 MaxSize fs.SizeSuffix 95 IgnoreCase bool 96 } 97 98 // DefaultOpt is the default config for the filter 99 var DefaultOpt = Opt{ 100 MinAge: fs.DurationOff, 101 MaxAge: fs.DurationOff, 102 MinSize: fs.SizeSuffix(-1), 103 MaxSize: fs.SizeSuffix(-1), 104 } 105 106 // Filter describes any filtering in operation 107 type Filter struct { 108 Opt Opt 109 ModTimeFrom time.Time 110 ModTimeTo time.Time 111 fileRules rules 112 dirRules rules 113 files FilesMap // files if filesFrom 114 dirs FilesMap // dirs from filesFrom 115 } 116 117 // NewFilter parses the command line options and creates a Filter 118 // object. If opt is nil, then DefaultOpt will be used 119 func NewFilter(opt *Opt) (f *Filter, err error) { 120 f = &Filter{} 121 122 // Make a copy of the options 123 if opt != nil { 124 f.Opt = *opt 125 } else { 126 f.Opt = DefaultOpt 127 } 128 129 // Filter flags 130 if f.Opt.MinAge.IsSet() { 131 f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge)) 132 fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo) 133 } 134 if f.Opt.MaxAge.IsSet() { 135 f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge)) 136 if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) { 137 log.Fatal("filter: --min-age can't be larger than --max-age") 138 } 139 fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom) 140 } 141 142 addImplicitExclude := false 143 foundExcludeRule := false 144 145 for _, rule := range f.Opt.IncludeRule { 146 err = f.Add(true, rule) 147 if err != nil { 148 return nil, err 149 } 150 addImplicitExclude = true 151 } 152 for _, rule := range f.Opt.IncludeFrom { 153 err := forEachLine(rule, func(line string) error { 154 return f.Add(true, line) 155 }) 156 if err != nil { 157 return nil, err 158 } 159 addImplicitExclude = true 160 } 161 for _, rule := range f.Opt.ExcludeRule { 162 err = f.Add(false, rule) 163 if err != nil { 164 return nil, err 165 } 166 foundExcludeRule = true 167 } 168 for _, rule := range f.Opt.ExcludeFrom { 169 err := forEachLine(rule, func(line string) error { 170 return f.Add(false, line) 171 }) 172 if err != nil { 173 return nil, err 174 } 175 foundExcludeRule = true 176 } 177 178 if addImplicitExclude && foundExcludeRule { 179 fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate") 180 } 181 182 for _, rule := range f.Opt.FilterRule { 183 err = f.AddRule(rule) 184 if err != nil { 185 return nil, err 186 } 187 } 188 for _, rule := range f.Opt.FilterFrom { 189 err := forEachLine(rule, f.AddRule) 190 if err != nil { 191 return nil, err 192 } 193 } 194 195 inActive := f.InActive() 196 for _, rule := range f.Opt.FilesFrom { 197 if !inActive { 198 return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone") 199 } 200 f.initAddFile() // init to show --files-from set even if no files within 201 err := forEachLine(rule, func(line string) error { 202 return f.AddFile(line) 203 }) 204 if err != nil { 205 return nil, err 206 } 207 } 208 if addImplicitExclude { 209 err = f.Add(false, "/**") 210 if err != nil { 211 return nil, err 212 } 213 } 214 if fs.Config.Dump&fs.DumpFilters != 0 { 215 fmt.Println("--- start filters ---") 216 fmt.Println(f.DumpFilters()) 217 fmt.Println("--- end filters ---") 218 } 219 return f, nil 220 } 221 222 func mustNewFilter(opt *Opt) *Filter { 223 f, err := NewFilter(opt) 224 if err != nil { 225 panic(err) 226 } 227 return f 228 } 229 230 // addDirGlobs adds directory globs from the file glob passed in 231 func (f *Filter) addDirGlobs(Include bool, glob string) error { 232 for _, dirGlob := range globToDirGlobs(glob) { 233 // Don't add "/" as we always include the root 234 if dirGlob == "/" { 235 continue 236 } 237 dirRe, err := globToRegexp(dirGlob, f.Opt.IgnoreCase) 238 if err != nil { 239 return err 240 } 241 f.dirRules.add(Include, dirRe) 242 } 243 return nil 244 } 245 246 // Add adds a filter rule with include or exclude status indicated 247 func (f *Filter) Add(Include bool, glob string) error { 248 isDirRule := strings.HasSuffix(glob, "/") 249 isFileRule := !isDirRule 250 if strings.Contains(glob, "**") { 251 isDirRule, isFileRule = true, true 252 } 253 re, err := globToRegexp(glob, f.Opt.IgnoreCase) 254 if err != nil { 255 return err 256 } 257 if isFileRule { 258 f.fileRules.add(Include, re) 259 // If include rule work out what directories are needed to scan 260 // if exclude rule, we can't rule anything out 261 // Unless it is `*` which matches everything 262 // NB ** and /** are DirRules 263 if Include || glob == "*" { 264 err = f.addDirGlobs(Include, glob) 265 if err != nil { 266 return err 267 } 268 } 269 } 270 if isDirRule { 271 f.dirRules.add(Include, re) 272 } 273 return nil 274 } 275 276 // AddRule adds a filter rule with include/exclude indicated by the prefix 277 // 278 // These are 279 // 280 // + glob 281 // - glob 282 // ! 283 // 284 // '+' includes the glob, '-' excludes it and '!' resets the filter list 285 // 286 // Line comments may be introduced with '#' or ';' 287 func (f *Filter) AddRule(rule string) error { 288 switch { 289 case rule == "!": 290 f.Clear() 291 return nil 292 case strings.HasPrefix(rule, "- "): 293 return f.Add(false, rule[2:]) 294 case strings.HasPrefix(rule, "+ "): 295 return f.Add(true, rule[2:]) 296 } 297 return errors.Errorf("malformed rule %q", rule) 298 } 299 300 // initAddFile creates f.files and f.dirs 301 func (f *Filter) initAddFile() { 302 if f.files == nil { 303 f.files = make(FilesMap) 304 f.dirs = make(FilesMap) 305 } 306 } 307 308 // AddFile adds a single file to the files from list 309 func (f *Filter) AddFile(file string) error { 310 f.initAddFile() 311 file = strings.Trim(file, "/") 312 f.files[file] = struct{}{} 313 // Put all the parent directories into f.dirs 314 for { 315 file = path.Dir(file) 316 if file == "." { 317 break 318 } 319 if _, found := f.dirs[file]; found { 320 break 321 } 322 f.dirs[file] = struct{}{} 323 } 324 return nil 325 } 326 327 // Files returns all the files from the `--files-from` list 328 // 329 // It may be nil if the list is empty 330 func (f *Filter) Files() FilesMap { 331 return f.files 332 } 333 334 // Clear clears all the filter rules 335 func (f *Filter) Clear() { 336 f.fileRules.clear() 337 f.dirRules.clear() 338 } 339 340 // InActive returns false if any filters are active 341 func (f *Filter) InActive() bool { 342 return (f.files == nil && 343 f.ModTimeFrom.IsZero() && 344 f.ModTimeTo.IsZero() && 345 f.Opt.MinSize < 0 && 346 f.Opt.MaxSize < 0 && 347 f.fileRules.len() == 0 && 348 f.dirRules.len() == 0 && 349 len(f.Opt.ExcludeFile) == 0) 350 } 351 352 // includeRemote returns whether this remote passes the filter rules. 353 func (f *Filter) includeRemote(remote string) bool { 354 for _, rule := range f.fileRules.rules { 355 if rule.Match(remote) { 356 return rule.Include 357 } 358 } 359 return true 360 } 361 362 // ListContainsExcludeFile checks if exclude file is present in the list. 363 func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool { 364 if len(f.Opt.ExcludeFile) == 0 { 365 return false 366 } 367 for _, entry := range entries { 368 obj, ok := entry.(fs.Object) 369 if ok { 370 basename := path.Base(obj.Remote()) 371 if basename == f.Opt.ExcludeFile { 372 return true 373 } 374 } 375 } 376 return false 377 } 378 379 // IncludeDirectory returns a function which checks whether this 380 // directory should be included in the sync or not. 381 func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (bool, error) { 382 return func(remote string) (bool, error) { 383 remote = strings.Trim(remote, "/") 384 // first check if we need to remove directory based on 385 // the exclude file 386 excl, err := f.DirContainsExcludeFile(ctx, fs, remote) 387 if err != nil { 388 return false, err 389 } 390 if excl { 391 return false, nil 392 } 393 394 // filesFrom takes precedence 395 if f.files != nil { 396 _, include := f.dirs[remote] 397 return include, nil 398 } 399 remote += "/" 400 for _, rule := range f.dirRules.rules { 401 if rule.Match(remote) { 402 return rule.Include, nil 403 } 404 } 405 406 return true, nil 407 } 408 } 409 410 // DirContainsExcludeFile checks if exclude file is present in a 411 // directroy. If fs is nil, it works properly if ExcludeFile is an 412 // empty string (for testing). 413 func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remote string) (bool, error) { 414 if len(f.Opt.ExcludeFile) > 0 { 415 exists, err := fs.FileExists(ctx, fremote, path.Join(remote, f.Opt.ExcludeFile)) 416 if err != nil { 417 return false, err 418 } 419 if exists { 420 return true, nil 421 } 422 } 423 return false, nil 424 } 425 426 // Include returns whether this object should be included into the 427 // sync or not 428 func (f *Filter) Include(remote string, size int64, modTime time.Time) bool { 429 // filesFrom takes precedence 430 if f.files != nil { 431 _, include := f.files[remote] 432 return include 433 } 434 if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) { 435 return false 436 } 437 if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) { 438 return false 439 } 440 if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) { 441 return false 442 } 443 if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) { 444 return false 445 } 446 return f.includeRemote(remote) 447 } 448 449 // IncludeObject returns whether this object should be included into 450 // the sync or not. This is a convenience function to avoid calling 451 // o.ModTime(), which is an expensive operation. 452 func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool { 453 var modTime time.Time 454 455 if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() { 456 modTime = o.ModTime(ctx) 457 } else { 458 modTime = time.Unix(0, 0) 459 } 460 461 return f.Include(o.Remote(), o.Size(), modTime) 462 } 463 464 // forEachLine calls fn on every line in the file pointed to by path 465 // 466 // It ignores empty lines and lines starting with '#' or ';' 467 func forEachLine(path string, fn func(string) error) (err error) { 468 in, err := os.Open(path) 469 if err != nil { 470 return err 471 } 472 defer fs.CheckClose(in, &err) 473 scanner := bufio.NewScanner(in) 474 for scanner.Scan() { 475 line := scanner.Text() 476 line = strings.TrimSpace(line) 477 if len(line) == 0 || line[0] == '#' || line[0] == ';' { 478 continue 479 } 480 err := fn(line) 481 if err != nil { 482 return err 483 } 484 } 485 return scanner.Err() 486 } 487 488 // DumpFilters dumps the filters in textual form, 1 per line 489 func (f *Filter) DumpFilters() string { 490 rules := []string{} 491 if !f.ModTimeFrom.IsZero() { 492 rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String())) 493 } 494 if !f.ModTimeTo.IsZero() { 495 rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String())) 496 } 497 rules = append(rules, "--- File filter rules ---") 498 for _, rule := range f.fileRules.rules { 499 rules = append(rules, rule.String()) 500 } 501 rules = append(rules, "--- Directory filter rules ---") 502 for _, dirRule := range f.dirRules.rules { 503 rules = append(rules, dirRule.String()) 504 } 505 return strings.Join(rules, "\n") 506 } 507 508 // HaveFilesFrom returns true if --files-from has been supplied 509 func (f *Filter) HaveFilesFrom() bool { 510 return f.files != nil 511 } 512 513 var errFilesFromNotSet = errors.New("--files-from not set so can't use Filter.ListR") 514 515 // MakeListR makes function to return all the files set using --files-from 516 func (f *Filter) MakeListR(ctx context.Context, NewObject func(ctx context.Context, remote string) (fs.Object, error)) fs.ListRFn { 517 return func(ctx context.Context, dir string, callback fs.ListRCallback) error { 518 if !f.HaveFilesFrom() { 519 return errFilesFromNotSet 520 } 521 var ( 522 remotes = make(chan string, fs.Config.Checkers) 523 g errgroup.Group 524 ) 525 for i := 0; i < fs.Config.Checkers; i++ { 526 g.Go(func() (err error) { 527 var entries = make(fs.DirEntries, 1) 528 for remote := range remotes { 529 entries[0], err = NewObject(ctx, remote) 530 if err == fs.ErrorObjectNotFound { 531 // Skip files that are not found 532 } else if err != nil { 533 return err 534 } else { 535 err = callback(entries) 536 if err != nil { 537 return err 538 } 539 } 540 } 541 return nil 542 }) 543 } 544 for remote := range f.files { 545 remotes <- remote 546 } 547 close(remotes) 548 return g.Wait() 549 } 550 } 551 552 // UsesDirectoryFilters returns true if the filter uses directory 553 // filters and false if it doesn't. 554 // 555 // This is used in deciding whether to walk directories or use ListR 556 func (f *Filter) UsesDirectoryFilters() bool { 557 if len(f.dirRules.rules) == 0 { 558 return false 559 } 560 rule := f.dirRules.rules[0] 561 re := rule.Regexp.String() 562 if rule.Include == true && re == "^.*$" { 563 return false 564 } 565 return true 566 }