github.com/xhghs/rclone@v1.51.1-0.20200430155106-e186a28cced8/fs/filter/filter.go (about)

     1  // Package filter controls the filtering of files
     2  package filter
     3  
     4  import (
     5  	"bufio"
     6  	"context"
     7  	"fmt"
     8  	"log"
     9  	"os"
    10  	"path"
    11  	"regexp"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/pkg/errors"
    16  	"github.com/rclone/rclone/fs"
    17  	"golang.org/x/sync/errgroup"
    18  )
    19  
// Active is the globally active filter
//
// It is created at package initialisation by mustNewFilter(nil), so it
// starts out built from DefaultOpt.
var Active = mustNewFilter(nil)
    22  
    23  // rule is one filter rule
    24  type rule struct {
    25  	Include bool
    26  	Regexp  *regexp.Regexp
    27  }
    28  
    29  // Match returns true if rule matches path
    30  func (r *rule) Match(path string) bool {
    31  	return r.Regexp.MatchString(path)
    32  }
    33  
    34  // String the rule
    35  func (r *rule) String() string {
    36  	c := "-"
    37  	if r.Include {
    38  		c = "+"
    39  	}
    40  	return fmt.Sprintf("%s %s", c, r.Regexp.String())
    41  }
    42  
    43  // rules is a slice of rules
    44  type rules struct {
    45  	rules    []rule
    46  	existing map[string]struct{}
    47  }
    48  
    49  // add adds a rule if it doesn't exist already
    50  func (rs *rules) add(Include bool, re *regexp.Regexp) {
    51  	if rs.existing == nil {
    52  		rs.existing = make(map[string]struct{})
    53  	}
    54  	newRule := rule{
    55  		Include: Include,
    56  		Regexp:  re,
    57  	}
    58  	newRuleString := newRule.String()
    59  	if _, ok := rs.existing[newRuleString]; ok {
    60  		return // rule already exists
    61  	}
    62  	rs.rules = append(rs.rules, newRule)
    63  	rs.existing[newRuleString] = struct{}{}
    64  }
    65  
    66  // clear clears all the rules
    67  func (rs *rules) clear() {
    68  	rs.rules = nil
    69  	rs.existing = nil
    70  }
    71  
    72  // len returns the number of rules
    73  func (rs *rules) len() int {
    74  	return len(rs.rules)
    75  }
    76  
// FilesMap describes the map of files to transfer
//
// It is used as a set: the keys are paths and the values carry no
// information.
type FilesMap map[string]struct{}
    79  
// Opt configures the filter
type Opt struct {
	// DeleteExcluded is not read anywhere in this file.
	// NOTE(review): presumably consumed by callers — confirm.
	DeleteExcluded bool
	// FilterRule holds rules in the form accepted by AddRule.
	FilterRule     []string
	// FilterFrom holds paths of files whose lines are passed to AddRule.
	FilterFrom     []string
	// ExcludeRule holds globs added as exclude rules.
	ExcludeRule    []string
	// ExcludeFrom holds paths of files whose lines are added as exclude globs.
	ExcludeFrom    []string
	// ExcludeFile is the name of a file whose presence in a directory
	// excludes that directory.  Empty means the check is disabled.
	ExcludeFile    string
	// IncludeRule holds globs added as include rules.
	IncludeRule    []string
	// IncludeFrom holds paths of files whose lines are added as include globs.
	IncludeFrom    []string
	// FilesFrom holds paths of files whose lines are passed to AddFile.
	FilesFrom      []string
	// MinAge, if set, makes NewFilter set ModTimeTo to now minus MinAge.
	MinAge         fs.Duration
	// MaxAge, if set, makes NewFilter set ModTimeFrom to now minus MaxAge.
	MaxAge         fs.Duration
	// MinSize is the smallest size Include accepts; negative disables the check.
	MinSize        fs.SizeSuffix
	// MaxSize is the largest size Include accepts; negative disables the check.
	MaxSize        fs.SizeSuffix
	// IgnoreCase is passed to globToRegexp when compiling globs.
	IgnoreCase     bool
}
    97  
// DefaultOpt is the default config for the filter
//
// The age limits are off and both size limits are -1, which Include
// treats as "no limit" because it only applies limits that are >= 0.
var DefaultOpt = Opt{
	MinAge:  fs.DurationOff,
	MaxAge:  fs.DurationOff,
	MinSize: fs.SizeSuffix(-1),
	MaxSize: fs.SizeSuffix(-1),
}
   105  
// Filter describes any filtering in operation
type Filter struct {
	// Opt is the copy of the options the filter was built from.
	Opt         Opt
	// ModTimeFrom, if non-zero, excludes objects modified before it.
	ModTimeFrom time.Time
	// ModTimeTo, if non-zero, excludes objects modified after it.
	ModTimeTo   time.Time
	// fileRules are matched in order against file paths; the first match wins.
	fileRules   rules
	// dirRules are matched in order against directory paths ending in "/".
	dirRules    rules
	files       FilesMap // files if filesFrom
	dirs        FilesMap // dirs from filesFrom
}
   116  
// NewFilter parses the command line options and creates a Filter
// object.  If opt is nil, then DefaultOpt will be used
//
// Rules are added in a fixed order: IncludeRule, IncludeFrom,
// ExcludeRule, ExcludeFrom, FilterRule and then FilterFrom.  If any
// include rule or include file was supplied, a final exclude of "/**"
// is appended.
//
// An error is returned if Add, AddRule, AddFile or reading one of the
// rule files fails, or if FilesFrom is used while any other filter is
// active.
//
// NOTE(review): an inconsistent --min-age/--max-age pair terminates
// the process with log.Fatal instead of returning an error.
func NewFilter(opt *Opt) (f *Filter, err error) {
	f = &Filter{}

	// Make a copy of the options
	if opt != nil {
		f.Opt = *opt
	} else {
		f.Opt = DefaultOpt
	}

	// Filter flags
	if f.Opt.MinAge.IsSet() {
		// Objects must be at least MinAge old, i.e. modified no later than this.
		f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge))
		fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo)
	}
	if f.Opt.MaxAge.IsSet() {
		// Objects must be at most MaxAge old, i.e. modified no earlier than this.
		f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge))
		if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) {
			log.Fatal("filter: --min-age can't be larger than --max-age")
		}
		fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
	}

	// addImplicitExclude records that an include rule or file was given;
	// foundExcludeRule records that an exclude rule or file was given.
	addImplicitExclude := false
	foundExcludeRule := false

	for _, rule := range f.Opt.IncludeRule {
		err = f.Add(true, rule)
		if err != nil {
			return nil, err
		}
		addImplicitExclude = true
	}
	for _, rule := range f.Opt.IncludeFrom {
		err := forEachLine(rule, func(line string) error {
			return f.Add(true, line)
		})
		if err != nil {
			return nil, err
		}
		addImplicitExclude = true
	}
	for _, rule := range f.Opt.ExcludeRule {
		err = f.Add(false, rule)
		if err != nil {
			return nil, err
		}
		foundExcludeRule = true
	}
	for _, rule := range f.Opt.ExcludeFrom {
		err := forEachLine(rule, func(line string) error {
			return f.Add(false, line)
		})
		if err != nil {
			return nil, err
		}
		foundExcludeRule = true
	}

	// Mixing includes and excludes is allowed but reported to the user.
	if addImplicitExclude && foundExcludeRule {
		fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
	}

	for _, rule := range f.Opt.FilterRule {
		err = f.AddRule(rule)
		if err != nil {
			return nil, err
		}
	}
	for _, rule := range f.Opt.FilterFrom {
		err := forEachLine(rule, f.AddRule)
		if err != nil {
			return nil, err
		}
	}

	// Evaluated once, before any --files-from entry is loaded, so that
	// several --files-from files can be combined with each other.
	inActive := f.InActive()
	for _, rule := range f.Opt.FilesFrom {
		if !inActive {
			return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone")
		}
		f.initAddFile() // init to show --files-from set even if no files within
		err := forEachLine(rule, func(line string) error {
			return f.AddFile(line)
		})
		if err != nil {
			return nil, err
		}
	}
	// Added last so it only catches what no earlier rule matched.
	if addImplicitExclude {
		err = f.Add(false, "/**")
		if err != nil {
			return nil, err
		}
	}
	if fs.Config.Dump&fs.DumpFilters != 0 {
		fmt.Println("--- start filters ---")
		fmt.Println(f.DumpFilters())
		fmt.Println("--- end filters ---")
	}
	return f, nil
}
   221  
   222  func mustNewFilter(opt *Opt) *Filter {
   223  	f, err := NewFilter(opt)
   224  	if err != nil {
   225  		panic(err)
   226  	}
   227  	return f
   228  }
   229  
   230  // addDirGlobs adds directory globs from the file glob passed in
   231  func (f *Filter) addDirGlobs(Include bool, glob string) error {
   232  	for _, dirGlob := range globToDirGlobs(glob) {
   233  		// Don't add "/" as we always include the root
   234  		if dirGlob == "/" {
   235  			continue
   236  		}
   237  		dirRe, err := globToRegexp(dirGlob, f.Opt.IgnoreCase)
   238  		if err != nil {
   239  			return err
   240  		}
   241  		f.dirRules.add(Include, dirRe)
   242  	}
   243  	return nil
   244  }
   245  
   246  // Add adds a filter rule with include or exclude status indicated
   247  func (f *Filter) Add(Include bool, glob string) error {
   248  	isDirRule := strings.HasSuffix(glob, "/")
   249  	isFileRule := !isDirRule
   250  	if strings.Contains(glob, "**") {
   251  		isDirRule, isFileRule = true, true
   252  	}
   253  	re, err := globToRegexp(glob, f.Opt.IgnoreCase)
   254  	if err != nil {
   255  		return err
   256  	}
   257  	if isFileRule {
   258  		f.fileRules.add(Include, re)
   259  		// If include rule work out what directories are needed to scan
   260  		// if exclude rule, we can't rule anything out
   261  		// Unless it is `*` which matches everything
   262  		// NB ** and /** are DirRules
   263  		if Include || glob == "*" {
   264  			err = f.addDirGlobs(Include, glob)
   265  			if err != nil {
   266  				return err
   267  			}
   268  		}
   269  	}
   270  	if isDirRule {
   271  		f.dirRules.add(Include, re)
   272  	}
   273  	return nil
   274  }
   275  
   276  // AddRule adds a filter rule with include/exclude indicated by the prefix
   277  //
   278  // These are
   279  //
   280  //   + glob
   281  //   - glob
   282  //   !
   283  //
   284  // '+' includes the glob, '-' excludes it and '!' resets the filter list
   285  //
   286  // Line comments may be introduced with '#' or ';'
   287  func (f *Filter) AddRule(rule string) error {
   288  	switch {
   289  	case rule == "!":
   290  		f.Clear()
   291  		return nil
   292  	case strings.HasPrefix(rule, "- "):
   293  		return f.Add(false, rule[2:])
   294  	case strings.HasPrefix(rule, "+ "):
   295  		return f.Add(true, rule[2:])
   296  	}
   297  	return errors.Errorf("malformed rule %q", rule)
   298  }
   299  
   300  // initAddFile creates f.files and f.dirs
   301  func (f *Filter) initAddFile() {
   302  	if f.files == nil {
   303  		f.files = make(FilesMap)
   304  		f.dirs = make(FilesMap)
   305  	}
   306  }
   307  
   308  // AddFile adds a single file to the files from list
   309  func (f *Filter) AddFile(file string) error {
   310  	f.initAddFile()
   311  	file = strings.Trim(file, "/")
   312  	f.files[file] = struct{}{}
   313  	// Put all the parent directories into f.dirs
   314  	for {
   315  		file = path.Dir(file)
   316  		if file == "." {
   317  			break
   318  		}
   319  		if _, found := f.dirs[file]; found {
   320  			break
   321  		}
   322  		f.dirs[file] = struct{}{}
   323  	}
   324  	return nil
   325  }
   326  
// Files returns all the files from the `--files-from` list
//
// It is nil if the files from list has never been initialised.  The
// map returned is the filter's own map, not a copy.
func (f *Filter) Files() FilesMap {
	return f.files
}
   333  
// Clear clears all the filter rules
//
// Both the file rules and the directory rules are removed.  The files
// from list and the age and size limits are left untouched.
func (f *Filter) Clear() {
	f.fileRules.clear()
	f.dirRules.clear()
}
   339  
   340  // InActive returns false if any filters are active
   341  func (f *Filter) InActive() bool {
   342  	return (f.files == nil &&
   343  		f.ModTimeFrom.IsZero() &&
   344  		f.ModTimeTo.IsZero() &&
   345  		f.Opt.MinSize < 0 &&
   346  		f.Opt.MaxSize < 0 &&
   347  		f.fileRules.len() == 0 &&
   348  		f.dirRules.len() == 0 &&
   349  		len(f.Opt.ExcludeFile) == 0)
   350  }
   351  
   352  // includeRemote returns whether this remote passes the filter rules.
   353  func (f *Filter) includeRemote(remote string) bool {
   354  	for _, rule := range f.fileRules.rules {
   355  		if rule.Match(remote) {
   356  			return rule.Include
   357  		}
   358  	}
   359  	return true
   360  }
   361  
   362  // ListContainsExcludeFile checks if exclude file is present in the list.
   363  func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool {
   364  	if len(f.Opt.ExcludeFile) == 0 {
   365  		return false
   366  	}
   367  	for _, entry := range entries {
   368  		obj, ok := entry.(fs.Object)
   369  		if ok {
   370  			basename := path.Base(obj.Remote())
   371  			if basename == f.Opt.ExcludeFile {
   372  				return true
   373  			}
   374  		}
   375  	}
   376  	return false
   377  }
   378  
   379  // IncludeDirectory returns a function which checks whether this
   380  // directory should be included in the sync or not.
   381  func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (bool, error) {
   382  	return func(remote string) (bool, error) {
   383  		remote = strings.Trim(remote, "/")
   384  		// first check if we need to remove directory based on
   385  		// the exclude file
   386  		excl, err := f.DirContainsExcludeFile(ctx, fs, remote)
   387  		if err != nil {
   388  			return false, err
   389  		}
   390  		if excl {
   391  			return false, nil
   392  		}
   393  
   394  		// filesFrom takes precedence
   395  		if f.files != nil {
   396  			_, include := f.dirs[remote]
   397  			return include, nil
   398  		}
   399  		remote += "/"
   400  		for _, rule := range f.dirRules.rules {
   401  			if rule.Match(remote) {
   402  				return rule.Include, nil
   403  			}
   404  		}
   405  
   406  		return true, nil
   407  	}
   408  }
   409  
   410  // DirContainsExcludeFile checks if exclude file is present in a
   411  // directroy. If fs is nil, it works properly if ExcludeFile is an
   412  // empty string (for testing).
   413  func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remote string) (bool, error) {
   414  	if len(f.Opt.ExcludeFile) > 0 {
   415  		exists, err := fs.FileExists(ctx, fremote, path.Join(remote, f.Opt.ExcludeFile))
   416  		if err != nil {
   417  			return false, err
   418  		}
   419  		if exists {
   420  			return true, nil
   421  		}
   422  	}
   423  	return false, nil
   424  }
   425  
   426  // Include returns whether this object should be included into the
   427  // sync or not
   428  func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
   429  	// filesFrom takes precedence
   430  	if f.files != nil {
   431  		_, include := f.files[remote]
   432  		return include
   433  	}
   434  	if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) {
   435  		return false
   436  	}
   437  	if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) {
   438  		return false
   439  	}
   440  	if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) {
   441  		return false
   442  	}
   443  	if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
   444  		return false
   445  	}
   446  	return f.includeRemote(remote)
   447  }
   448  
   449  // IncludeObject returns whether this object should be included into
   450  // the sync or not. This is a convenience function to avoid calling
   451  // o.ModTime(), which is an expensive operation.
   452  func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
   453  	var modTime time.Time
   454  
   455  	if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() {
   456  		modTime = o.ModTime(ctx)
   457  	} else {
   458  		modTime = time.Unix(0, 0)
   459  	}
   460  
   461  	return f.Include(o.Remote(), o.Size(), modTime)
   462  }
   463  
   464  // forEachLine calls fn on every line in the file pointed to by path
   465  //
   466  // It ignores empty lines and lines starting with '#' or ';'
   467  func forEachLine(path string, fn func(string) error) (err error) {
   468  	in, err := os.Open(path)
   469  	if err != nil {
   470  		return err
   471  	}
   472  	defer fs.CheckClose(in, &err)
   473  	scanner := bufio.NewScanner(in)
   474  	for scanner.Scan() {
   475  		line := scanner.Text()
   476  		line = strings.TrimSpace(line)
   477  		if len(line) == 0 || line[0] == '#' || line[0] == ';' {
   478  			continue
   479  		}
   480  		err := fn(line)
   481  		if err != nil {
   482  			return err
   483  		}
   484  	}
   485  	return scanner.Err()
   486  }
   487  
   488  // DumpFilters dumps the filters in textual form, 1 per line
   489  func (f *Filter) DumpFilters() string {
   490  	rules := []string{}
   491  	if !f.ModTimeFrom.IsZero() {
   492  		rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String()))
   493  	}
   494  	if !f.ModTimeTo.IsZero() {
   495  		rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String()))
   496  	}
   497  	rules = append(rules, "--- File filter rules ---")
   498  	for _, rule := range f.fileRules.rules {
   499  		rules = append(rules, rule.String())
   500  	}
   501  	rules = append(rules, "--- Directory filter rules ---")
   502  	for _, dirRule := range f.dirRules.rules {
   503  		rules = append(rules, dirRule.String())
   504  	}
   505  	return strings.Join(rules, "\n")
   506  }
   507  
// HaveFilesFrom returns true if --files-from has been supplied
//
// This is the case once the files from list has been initialised,
// even if no files were added to it.
func (f *Filter) HaveFilesFrom() bool {
	return f.files != nil
}
   512  
// errFilesFromNotSet is returned by the function made by MakeListR
// when the filter has no files from list.
var errFilesFromNotSet = errors.New("--files-from not set so can't use Filter.ListR")
   514  
   515  // MakeListR makes function to return all the files set using --files-from
   516  func (f *Filter) MakeListR(ctx context.Context, NewObject func(ctx context.Context, remote string) (fs.Object, error)) fs.ListRFn {
   517  	return func(ctx context.Context, dir string, callback fs.ListRCallback) error {
   518  		if !f.HaveFilesFrom() {
   519  			return errFilesFromNotSet
   520  		}
   521  		var (
   522  			remotes = make(chan string, fs.Config.Checkers)
   523  			g       errgroup.Group
   524  		)
   525  		for i := 0; i < fs.Config.Checkers; i++ {
   526  			g.Go(func() (err error) {
   527  				var entries = make(fs.DirEntries, 1)
   528  				for remote := range remotes {
   529  					entries[0], err = NewObject(ctx, remote)
   530  					if err == fs.ErrorObjectNotFound {
   531  						// Skip files that are not found
   532  					} else if err != nil {
   533  						return err
   534  					} else {
   535  						err = callback(entries)
   536  						if err != nil {
   537  							return err
   538  						}
   539  					}
   540  				}
   541  				return nil
   542  			})
   543  		}
   544  		for remote := range f.files {
   545  			remotes <- remote
   546  		}
   547  		close(remotes)
   548  		return g.Wait()
   549  	}
   550  }
   551  
   552  // UsesDirectoryFilters returns true if the filter uses directory
   553  // filters and false if it doesn't.
   554  //
   555  // This is used in deciding whether to walk directories or use ListR
   556  func (f *Filter) UsesDirectoryFilters() bool {
   557  	if len(f.dirRules.rules) == 0 {
   558  		return false
   559  	}
   560  	rule := f.dirRules.rules[0]
   561  	re := rule.Regexp.String()
   562  	if rule.Include == true && re == "^.*$" {
   563  		return false
   564  	}
   565  	return true
   566  }