github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/filter/filter.go (about)

     1  // Package filter controls the filtering of files
     2  package filter
     3  
     4  import (
     5  	"bufio"
     6  	"context"
     7  	"fmt"
     8  	"log"
     9  	"os"
    10  	"path"
    11  	"regexp"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/pkg/errors"
    16  	"github.com/rclone/rclone/fs"
    17  	"golang.org/x/sync/errgroup"
    18  )
    19  
// Active is the globally active filter
//
// It is built at package initialisation from DefaultOpt (a nil *Opt is
// passed to mustNewFilter), which panics if construction fails.
var Active = mustNewFilter(nil)
    22  
    23  // rule is one filter rule
    24  type rule struct {
    25  	Include bool
    26  	Regexp  *regexp.Regexp
    27  }
    28  
    29  // Match returns true if rule matches path
    30  func (r *rule) Match(path string) bool {
    31  	return r.Regexp.MatchString(path)
    32  }
    33  
    34  // String the rule
    35  func (r *rule) String() string {
    36  	c := "-"
    37  	if r.Include {
    38  		c = "+"
    39  	}
    40  	return fmt.Sprintf("%s %s", c, r.Regexp.String())
    41  }
    42  
// rules is a slice of rules
//
// rules holds the rules in the order they were added; existing is
// keyed by each rule's String() form and is used by add to skip
// duplicates. existing is created lazily, so the zero value is usable.
type rules struct {
	rules    []rule
	existing map[string]struct{}
}
    48  
    49  // add adds a rule if it doesn't exist already
    50  func (rs *rules) add(Include bool, re *regexp.Regexp) {
    51  	if rs.existing == nil {
    52  		rs.existing = make(map[string]struct{})
    53  	}
    54  	newRule := rule{
    55  		Include: Include,
    56  		Regexp:  re,
    57  	}
    58  	newRuleString := newRule.String()
    59  	if _, ok := rs.existing[newRuleString]; ok {
    60  		return // rule already exists
    61  	}
    62  	rs.rules = append(rs.rules, newRule)
    63  	rs.existing[newRuleString] = struct{}{}
    64  }
    65  
    66  // clear clears all the rules
    67  func (rs *rules) clear() {
    68  	rs.rules = nil
    69  	rs.existing = nil
    70  }
    71  
// len returns the number of rules currently held (duplicates are
// never stored, so each distinct rule counts once).
func (rs *rules) len() int {
	return len(rs.rules)
}
    76  
// FilesMap describes the map of files to transfer
//
// It is used as a set: only the keys carry information. Filter uses
// it both for the listed files and for their parent directories.
type FilesMap map[string]struct{}
    79  
// Opt configures the filter
//
// FilterRule, ExcludeRule and IncludeRule hold rules given directly;
// FilterFrom, ExcludeFrom and IncludeFrom hold paths of files that
// NewFilter reads rules from, one per line ("-" reads standard
// input). FilterRule/FilterFrom entries use the "+ glob", "- glob",
// "!" syntax accepted by AddRule; the include/exclude variants are
// bare globs.
//
// FilesFrom and FilesFromRaw hold paths of files listing the exact
// files to transfer; the raw variant is read without trimming
// whitespace or skipping blank and comment lines.
//
// ExcludeFile is the name of a file whose presence in a directory
// causes IncludeDirectory to exclude that directory.
//
// MinAge and MaxAge are converted by NewFilter into
// Filter.ModTimeTo and Filter.ModTimeFrom respectively. MinSize and
// MaxSize are only applied when they are >= 0. IgnoreCase is passed
// to globToRegexp when rules are compiled.
//
// DeleteExcluded is not read anywhere in this file.
type Opt struct {
	DeleteExcluded bool
	FilterRule     []string
	FilterFrom     []string
	ExcludeRule    []string
	ExcludeFrom    []string
	ExcludeFile    string
	IncludeRule    []string
	IncludeFrom    []string
	FilesFrom      []string
	FilesFromRaw   []string
	MinAge         fs.Duration
	MaxAge         fs.Duration
	MinSize        fs.SizeSuffix
	MaxSize        fs.SizeSuffix
	IgnoreCase     bool
}
    98  
// DefaultOpt is the default config for the filter
//
// The size limits are negative, which Include and InActive treat as
// "no limit". The age limits are fs.DurationOff (presumably reported
// as unset by Duration.IsSet - confirm in package fs).
var DefaultOpt = Opt{
	MinAge:  fs.DurationOff,
	MaxAge:  fs.DurationOff,
	MinSize: fs.SizeSuffix(-1),
	MaxSize: fs.SizeSuffix(-1),
}
   106  
// Filter describes any filtering in operation
//
// Opt is the filter's own copy of the options it was built from.
//
// ModTimeFrom and ModTimeTo bound the accepted modification times:
// Include rejects objects modified before ModTimeFrom or after
// ModTimeTo, and a zero time means that bound is not applied.
//
// fileRules and dirRules are consulted in order and the first
// matching rule decides. When files is non-nil, the files and dirs
// sets take precedence over the rules in Include and
// IncludeDirectory.
type Filter struct {
	Opt         Opt
	ModTimeFrom time.Time
	ModTimeTo   time.Time
	fileRules   rules
	dirRules    rules
	files       FilesMap // files if filesFrom
	dirs        FilesMap // dirs from filesFrom
}
   117  
// NewFilter parses the command line options and creates a Filter
// object.  If opt is nil, then DefaultOpt will be used
//
// Options are applied in a fixed order: age limits, include rules,
// include-from files, exclude rules, exclude-from files, filter
// rules, filter-from files, files-from, files-from-raw, and finally
// an implicit exclude of "/**" if any include rule or include-from
// file was supplied. Rule order matters because the first matching
// rule wins when the filter is evaluated.
//
// It returns an error if a rule cannot be added, a rule/list file
// cannot be read, or --files-from / --files-from-raw is combined with
// other filters.
//
// NOTE(review): an inconsistent --min-age/--max-age pair is reported
// with log.Fatal rather than through the returned error.
func NewFilter(opt *Opt) (f *Filter, err error) {
	f = &Filter{}

	// Make a copy of the options
	if opt != nil {
		f.Opt = *opt
	} else {
		f.Opt = DefaultOpt
	}

	// Filter flags
	// --min-age sets the newest modification time accepted.
	if f.Opt.MinAge.IsSet() {
		f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge))
		fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo)
	}
	// --max-age sets the oldest modification time accepted.
	if f.Opt.MaxAge.IsSet() {
		f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge))
		// An upper bound earlier than the lower bound would match nothing.
		if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) {
			log.Fatal("filter: --min-age can't be larger than --max-age")
		}
		fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
	}

	// addImplicitExclude records that an include rule or include-from
	// file was given; foundExcludeRule records the same for excludes.
	addImplicitExclude := false
	foundExcludeRule := false

	for _, rule := range f.Opt.IncludeRule {
		err = f.Add(true, rule)
		if err != nil {
			return nil, err
		}
		addImplicitExclude = true
	}
	// Here (and in the other *From loops) the loop variable is the
	// path of a file to read, not a rule.
	for _, rule := range f.Opt.IncludeFrom {
		err := forEachLine(rule, false, func(line string) error {
			return f.Add(true, line)
		})
		if err != nil {
			return nil, err
		}
		addImplicitExclude = true
	}
	for _, rule := range f.Opt.ExcludeRule {
		err = f.Add(false, rule)
		if err != nil {
			return nil, err
		}
		foundExcludeRule = true
	}
	for _, rule := range f.Opt.ExcludeFrom {
		err := forEachLine(rule, false, func(line string) error {
			return f.Add(false, line)
		})
		if err != nil {
			return nil, err
		}
		foundExcludeRule = true
	}

	// Mixing includes and excludes is only warned about, not rejected.
	if addImplicitExclude && foundExcludeRule {
		fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
	}

	for _, rule := range f.Opt.FilterRule {
		err = f.AddRule(rule)
		if err != nil {
			return nil, err
		}
	}
	for _, rule := range f.Opt.FilterFrom {
		err := forEachLine(rule, false, f.AddRule)
		if err != nil {
			return nil, err
		}
	}

	// Snapshot taken before any files-from list is loaded, so it
	// reflects only the filters configured above.
	inActive := f.InActive()

	for _, rule := range f.Opt.FilesFrom {
		if !inActive {
			return nil, fmt.Errorf("The usage of --files-from overrides all other filters, it should be used alone or with --files-from-raw")
		}
		f.initAddFile() // init to show --files-from set even if no files within
		err := forEachLine(rule, false, func(line string) error {
			return f.AddFile(line)
		})
		if err != nil {
			return nil, err
		}
	}

	for _, rule := range f.Opt.FilesFromRaw {
		// --files-from-raw can be used with --files-from, hence we do
		// not need to get the value of f.InActive again
		if !inActive {
			return nil, fmt.Errorf("The usage of --files-from-raw overrides all other filters, it should be used alone or with --files-from")
		}
		f.initAddFile() // init to show --files-from set even if no files within
		// raw=true: lines are passed through without trimming or
		// comment/blank-line skipping.
		err := forEachLine(rule, true, func(line string) error {
			return f.AddFile(line)
		})
		if err != nil {
			return nil, err
		}
	}

	// Added last so that it only applies to paths not matched by any
	// earlier rule.
	if addImplicitExclude {
		err = f.Add(false, "/**")
		if err != nil {
			return nil, err
		}
	}
	// Print the resulting filters to stdout when the DumpFilters flag
	// is set in fs.Config.Dump.
	if fs.Config.Dump&fs.DumpFilters != 0 {
		fmt.Println("--- start filters ---")
		fmt.Println(f.DumpFilters())
		fmt.Println("--- end filters ---")
	}
	return f, nil
}
   239  
   240  func mustNewFilter(opt *Opt) *Filter {
   241  	f, err := NewFilter(opt)
   242  	if err != nil {
   243  		panic(err)
   244  	}
   245  	return f
   246  }
   247  
   248  // addDirGlobs adds directory globs from the file glob passed in
   249  func (f *Filter) addDirGlobs(Include bool, glob string) error {
   250  	for _, dirGlob := range globToDirGlobs(glob) {
   251  		// Don't add "/" as we always include the root
   252  		if dirGlob == "/" {
   253  			continue
   254  		}
   255  		dirRe, err := globToRegexp(dirGlob, f.Opt.IgnoreCase)
   256  		if err != nil {
   257  			return err
   258  		}
   259  		f.dirRules.add(Include, dirRe)
   260  	}
   261  	return nil
   262  }
   263  
   264  // Add adds a filter rule with include or exclude status indicated
   265  func (f *Filter) Add(Include bool, glob string) error {
   266  	isDirRule := strings.HasSuffix(glob, "/")
   267  	isFileRule := !isDirRule
   268  	if strings.Contains(glob, "**") {
   269  		isDirRule, isFileRule = true, true
   270  	}
   271  	re, err := globToRegexp(glob, f.Opt.IgnoreCase)
   272  	if err != nil {
   273  		return err
   274  	}
   275  	if isFileRule {
   276  		f.fileRules.add(Include, re)
   277  		// If include rule work out what directories are needed to scan
   278  		// if exclude rule, we can't rule anything out
   279  		// Unless it is `*` which matches everything
   280  		// NB ** and /** are DirRules
   281  		if Include || glob == "*" {
   282  			err = f.addDirGlobs(Include, glob)
   283  			if err != nil {
   284  				return err
   285  			}
   286  		}
   287  	}
   288  	if isDirRule {
   289  		f.dirRules.add(Include, re)
   290  	}
   291  	return nil
   292  }
   293  
   294  // AddRule adds a filter rule with include/exclude indicated by the prefix
   295  //
   296  // These are
   297  //
   298  //   + glob
   299  //   - glob
   300  //   !
   301  //
   302  // '+' includes the glob, '-' excludes it and '!' resets the filter list
   303  //
   304  // Line comments may be introduced with '#' or ';'
   305  func (f *Filter) AddRule(rule string) error {
   306  	switch {
   307  	case rule == "!":
   308  		f.Clear()
   309  		return nil
   310  	case strings.HasPrefix(rule, "- "):
   311  		return f.Add(false, rule[2:])
   312  	case strings.HasPrefix(rule, "+ "):
   313  		return f.Add(true, rule[2:])
   314  	}
   315  	return errors.Errorf("malformed rule %q", rule)
   316  }
   317  
   318  // initAddFile creates f.files and f.dirs
   319  func (f *Filter) initAddFile() {
   320  	if f.files == nil {
   321  		f.files = make(FilesMap)
   322  		f.dirs = make(FilesMap)
   323  	}
   324  }
   325  
   326  // AddFile adds a single file to the files from list
   327  func (f *Filter) AddFile(file string) error {
   328  	f.initAddFile()
   329  	file = strings.Trim(file, "/")
   330  	f.files[file] = struct{}{}
   331  	// Put all the parent directories into f.dirs
   332  	for {
   333  		file = path.Dir(file)
   334  		if file == "." {
   335  			break
   336  		}
   337  		if _, found := f.dirs[file]; found {
   338  			break
   339  		}
   340  		f.dirs[file] = struct{}{}
   341  	}
   342  	return nil
   343  }
   344  
// Files returns all the files from the `--files-from` list
//
// It may be nil if the list is empty
//
// The filter's own map is returned, not a copy.
func (f *Filter) Files() FilesMap {
	return f.files
}
   351  
   352  // Clear clears all the filter rules
   353  func (f *Filter) Clear() {
   354  	f.fileRules.clear()
   355  	f.dirRules.clear()
   356  }
   357  
   358  // InActive returns false if any filters are active
   359  func (f *Filter) InActive() bool {
   360  	return (f.files == nil &&
   361  		f.ModTimeFrom.IsZero() &&
   362  		f.ModTimeTo.IsZero() &&
   363  		f.Opt.MinSize < 0 &&
   364  		f.Opt.MaxSize < 0 &&
   365  		f.fileRules.len() == 0 &&
   366  		f.dirRules.len() == 0 &&
   367  		len(f.Opt.ExcludeFile) == 0)
   368  }
   369  
   370  // includeRemote returns whether this remote passes the filter rules.
   371  func (f *Filter) includeRemote(remote string) bool {
   372  	for _, rule := range f.fileRules.rules {
   373  		if rule.Match(remote) {
   374  			return rule.Include
   375  		}
   376  	}
   377  	return true
   378  }
   379  
   380  // ListContainsExcludeFile checks if exclude file is present in the list.
   381  func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool {
   382  	if len(f.Opt.ExcludeFile) == 0 {
   383  		return false
   384  	}
   385  	for _, entry := range entries {
   386  		obj, ok := entry.(fs.Object)
   387  		if ok {
   388  			basename := path.Base(obj.Remote())
   389  			if basename == f.Opt.ExcludeFile {
   390  				return true
   391  			}
   392  		}
   393  	}
   394  	return false
   395  }
   396  
   397  // IncludeDirectory returns a function which checks whether this
   398  // directory should be included in the sync or not.
   399  func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (bool, error) {
   400  	return func(remote string) (bool, error) {
   401  		remote = strings.Trim(remote, "/")
   402  		// first check if we need to remove directory based on
   403  		// the exclude file
   404  		excl, err := f.DirContainsExcludeFile(ctx, fs, remote)
   405  		if err != nil {
   406  			return false, err
   407  		}
   408  		if excl {
   409  			return false, nil
   410  		}
   411  
   412  		// filesFrom takes precedence
   413  		if f.files != nil {
   414  			_, include := f.dirs[remote]
   415  			return include, nil
   416  		}
   417  		remote += "/"
   418  		for _, rule := range f.dirRules.rules {
   419  			if rule.Match(remote) {
   420  				return rule.Include, nil
   421  			}
   422  		}
   423  
   424  		return true, nil
   425  	}
   426  }
   427  
   428  // DirContainsExcludeFile checks if exclude file is present in a
   429  // directory. If fs is nil, it works properly if ExcludeFile is an
   430  // empty string (for testing).
   431  func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remote string) (bool, error) {
   432  	if len(f.Opt.ExcludeFile) > 0 {
   433  		exists, err := fs.FileExists(ctx, fremote, path.Join(remote, f.Opt.ExcludeFile))
   434  		if err != nil {
   435  			return false, err
   436  		}
   437  		if exists {
   438  			return true, nil
   439  		}
   440  	}
   441  	return false, nil
   442  }
   443  
   444  // Include returns whether this object should be included into the
   445  // sync or not
   446  func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
   447  	// filesFrom takes precedence
   448  	if f.files != nil {
   449  		_, include := f.files[remote]
   450  		return include
   451  	}
   452  	if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) {
   453  		return false
   454  	}
   455  	if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) {
   456  		return false
   457  	}
   458  	if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) {
   459  		return false
   460  	}
   461  	if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
   462  		return false
   463  	}
   464  	return f.includeRemote(remote)
   465  }
   466  
   467  // IncludeObject returns whether this object should be included into
   468  // the sync or not. This is a convenience function to avoid calling
   469  // o.ModTime(), which is an expensive operation.
   470  func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
   471  	var modTime time.Time
   472  
   473  	if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() {
   474  		modTime = o.ModTime(ctx)
   475  	} else {
   476  		modTime = time.Unix(0, 0)
   477  	}
   478  
   479  	return f.Include(o.Remote(), o.Size(), modTime)
   480  }
   481  
   482  // forEachLine calls fn on every line in the file pointed to by path
   483  //
   484  // It ignores empty lines and lines starting with '#' or ';' if raw is false
   485  func forEachLine(path string, raw bool, fn func(string) error) (err error) {
   486  	var scanner *bufio.Scanner
   487  	if path == "-" {
   488  		scanner = bufio.NewScanner(os.Stdin)
   489  	} else {
   490  		in, err := os.Open(path)
   491  		if err != nil {
   492  			return err
   493  		}
   494  		scanner = bufio.NewScanner(in)
   495  		defer fs.CheckClose(in, &err)
   496  	}
   497  	for scanner.Scan() {
   498  		line := scanner.Text()
   499  		if !raw {
   500  			line = strings.TrimSpace(line)
   501  			if len(line) == 0 || line[0] == '#' || line[0] == ';' {
   502  				continue
   503  			}
   504  		}
   505  		err := fn(line)
   506  		if err != nil {
   507  			return err
   508  		}
   509  	}
   510  	return scanner.Err()
   511  }
   512  
   513  // DumpFilters dumps the filters in textual form, 1 per line
   514  func (f *Filter) DumpFilters() string {
   515  	rules := []string{}
   516  	if !f.ModTimeFrom.IsZero() {
   517  		rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String()))
   518  	}
   519  	if !f.ModTimeTo.IsZero() {
   520  		rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String()))
   521  	}
   522  	rules = append(rules, "--- File filter rules ---")
   523  	for _, rule := range f.fileRules.rules {
   524  		rules = append(rules, rule.String())
   525  	}
   526  	rules = append(rules, "--- Directory filter rules ---")
   527  	for _, dirRule := range f.dirRules.rules {
   528  		rules = append(rules, dirRule.String())
   529  	}
   530  	return strings.Join(rules, "\n")
   531  }
   532  
// HaveFilesFrom returns true if --files-from has been supplied
//
// This is true as soon as the files set has been initialised, even
// if no file has been added to it.
func (f *Filter) HaveFilesFrom() bool {
	return f.files != nil
}
   537  
// errFilesFromNotSet is returned by the function built by MakeListR
// when it is called on a filter that has no --files-from list.
var errFilesFromNotSet = errors.New("--files-from not set so can't use Filter.ListR")
   539  
   540  // MakeListR makes function to return all the files set using --files-from
   541  func (f *Filter) MakeListR(ctx context.Context, NewObject func(ctx context.Context, remote string) (fs.Object, error)) fs.ListRFn {
   542  	return func(ctx context.Context, dir string, callback fs.ListRCallback) error {
   543  		if !f.HaveFilesFrom() {
   544  			return errFilesFromNotSet
   545  		}
   546  		var (
   547  			remotes = make(chan string, fs.Config.Checkers)
   548  			g       errgroup.Group
   549  		)
   550  		for i := 0; i < fs.Config.Checkers; i++ {
   551  			g.Go(func() (err error) {
   552  				var entries = make(fs.DirEntries, 1)
   553  				for remote := range remotes {
   554  					entries[0], err = NewObject(ctx, remote)
   555  					if err == fs.ErrorObjectNotFound {
   556  						// Skip files that are not found
   557  					} else if err != nil {
   558  						return err
   559  					} else {
   560  						err = callback(entries)
   561  						if err != nil {
   562  							return err
   563  						}
   564  					}
   565  				}
   566  				return nil
   567  			})
   568  		}
   569  		for remote := range f.files {
   570  			remotes <- remote
   571  		}
   572  		close(remotes)
   573  		return g.Wait()
   574  	}
   575  }
   576  
   577  // UsesDirectoryFilters returns true if the filter uses directory
   578  // filters and false if it doesn't.
   579  //
   580  // This is used in deciding whether to walk directories or use ListR
   581  func (f *Filter) UsesDirectoryFilters() bool {
   582  	if len(f.dirRules.rules) == 0 {
   583  		return false
   584  	}
   585  	rule := f.dirRules.rules[0]
   586  	re := rule.Regexp.String()
   587  	if rule.Include == true && re == "^.*$" {
   588  		return false
   589  	}
   590  	return true
   591  }