github.com/ncw/rclone@v1.48.1-0.20190724201158-a35aa1360e3e/fs/filter/filter.go (about)

     1  // Package filter controls the filtering of files
     2  package filter
     3  
     4  import (
     5  	"bufio"
     6  	"context"
     7  	"fmt"
     8  	"log"
     9  	"os"
    10  	"path"
    11  	"regexp"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/ncw/rclone/fs"
    16  	"github.com/pkg/errors"
    17  	"golang.org/x/sync/errgroup"
    18  )
    19  
// Active is the globally active filter.
//
// It is built at package initialisation by mustNewFilter(nil), i.e. from
// DefaultOpt, and mustNewFilter panics if that construction fails.
var Active = mustNewFilter(nil)
    22  
    23  // rule is one filter rule
    24  type rule struct {
    25  	Include          bool
    26  	Regexp           *regexp.Regexp
    27  	boundedRecursion bool
    28  }
    29  
    30  // Match returns true if rule matches path
    31  func (r *rule) Match(path string) bool {
    32  	return r.Regexp.MatchString(path)
    33  }
    34  
    35  // String the rule
    36  func (r *rule) String() string {
    37  	c := "-"
    38  	if r.Include {
    39  		c = "+"
    40  	}
    41  	return fmt.Sprintf("%s %s", c, r.Regexp.String())
    42  }
    43  
    44  // rules is a slice of rules
    45  type rules struct {
    46  	rules    []rule
    47  	existing map[string]struct{}
    48  }
    49  
    50  // add adds a rule if it doesn't exist already
    51  func (rs *rules) add(Include bool, re *regexp.Regexp, boundedRecursion bool) {
    52  	if rs.existing == nil {
    53  		rs.existing = make(map[string]struct{})
    54  	}
    55  	newRule := rule{
    56  		Include:          Include,
    57  		Regexp:           re,
    58  		boundedRecursion: boundedRecursion,
    59  	}
    60  	newRuleString := newRule.String()
    61  	if _, ok := rs.existing[newRuleString]; ok {
    62  		return // rule already exists
    63  	}
    64  	rs.rules = append(rs.rules, newRule)
    65  	rs.existing[newRuleString] = struct{}{}
    66  }
    67  
    68  // clear clears all the rules
    69  func (rs *rules) clear() {
    70  	rs.rules = nil
    71  	rs.existing = nil
    72  }
    73  
    74  // len returns the number of rules
    75  func (rs *rules) len() int {
    76  	return len(rs.rules)
    77  }
    78  
    79  // boundedRecursion returns true if the set of filters would only
    80  // need bounded recursion to evaluate
    81  func (rs *rules) boundedRecursion() bool {
    82  	var (
    83  		excludeAll       = false
    84  		boundedRecursion = true
    85  	)
    86  	for _, rule := range rs.rules {
    87  		if rule.Include {
    88  			boundedRecursion = boundedRecursion && rule.boundedRecursion
    89  		} else if rule.Regexp.String() == `^.*$` {
    90  			excludeAll = true
    91  		}
    92  	}
    93  	return excludeAll && boundedRecursion
    94  }
    95  
// FilesMap describes the map of files to transfer.
//
// It is used as a set: only the keys (paths) carry information, the
// empty struct values do not.
type FilesMap map[string]struct{}
    98  
// Opt configures the filter.
//
// NewFilter reads these fields to build a Filter. The field comments
// describe only how the code in this file uses them.
type Opt struct {
	DeleteExcluded bool          // not read by the code visible in this file
	FilterRule     []string      // rules in "+ glob" / "- glob" / "!" form, passed to AddRule
	FilterFrom     []string      // paths of files whose lines are passed to AddRule
	ExcludeRule    []string      // globs added as exclude rules
	ExcludeFrom    []string      // paths of files whose lines are added as exclude rules
	ExcludeFile    string        // file name whose presence in a directory excludes it; "" disables the check
	IncludeRule    []string      // globs added as include rules
	IncludeFrom    []string      // paths of files whose lines are added as include rules
	FilesFrom      []string      // paths of files whose lines are passed to AddFile
	MinAge         fs.Duration   // if set, NewFilter derives ModTimeTo as now - MinAge
	MaxAge         fs.Duration   // if set, NewFilter derives ModTimeFrom as now - MaxAge
	MinSize        fs.SizeSuffix // smallest size accepted by Include; negative disables the check
	MaxSize        fs.SizeSuffix // largest size accepted by Include; negative disables the check
	IgnoreCase     bool          // passed to globToRegexp when globs are compiled
}
   116  
// DefaultOpt is the default config for the filter.
//
// Both size limits are -1, which Include treats as "no limit" because it
// only applies a limit that is >= 0. Both age limits are fs.DurationOff
// (presumably reported as unset by IsSet - defined in package fs, confirm
// there). Every other field keeps its zero value.
var DefaultOpt = Opt{
	MinAge:  fs.DurationOff,
	MaxAge:  fs.DurationOff,
	MinSize: fs.SizeSuffix(-1),
	MaxSize: fs.SizeSuffix(-1),
}
   124  
// Filter describes any filtering in operation.
type Filter struct {
	Opt         Opt       // copy of the options the filter was built from
	ModTimeFrom time.Time // if non-zero, Include rejects objects modified before this time
	ModTimeTo   time.Time // if non-zero, Include rejects objects modified after this time
	fileRules   rules     // rules matched against file paths by includeRemote
	dirRules    rules     // rules matched against "dir/" paths by IncludeDirectory
	files       FilesMap  // files if filesFrom
	dirs        FilesMap  // dirs from filesFrom
}
   135  
// NewFilter parses the command line options and creates a Filter
// object.  If opt is nil, then DefaultOpt will be used
//
// Rules are registered in a fixed order, which matters because the first
// matching rule wins when the filter is evaluated:
//
//  1. IncludeRule, then the lines of each IncludeFrom file
//  2. ExcludeRule, then the lines of each ExcludeFrom file
//  3. FilterRule, then the lines of each FilterFrom file
//  4. the lines of each FilesFrom file (stored as a file list, not as rules)
//  5. an implicit exclude of "/**" if any include option was given
//
// It returns a nil Filter and the error if any rule fails to parse or
// any listed file cannot be read.
func NewFilter(opt *Opt) (f *Filter, err error) {
	f = &Filter{}

	// Make a copy of the options
	// (a struct copy: the slice fields still share their backing arrays
	// with the caller's Opt).
	if opt != nil {
		f.Opt = *opt
	} else {
		f.Opt = DefaultOpt
	}

	// Filter flags
	if f.Opt.MinAge.IsSet() {
		// Objects must be at least MinAge old, i.e. modified no later
		// than now - MinAge.
		f.ModTimeTo = time.Now().Add(-time.Duration(f.Opt.MinAge))
		fs.Debugf(nil, "--min-age %v to %v", f.Opt.MinAge, f.ModTimeTo)
	}
	if f.Opt.MaxAge.IsSet() {
		// Objects must be at most MaxAge old, i.e. modified no earlier
		// than now - MaxAge.
		f.ModTimeFrom = time.Now().Add(-time.Duration(f.Opt.MaxAge))
		// An upper bound that lies before the lower bound would leave
		// an empty time window.
		// NOTE(review): log.Fatal terminates the process here instead of
		// reporting the problem through the err result.
		if !f.ModTimeTo.IsZero() && f.ModTimeTo.Before(f.ModTimeFrom) {
			log.Fatal("filter: --min-age can't be larger than --max-age")
		}
		fs.Debugf(nil, "--max-age %v to %v", f.Opt.MaxAge, f.ModTimeFrom)
	}

	// addImplicitExclude is set once any include option has been seen;
	// foundExcludeRule once any exclude option has been seen.
	addImplicitExclude := false
	foundExcludeRule := false

	for _, rule := range f.Opt.IncludeRule {
		err = f.Add(true, rule)
		if err != nil {
			return nil, err
		}
		addImplicitExclude = true
	}
	// In the *From loops below, "rule" holds a file path; its lines are
	// the actual rules.
	for _, rule := range f.Opt.IncludeFrom {
		err := forEachLine(rule, func(line string) error {
			return f.Add(true, line)
		})
		if err != nil {
			return nil, err
		}
		addImplicitExclude = true
	}
	for _, rule := range f.Opt.ExcludeRule {
		err = f.Add(false, rule)
		if err != nil {
			return nil, err
		}
		foundExcludeRule = true
	}
	for _, rule := range f.Opt.ExcludeFrom {
		err := forEachLine(rule, func(line string) error {
			return f.Add(false, line)
		})
		if err != nil {
			return nil, err
		}
		foundExcludeRule = true
	}

	// Mixing include and exclude options only produces a logged
	// message; construction carries on.
	if addImplicitExclude && foundExcludeRule {
		fs.Errorf(nil, "Using --filter is recommended instead of both --include and --exclude as the order they are parsed in is indeterminate")
	}

	for _, rule := range f.Opt.FilterRule {
		err = f.AddRule(rule)
		if err != nil {
			return nil, err
		}
	}
	for _, rule := range f.Opt.FilterFrom {
		err := forEachLine(rule, f.AddRule)
		if err != nil {
			return nil, err
		}
	}
	for _, rule := range f.Opt.FilesFrom {
		f.initAddFile() // init to show --files-from set even if no files within
		err := forEachLine(rule, func(line string) error {
			return f.AddFile(line)
		})
		if err != nil {
			return nil, err
		}
	}
	// Added last so that it only catches what no earlier rule matched.
	if addImplicitExclude {
		err = f.Add(false, "/**")
		if err != nil {
			return nil, err
		}
	}
	// Optionally print the resulting rules to standard output.
	if fs.Config.Dump&fs.DumpFilters != 0 {
		fmt.Println("--- start filters ---")
		fmt.Println(f.DumpFilters())
		fmt.Println("--- end filters ---")
	}
	return f, nil
}
   235  
   236  func mustNewFilter(opt *Opt) *Filter {
   237  	f, err := NewFilter(opt)
   238  	if err != nil {
   239  		panic(err)
   240  	}
   241  	return f
   242  }
   243  
   244  // addDirGlobs adds directory globs from the file glob passed in
   245  func (f *Filter) addDirGlobs(Include bool, glob string) error {
   246  	for _, dirGlob := range globToDirGlobs(glob) {
   247  		// Don't add "/" as we always include the root
   248  		if dirGlob == "/" {
   249  			continue
   250  		}
   251  		dirRe, err := globToRegexp(dirGlob, f.Opt.IgnoreCase)
   252  		if err != nil {
   253  			return err
   254  		}
   255  		boundedRecursion := globBoundedRecursion(dirGlob)
   256  		f.dirRules.add(Include, dirRe, boundedRecursion)
   257  	}
   258  	return nil
   259  }
   260  
   261  // Add adds a filter rule with include or exclude status indicated
   262  func (f *Filter) Add(Include bool, glob string) error {
   263  	isDirRule := strings.HasSuffix(glob, "/")
   264  	isFileRule := !isDirRule
   265  	if strings.Contains(glob, "**") {
   266  		isDirRule, isFileRule = true, true
   267  	}
   268  	re, err := globToRegexp(glob, f.Opt.IgnoreCase)
   269  	if err != nil {
   270  		return err
   271  	}
   272  	boundedRecursion := globBoundedRecursion(glob)
   273  	if isFileRule {
   274  		f.fileRules.add(Include, re, boundedRecursion)
   275  		// If include rule work out what directories are needed to scan
   276  		// if exclude rule, we can't rule anything out
   277  		// Unless it is `*` which matches everything
   278  		// NB ** and /** are DirRules
   279  		if Include || glob == "*" {
   280  			err = f.addDirGlobs(Include, glob)
   281  			if err != nil {
   282  				return err
   283  			}
   284  		}
   285  	}
   286  	if isDirRule {
   287  		f.dirRules.add(Include, re, boundedRecursion)
   288  	}
   289  	return nil
   290  }
   291  
   292  // AddRule adds a filter rule with include/exclude indicated by the prefix
   293  //
   294  // These are
   295  //
   296  //   + glob
   297  //   - glob
   298  //   !
   299  //
   300  // '+' includes the glob, '-' excludes it and '!' resets the filter list
   301  //
   302  // Line comments may be introduced with '#' or ';'
   303  func (f *Filter) AddRule(rule string) error {
   304  	switch {
   305  	case rule == "!":
   306  		f.Clear()
   307  		return nil
   308  	case strings.HasPrefix(rule, "- "):
   309  		return f.Add(false, rule[2:])
   310  	case strings.HasPrefix(rule, "+ "):
   311  		return f.Add(true, rule[2:])
   312  	}
   313  	return errors.Errorf("malformed rule %q", rule)
   314  }
   315  
   316  // initAddFile creates f.files and f.dirs
   317  func (f *Filter) initAddFile() {
   318  	if f.files == nil {
   319  		f.files = make(FilesMap)
   320  		f.dirs = make(FilesMap)
   321  	}
   322  }
   323  
   324  // AddFile adds a single file to the files from list
   325  func (f *Filter) AddFile(file string) error {
   326  	f.initAddFile()
   327  	file = strings.Trim(file, "/")
   328  	f.files[file] = struct{}{}
   329  	// Put all the parent directories into f.dirs
   330  	for {
   331  		file = path.Dir(file)
   332  		if file == "." {
   333  			break
   334  		}
   335  		if _, found := f.dirs[file]; found {
   336  			break
   337  		}
   338  		f.dirs[file] = struct{}{}
   339  	}
   340  	return nil
   341  }
   342  
   343  // Files returns all the files from the `--files-from` list
   344  //
   345  // It may be nil if the list is empty
   346  func (f *Filter) Files() FilesMap {
   347  	return f.files
   348  }
   349  
   350  // Clear clears all the filter rules
   351  func (f *Filter) Clear() {
   352  	f.fileRules.clear()
   353  	f.dirRules.clear()
   354  }
   355  
   356  // InActive returns false if any filters are active
   357  func (f *Filter) InActive() bool {
   358  	return (f.files == nil &&
   359  		f.ModTimeFrom.IsZero() &&
   360  		f.ModTimeTo.IsZero() &&
   361  		f.Opt.MinSize < 0 &&
   362  		f.Opt.MaxSize < 0 &&
   363  		f.fileRules.len() == 0 &&
   364  		f.dirRules.len() == 0 &&
   365  		len(f.Opt.ExcludeFile) == 0)
   366  }
   367  
   368  // BoundedRecursion returns true if the filter can be evaluated with
   369  // bounded recursion only.
   370  func (f *Filter) BoundedRecursion() bool {
   371  	return f.fileRules.boundedRecursion()
   372  }
   373  
   374  // includeRemote returns whether this remote passes the filter rules.
   375  func (f *Filter) includeRemote(remote string) bool {
   376  	for _, rule := range f.fileRules.rules {
   377  		if rule.Match(remote) {
   378  			return rule.Include
   379  		}
   380  	}
   381  	return true
   382  }
   383  
   384  // ListContainsExcludeFile checks if exclude file is present in the list.
   385  func (f *Filter) ListContainsExcludeFile(entries fs.DirEntries) bool {
   386  	if len(f.Opt.ExcludeFile) == 0 {
   387  		return false
   388  	}
   389  	for _, entry := range entries {
   390  		obj, ok := entry.(fs.Object)
   391  		if ok {
   392  			basename := path.Base(obj.Remote())
   393  			if basename == f.Opt.ExcludeFile {
   394  				return true
   395  			}
   396  		}
   397  	}
   398  	return false
   399  }
   400  
   401  // IncludeDirectory returns a function which checks whether this
   402  // directory should be included in the sync or not.
   403  func (f *Filter) IncludeDirectory(ctx context.Context, fs fs.Fs) func(string) (bool, error) {
   404  	return func(remote string) (bool, error) {
   405  		remote = strings.Trim(remote, "/")
   406  		// first check if we need to remove directory based on
   407  		// the exclude file
   408  		excl, err := f.DirContainsExcludeFile(ctx, fs, remote)
   409  		if err != nil {
   410  			return false, err
   411  		}
   412  		if excl {
   413  			return false, nil
   414  		}
   415  
   416  		// filesFrom takes precedence
   417  		if f.files != nil {
   418  			_, include := f.dirs[remote]
   419  			return include, nil
   420  		}
   421  		remote += "/"
   422  		for _, rule := range f.dirRules.rules {
   423  			if rule.Match(remote) {
   424  				return rule.Include, nil
   425  			}
   426  		}
   427  
   428  		return true, nil
   429  	}
   430  }
   431  
   432  // DirContainsExcludeFile checks if exclude file is present in a
   433  // directroy. If fs is nil, it works properly if ExcludeFile is an
   434  // empty string (for testing).
   435  func (f *Filter) DirContainsExcludeFile(ctx context.Context, fremote fs.Fs, remote string) (bool, error) {
   436  	if len(f.Opt.ExcludeFile) > 0 {
   437  		exists, err := fs.FileExists(ctx, fremote, path.Join(remote, f.Opt.ExcludeFile))
   438  		if err != nil {
   439  			return false, err
   440  		}
   441  		if exists {
   442  			return true, nil
   443  		}
   444  	}
   445  	return false, nil
   446  }
   447  
   448  // Include returns whether this object should be included into the
   449  // sync or not
   450  func (f *Filter) Include(remote string, size int64, modTime time.Time) bool {
   451  	// filesFrom takes precedence
   452  	if f.files != nil {
   453  		_, include := f.files[remote]
   454  		return include
   455  	}
   456  	if !f.ModTimeFrom.IsZero() && modTime.Before(f.ModTimeFrom) {
   457  		return false
   458  	}
   459  	if !f.ModTimeTo.IsZero() && modTime.After(f.ModTimeTo) {
   460  		return false
   461  	}
   462  	if f.Opt.MinSize >= 0 && size < int64(f.Opt.MinSize) {
   463  		return false
   464  	}
   465  	if f.Opt.MaxSize >= 0 && size > int64(f.Opt.MaxSize) {
   466  		return false
   467  	}
   468  	return f.includeRemote(remote)
   469  }
   470  
   471  // IncludeObject returns whether this object should be included into
   472  // the sync or not. This is a convenience function to avoid calling
   473  // o.ModTime(), which is an expensive operation.
   474  func (f *Filter) IncludeObject(ctx context.Context, o fs.Object) bool {
   475  	var modTime time.Time
   476  
   477  	if !f.ModTimeFrom.IsZero() || !f.ModTimeTo.IsZero() {
   478  		modTime = o.ModTime(ctx)
   479  	} else {
   480  		modTime = time.Unix(0, 0)
   481  	}
   482  
   483  	return f.Include(o.Remote(), o.Size(), modTime)
   484  }
   485  
   486  // forEachLine calls fn on every line in the file pointed to by path
   487  //
   488  // It ignores empty lines and lines starting with '#' or ';'
   489  func forEachLine(path string, fn func(string) error) (err error) {
   490  	in, err := os.Open(path)
   491  	if err != nil {
   492  		return err
   493  	}
   494  	defer fs.CheckClose(in, &err)
   495  	scanner := bufio.NewScanner(in)
   496  	for scanner.Scan() {
   497  		line := scanner.Text()
   498  		line = strings.TrimSpace(line)
   499  		if len(line) == 0 || line[0] == '#' || line[0] == ';' {
   500  			continue
   501  		}
   502  		err := fn(line)
   503  		if err != nil {
   504  			return err
   505  		}
   506  	}
   507  	return scanner.Err()
   508  }
   509  
   510  // DumpFilters dumps the filters in textual form, 1 per line
   511  func (f *Filter) DumpFilters() string {
   512  	rules := []string{}
   513  	if !f.ModTimeFrom.IsZero() {
   514  		rules = append(rules, fmt.Sprintf("Last-modified date must be equal or greater than: %s", f.ModTimeFrom.String()))
   515  	}
   516  	if !f.ModTimeTo.IsZero() {
   517  		rules = append(rules, fmt.Sprintf("Last-modified date must be equal or less than: %s", f.ModTimeTo.String()))
   518  	}
   519  	rules = append(rules, "--- File filter rules ---")
   520  	for _, rule := range f.fileRules.rules {
   521  		rules = append(rules, rule.String())
   522  	}
   523  	rules = append(rules, "--- Directory filter rules ---")
   524  	for _, dirRule := range f.dirRules.rules {
   525  		rules = append(rules, dirRule.String())
   526  	}
   527  	return strings.Join(rules, "\n")
   528  }
   529  
   530  // HaveFilesFrom returns true if --files-from has been supplied
   531  func (f *Filter) HaveFilesFrom() bool {
   532  	return f.files != nil
   533  }
   534  
   535  var errFilesFromNotSet = errors.New("--files-from not set so can't use Filter.ListR")
   536  
   537  // MakeListR makes function to return all the files set using --files-from
   538  func (f *Filter) MakeListR(ctx context.Context, NewObject func(ctx context.Context, remote string) (fs.Object, error)) fs.ListRFn {
   539  	return func(ctx context.Context, dir string, callback fs.ListRCallback) error {
   540  		if !f.HaveFilesFrom() {
   541  			return errFilesFromNotSet
   542  		}
   543  		var (
   544  			remotes = make(chan string, fs.Config.Checkers)
   545  			g       errgroup.Group
   546  		)
   547  		for i := 0; i < fs.Config.Checkers; i++ {
   548  			g.Go(func() (err error) {
   549  				var entries = make(fs.DirEntries, 1)
   550  				for remote := range remotes {
   551  					entries[0], err = NewObject(ctx, remote)
   552  					if err == fs.ErrorObjectNotFound {
   553  						// Skip files that are not found
   554  					} else if err != nil {
   555  						return err
   556  					} else {
   557  						err = callback(entries)
   558  						if err != nil {
   559  							return err
   560  						}
   561  					}
   562  				}
   563  				return nil
   564  			})
   565  		}
   566  		for remote := range f.files {
   567  			remotes <- remote
   568  		}
   569  		close(remotes)
   570  		return g.Wait()
   571  	}
   572  }