github.com/10XDev/rclone@v1.52.3-0.20200626220027-16af9ab76b2a/fs/filter/glob.go (about)

     1  // rsync style glob parser
     2  
     3  package filter
     4  
     5  import (
     6  	"bytes"
     7  	"regexp"
     8  	"strings"
     9  
    10  	"github.com/pkg/errors"
    11  )
    12  
    13  // globToRegexp converts an rsync style glob to a regexp
    14  //
    15  // documented in filtering.md
    16  func globToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
    17  	var re bytes.Buffer
    18  	if ignoreCase {
    19  		_, _ = re.WriteString("(?i)")
    20  	}
    21  	if strings.HasPrefix(glob, "/") {
    22  		glob = glob[1:]
    23  		_, _ = re.WriteRune('^')
    24  	} else {
    25  		_, _ = re.WriteString("(^|/)")
    26  	}
    27  	consecutiveStars := 0
    28  	insertStars := func() error {
    29  		if consecutiveStars > 0 {
    30  			switch consecutiveStars {
    31  			case 1:
    32  				_, _ = re.WriteString(`[^/]*`)
    33  			case 2:
    34  				_, _ = re.WriteString(`.*`)
    35  			default:
    36  				return errors.Errorf("too many stars in %q", glob)
    37  			}
    38  		}
    39  		consecutiveStars = 0
    40  		return nil
    41  	}
    42  	inBraces := false
    43  	inBrackets := 0
    44  	slashed := false
    45  	for _, c := range glob {
    46  		if slashed {
    47  			_, _ = re.WriteRune(c)
    48  			slashed = false
    49  			continue
    50  		}
    51  		if c != '*' {
    52  			err := insertStars()
    53  			if err != nil {
    54  				return nil, err
    55  			}
    56  		}
    57  		if inBrackets > 0 {
    58  			_, _ = re.WriteRune(c)
    59  			if c == '[' {
    60  				inBrackets++
    61  			}
    62  			if c == ']' {
    63  				inBrackets--
    64  			}
    65  			continue
    66  		}
    67  		switch c {
    68  		case '\\':
    69  			_, _ = re.WriteRune(c)
    70  			slashed = true
    71  		case '*':
    72  			consecutiveStars++
    73  		case '?':
    74  			_, _ = re.WriteString(`[^/]`)
    75  		case '[':
    76  			_, _ = re.WriteRune(c)
    77  			inBrackets++
    78  		case ']':
    79  			return nil, errors.Errorf("mismatched ']' in glob %q", glob)
    80  		case '{':
    81  			if inBraces {
    82  				return nil, errors.Errorf("can't nest '{' '}' in glob %q", glob)
    83  			}
    84  			inBraces = true
    85  			_, _ = re.WriteRune('(')
    86  		case '}':
    87  			if !inBraces {
    88  				return nil, errors.Errorf("mismatched '{' and '}' in glob %q", glob)
    89  			}
    90  			_, _ = re.WriteRune(')')
    91  			inBraces = false
    92  		case ',':
    93  			if inBraces {
    94  				_, _ = re.WriteRune('|')
    95  			} else {
    96  				_, _ = re.WriteRune(c)
    97  			}
    98  		case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above
    99  			_, _ = re.WriteRune('\\')
   100  			_, _ = re.WriteRune(c)
   101  		default:
   102  			_, _ = re.WriteRune(c)
   103  		}
   104  	}
   105  	err := insertStars()
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  	if inBrackets > 0 {
   110  		return nil, errors.Errorf("mismatched '[' and ']' in glob %q", glob)
   111  	}
   112  	if inBraces {
   113  		return nil, errors.Errorf("mismatched '{' and '}' in glob %q", glob)
   114  	}
   115  	_, _ = re.WriteRune('$')
   116  	result, err := regexp.Compile(re.String())
   117  	if err != nil {
   118  		return nil, errors.Wrapf(err, "bad glob pattern %q (regexp %q)", glob, re.String())
   119  	}
   120  	return result, nil
   121  }
   122  
   123  var (
   124  	// Can't deal with / or ** in {}
   125  	tooHardRe = regexp.MustCompile(`{[^{}]*(\*\*|/)[^{}]*}`)
   126  
   127  	// Squash all /
   128  	squashSlash = regexp.MustCompile(`/{2,}`)
   129  )
   130  
   131  // globToDirGlobs takes a file glob and turns it into a series of
   132  // directory globs.  When matched with a directory (with a trailing /)
   133  // this should answer the question as to whether this glob could be in
   134  // this directory.
   135  func globToDirGlobs(glob string) (out []string) {
   136  	if tooHardRe.MatchString(glob) {
   137  		// Can't figure this one out so return any directory might match
   138  		out = append(out, "/**")
   139  		return out
   140  	}
   141  
   142  	// Get rid of multiple /s
   143  	glob = squashSlash.ReplaceAllString(glob, "/")
   144  
   145  	// Split on / or **
   146  	// (** can contain /)
   147  	for {
   148  		i := strings.LastIndex(glob, "/")
   149  		j := strings.LastIndex(glob, "**")
   150  		what := ""
   151  		if j > i {
   152  			i = j
   153  			what = "**"
   154  		}
   155  		if i < 0 {
   156  			if len(out) == 0 {
   157  				out = append(out, "/**")
   158  			}
   159  			break
   160  		}
   161  		glob = glob[:i]
   162  		newGlob := glob + what + "/"
   163  		if len(out) == 0 || out[len(out)-1] != newGlob {
   164  			out = append(out, newGlob)
   165  		}
   166  	}
   167  
   168  	return out
   169  }