github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/filter/glob.go (about)

     1  // rsync style glob parser
     2  
     3  package filter
     4  
     5  import (
     6  	"bytes"
     7  	"fmt"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/rclone/rclone/fs"
    12  )
    13  
    14  // GlobToRegexp converts an rsync style glob to a regexp
    15  //
    16  // documented in filtering.md
    17  func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
    18  	var re bytes.Buffer
    19  	if ignoreCase {
    20  		_, _ = re.WriteString("(?i)")
    21  	}
    22  	if strings.HasPrefix(glob, "/") {
    23  		glob = glob[1:]
    24  		_ = re.WriteByte('^')
    25  	} else {
    26  		_, _ = re.WriteString("(^|/)")
    27  	}
    28  	consecutiveStars := 0
    29  	insertStars := func() error {
    30  		if consecutiveStars > 0 {
    31  			switch consecutiveStars {
    32  			case 1:
    33  				_, _ = re.WriteString(`[^/]*`)
    34  			case 2:
    35  				_, _ = re.WriteString(`.*`)
    36  			default:
    37  				return fmt.Errorf("too many stars in %q", glob)
    38  			}
    39  		}
    40  		consecutiveStars = 0
    41  		return nil
    42  	}
    43  	overwriteLastChar := func(c byte) {
    44  		buf := re.Bytes()
    45  		buf[len(buf)-1] = c
    46  	}
    47  	inBraces := false
    48  	inBrackets := 0
    49  	slashed := false
    50  	inRegexp := false    // inside {{ ... }}
    51  	inRegexpEnd := false // have received }} waiting for more
    52  	var next, last rune
    53  	for _, c := range glob {
    54  		next, last = c, next
    55  		if slashed {
    56  			_, _ = re.WriteRune(c)
    57  			slashed = false
    58  			continue
    59  		}
    60  		if inRegexpEnd {
    61  			if c == '}' {
    62  				// Regexp is ending with }} choose longest segment
    63  				// Replace final ) with }
    64  				overwriteLastChar('}')
    65  				_ = re.WriteByte(')')
    66  				continue
    67  			} else {
    68  				inRegexpEnd = false
    69  			}
    70  		}
    71  		if inRegexp {
    72  			if c == '}' && last == '}' {
    73  				inRegexp = false
    74  				inRegexpEnd = true
    75  				// Replace final } with )
    76  				overwriteLastChar(')')
    77  			} else {
    78  				_, _ = re.WriteRune(c)
    79  			}
    80  			continue
    81  		}
    82  		if c != '*' {
    83  			err := insertStars()
    84  			if err != nil {
    85  				return nil, err
    86  			}
    87  		}
    88  		if inBrackets > 0 {
    89  			_, _ = re.WriteRune(c)
    90  			if c == '[' {
    91  				inBrackets++
    92  			}
    93  			if c == ']' {
    94  				inBrackets--
    95  			}
    96  			continue
    97  		}
    98  		switch c {
    99  		case '\\':
   100  			_, _ = re.WriteRune(c)
   101  			slashed = true
   102  		case '*':
   103  			consecutiveStars++
   104  		case '?':
   105  			_, _ = re.WriteString(`[^/]`)
   106  		case '[':
   107  			_, _ = re.WriteRune(c)
   108  			inBrackets++
   109  		case ']':
   110  			return nil, fmt.Errorf("mismatched ']' in glob %q", glob)
   111  		case '{':
   112  			if inBraces {
   113  				if last == '{' {
   114  					inRegexp = true
   115  					inBraces = false
   116  				} else {
   117  					return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob)
   118  				}
   119  			} else {
   120  				inBraces = true
   121  				_ = re.WriteByte('(')
   122  			}
   123  		case '}':
   124  			if !inBraces {
   125  				return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
   126  			}
   127  			_ = re.WriteByte(')')
   128  			inBraces = false
   129  		case ',':
   130  			if inBraces {
   131  				_ = re.WriteByte('|')
   132  			} else {
   133  				_, _ = re.WriteRune(c)
   134  			}
   135  		case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above
   136  			_ = re.WriteByte('\\')
   137  			_, _ = re.WriteRune(c)
   138  		default:
   139  			_, _ = re.WriteRune(c)
   140  		}
   141  	}
   142  	err := insertStars()
   143  	if err != nil {
   144  		return nil, err
   145  	}
   146  	if inBrackets > 0 {
   147  		return nil, fmt.Errorf("mismatched '[' and ']' in glob %q", glob)
   148  	}
   149  	if inBraces {
   150  		return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob)
   151  	}
   152  	if inRegexp {
   153  		return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob)
   154  	}
   155  	_ = re.WriteByte('$')
   156  	result, err := regexp.Compile(re.String())
   157  	if err != nil {
   158  		return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err)
   159  	}
   160  	return result, nil
   161  }
   162  
   163  var (
   164  	// Can't deal with
   165  	//   / or ** in {}
   166  	//   {{ regexp }}
   167  	tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`)
   168  
   169  	// Squash all /
   170  	squashSlash = regexp.MustCompile(`/{2,}`)
   171  )
   172  
   173  // globToDirGlobs takes a file glob and turns it into a series of
   174  // directory globs.  When matched with a directory (with a trailing /)
   175  // this should answer the question as to whether this glob could be in
   176  // this directory.
   177  func globToDirGlobs(glob string) (out []string) {
   178  	if tooHardRe.MatchString(glob) {
   179  		// Can't figure this one out so return any directory might match
   180  		fs.Infof(nil, "Can't figure out directory filters from %q: looking in all directories", glob)
   181  		out = append(out, "/**")
   182  		return out
   183  	}
   184  
   185  	// Get rid of multiple /s
   186  	glob = squashSlash.ReplaceAllString(glob, "/")
   187  
   188  	// Split on / or **
   189  	// (** can contain /)
   190  	for {
   191  		i := strings.LastIndex(glob, "/")
   192  		j := strings.LastIndex(glob, "**")
   193  		what := ""
   194  		if j > i {
   195  			i = j
   196  			what = "**"
   197  		}
   198  		if i < 0 {
   199  			if len(out) == 0 {
   200  				out = append(out, "/**")
   201  			}
   202  			break
   203  		}
   204  		glob = glob[:i]
   205  		newGlob := glob + what + "/"
   206  		if len(out) == 0 || out[len(out)-1] != newGlob {
   207  			out = append(out, newGlob)
   208  		}
   209  	}
   210  
   211  	return out
   212  }