github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/fs/filter/glob.go (about) 1 // rsync style glob parser 2 3 package filter 4 5 import ( 6 "bytes" 7 "fmt" 8 "regexp" 9 "strings" 10 11 "github.com/rclone/rclone/fs" 12 ) 13 14 // GlobToRegexp converts an rsync style glob to a regexp 15 // 16 // documented in filtering.md 17 func GlobToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) { 18 var re bytes.Buffer 19 if ignoreCase { 20 _, _ = re.WriteString("(?i)") 21 } 22 if strings.HasPrefix(glob, "/") { 23 glob = glob[1:] 24 _ = re.WriteByte('^') 25 } else { 26 _, _ = re.WriteString("(^|/)") 27 } 28 consecutiveStars := 0 29 insertStars := func() error { 30 if consecutiveStars > 0 { 31 switch consecutiveStars { 32 case 1: 33 _, _ = re.WriteString(`[^/]*`) 34 case 2: 35 _, _ = re.WriteString(`.*`) 36 default: 37 return fmt.Errorf("too many stars in %q", glob) 38 } 39 } 40 consecutiveStars = 0 41 return nil 42 } 43 overwriteLastChar := func(c byte) { 44 buf := re.Bytes() 45 buf[len(buf)-1] = c 46 } 47 inBraces := false 48 inBrackets := 0 49 slashed := false 50 inRegexp := false // inside {{ ... }} 51 inRegexpEnd := false // have received }} waiting for more 52 var next, last rune 53 for _, c := range glob { 54 next, last = c, next 55 if slashed { 56 _, _ = re.WriteRune(c) 57 slashed = false 58 continue 59 } 60 if inRegexpEnd { 61 if c == '}' { 62 // Regexp is ending with }} choose longest segment 63 // Replace final ) with } 64 overwriteLastChar('}') 65 _ = re.WriteByte(')') 66 continue 67 } else { 68 inRegexpEnd = false 69 } 70 } 71 if inRegexp { 72 if c == '}' && last == '}' { 73 inRegexp = false 74 inRegexpEnd = true 75 // Replace final } with ) 76 overwriteLastChar(')') 77 } else { 78 _, _ = re.WriteRune(c) 79 } 80 continue 81 } 82 if c != '*' { 83 err := insertStars() 84 if err != nil { 85 return nil, err 86 } 87 } 88 if inBrackets > 0 { 89 _, _ = re.WriteRune(c) 90 if c == '[' { 91 inBrackets++ 92 } 93 if c == ']' { 94 inBrackets-- 95 } 96 continue 97 } 98 switch c { 99 case '\\': 100 _, _ = re.WriteRune(c) 101 slashed = true 102 case '*': 103 consecutiveStars++ 104 case '?': 105 _, _ = re.WriteString(`[^/]`) 106 case '[': 107 _, _ = re.WriteRune(c) 108 inBrackets++ 109 case ']': 110 return nil, fmt.Errorf("mismatched ']' in glob %q", glob) 111 case '{': 112 if inBraces { 113 if last == '{' { 114 inRegexp = true 115 inBraces = false 116 } else { 117 return nil, fmt.Errorf("can't nest '{' '}' in glob %q", glob) 118 } 119 } else { 120 inBraces = true 121 _ = re.WriteByte('(') 122 } 123 case '}': 124 if !inBraces { 125 return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob) 126 } 127 _ = re.WriteByte(')') 128 inBraces = false 129 case ',': 130 if inBraces { 131 _ = re.WriteByte('|') 132 } else { 133 _, _ = re.WriteRune(c) 134 } 135 case '.', '+', '(', ')', '|', '^', '$': // regexp meta characters not dealt with above 136 _ = re.WriteByte('\\') 137 _, _ = re.WriteRune(c) 138 default: 139 _, _ = re.WriteRune(c) 140 } 141 } 142 err := insertStars() 143 if err != nil { 144 return nil, err 145 } 146 if inBrackets > 0 { 147 return nil, fmt.Errorf("mismatched '[' and ']' in glob %q", glob) 148 } 149 if inBraces { 150 return nil, fmt.Errorf("mismatched '{' and '}' in glob %q", glob) 151 } 152 if inRegexp { 153 return nil, fmt.Errorf("mismatched '{{' and '}}' in glob %q", glob) 154 } 155 _ = re.WriteByte('$') 156 result, err := regexp.Compile(re.String()) 157 if err != nil { 158 return nil, fmt.Errorf("bad glob pattern %q (regexp %q): %w", glob, re.String(), err) 159 } 160 return result, nil 161 } 162 163 var ( 164 // Can't deal with 165 // / or ** in {} 166 // {{ regexp }} 167 tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`) 168 169 // Squash all / 170 squashSlash = regexp.MustCompile(`/{2,}`) 171 ) 172 173 // globToDirGlobs takes a file glob and turns it into a series of 174 // directory globs. When matched with a directory (with a trailing /) 175 // this should answer the question as to whether this glob could be in 176 // this directory. 177 func globToDirGlobs(glob string) (out []string) { 178 if tooHardRe.MatchString(glob) { 179 // Can't figure this one out so return any directory might match 180 fs.Infof(nil, "Can't figure out directory filters from %q: looking in all directories", glob) 181 out = append(out, "/**") 182 return out 183 } 184 185 // Get rid of multiple /s 186 glob = squashSlash.ReplaceAllString(glob, "/") 187 188 // Split on / or ** 189 // (** can contain /) 190 for { 191 i := strings.LastIndex(glob, "/") 192 j := strings.LastIndex(glob, "**") 193 what := "" 194 if j > i { 195 i = j 196 what = "**" 197 } 198 if i < 0 { 199 if len(out) == 0 { 200 out = append(out, "/**") 201 } 202 break 203 } 204 glob = glob[:i] 205 newGlob := glob + what + "/" 206 if len(out) == 0 || out[len(out)-1] != newGlob { 207 out = append(out, newGlob) 208 } 209 } 210 211 return out 212 }