golang.org/x/tools/gopls@v0.15.3/internal/test/integration/fake/glob/glob.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package glob implements an LSP-compliant glob pattern matcher for testing. 6 package glob 7 8 import ( 9 "errors" 10 "fmt" 11 "strings" 12 "unicode/utf8" 13 ) 14 15 // A Glob is an LSP-compliant glob pattern, as defined by the spec: 16 // https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#documentFilter 17 // 18 // NOTE: this implementation is currently only intended for testing. In order 19 // to make it production ready, we'd need to: 20 // - verify it against the VS Code implementation 21 // - add more tests 22 // - microbenchmark, likely avoiding the element interface 23 // - resolve the question of what is meant by "character". If it's a UTF-16 24 // code (as we suspect) it'll be a bit more work. 25 // 26 // Quoting from the spec: 27 // Glob patterns can have the following syntax: 28 // - `*` to match one or more characters in a path segment 29 // - `?` to match on one character in a path segment 30 // - `**` to match any number of path segments, including none 31 // - `{}` to group sub patterns into an OR expression. (e.g. `**/*.{ts,js}` 32 // matches all TypeScript and JavaScript files) 33 // - `[]` to declare a range of characters to match in a path segment 34 // (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …) 35 // - `[!...]` to negate a range of characters to match in a path segment 36 // (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but 37 // not `example.0`) 38 // 39 // Expanding on this: 40 // - '/' matches one or more literal slashes. 41 // - any other character matches itself literally. 
42 type Glob struct { 43 elems []element // pattern elements 44 } 45 46 // Parse builds a Glob for the given pattern, returning an error if the pattern 47 // is invalid. 48 func Parse(pattern string) (*Glob, error) { 49 g, _, err := parse(pattern, false) 50 return g, err 51 } 52 53 func parse(pattern string, nested bool) (*Glob, string, error) { 54 g := new(Glob) 55 for len(pattern) > 0 { 56 switch pattern[0] { 57 case '/': 58 pattern = pattern[1:] 59 g.elems = append(g.elems, slash{}) 60 61 case '*': 62 if len(pattern) > 1 && pattern[1] == '*' { 63 if (len(g.elems) > 0 && g.elems[len(g.elems)-1] != slash{}) || (len(pattern) > 2 && pattern[2] != '/') { 64 return nil, "", errors.New("** may only be adjacent to '/'") 65 } 66 pattern = pattern[2:] 67 g.elems = append(g.elems, starStar{}) 68 break 69 } 70 pattern = pattern[1:] 71 g.elems = append(g.elems, star{}) 72 73 case '?': 74 pattern = pattern[1:] 75 g.elems = append(g.elems, anyChar{}) 76 77 case '{': 78 var gs group 79 for pattern[0] != '}' { 80 pattern = pattern[1:] 81 g, pat, err := parse(pattern, true) 82 if err != nil { 83 return nil, "", err 84 } 85 if len(pat) == 0 { 86 return nil, "", errors.New("unmatched '{'") 87 } 88 pattern = pat 89 gs = append(gs, g) 90 } 91 pattern = pattern[1:] 92 g.elems = append(g.elems, gs) 93 94 case '}', ',': 95 if nested { 96 return g, pattern, nil 97 } 98 pattern = g.parseLiteral(pattern, false) 99 100 case '[': 101 pattern = pattern[1:] 102 if len(pattern) == 0 { 103 return nil, "", errBadRange 104 } 105 negate := false 106 if pattern[0] == '!' 
{ 107 pattern = pattern[1:] 108 negate = true 109 } 110 low, sz, err := readRangeRune(pattern) 111 if err != nil { 112 return nil, "", err 113 } 114 pattern = pattern[sz:] 115 if len(pattern) == 0 || pattern[0] != '-' { 116 return nil, "", errBadRange 117 } 118 pattern = pattern[1:] 119 high, sz, err := readRangeRune(pattern) 120 if err != nil { 121 return nil, "", err 122 } 123 pattern = pattern[sz:] 124 if len(pattern) == 0 || pattern[0] != ']' { 125 return nil, "", errBadRange 126 } 127 pattern = pattern[1:] 128 g.elems = append(g.elems, charRange{negate, low, high}) 129 130 default: 131 pattern = g.parseLiteral(pattern, nested) 132 } 133 } 134 return g, "", nil 135 } 136 137 // helper for decoding a rune in range elements, e.g. [a-z] 138 func readRangeRune(input string) (rune, int, error) { 139 r, sz := utf8.DecodeRuneInString(input) 140 var err error 141 if r == utf8.RuneError { 142 // See the documentation for DecodeRuneInString. 143 switch sz { 144 case 0: 145 err = errBadRange 146 case 1: 147 err = errInvalidUTF8 148 } 149 } 150 return r, sz, err 151 } 152 153 var ( 154 errBadRange = errors.New("'[' patterns must be of the form [x-y]") 155 errInvalidUTF8 = errors.New("invalid UTF-8 encoding") 156 ) 157 158 func (g *Glob) parseLiteral(pattern string, nested bool) string { 159 var specialChars string 160 if nested { 161 specialChars = "*?{[/}," 162 } else { 163 specialChars = "*?{[/" 164 } 165 end := strings.IndexAny(pattern, specialChars) 166 if end == -1 { 167 end = len(pattern) 168 } 169 g.elems = append(g.elems, literal(pattern[:end])) 170 return pattern[end:] 171 } 172 173 func (g *Glob) String() string { 174 var b strings.Builder 175 for _, e := range g.elems { 176 fmt.Fprint(&b, e) 177 } 178 return b.String() 179 } 180 181 // element holds a glob pattern element, as defined below. 182 type element fmt.Stringer 183 184 // element types. 
185 type ( 186 slash struct{} // One or more '/' separators 187 literal string // string literal, not containing /, *, ?, {}, or [] 188 star struct{} // * 189 anyChar struct{} // ? 190 starStar struct{} // ** 191 group []*Glob // {foo, bar, ...} grouping 192 charRange struct { // [a-z] character range 193 negate bool 194 low, high rune 195 } 196 ) 197 198 func (s slash) String() string { return "/" } 199 func (l literal) String() string { return string(l) } 200 func (s star) String() string { return "*" } 201 func (a anyChar) String() string { return "?" } 202 func (s starStar) String() string { return "**" } 203 func (g group) String() string { 204 var parts []string 205 for _, g := range g { 206 parts = append(parts, g.String()) 207 } 208 return "{" + strings.Join(parts, ",") + "}" 209 } 210 func (r charRange) String() string { 211 return "[" + string(r.low) + "-" + string(r.high) + "]" 212 } 213 214 // Match reports whether the input string matches the glob pattern. 215 func (g *Glob) Match(input string) bool { 216 return match(g.elems, input) 217 } 218 219 func match(elems []element, input string) (ok bool) { 220 var elem interface{} 221 for len(elems) > 0 { 222 elem, elems = elems[0], elems[1:] 223 switch elem := elem.(type) { 224 case slash: 225 if len(input) == 0 || input[0] != '/' { 226 return false 227 } 228 for input[0] == '/' { 229 input = input[1:] 230 } 231 232 case starStar: 233 // Special cases: 234 // - **/a matches "a" 235 // - **/ matches everything 236 // 237 // Note that if ** is followed by anything, it must be '/' (this is 238 // enforced by Parse). 239 if len(elems) > 0 { 240 elems = elems[1:] 241 } 242 243 // A trailing ** matches anything. 244 if len(elems) == 0 { 245 return true 246 } 247 248 // Backtracking: advance pattern segments until the remaining pattern 249 // elements match. 
250 for len(input) != 0 { 251 if match(elems, input) { 252 return true 253 } 254 _, input = split(input) 255 } 256 return false 257 258 case literal: 259 if !strings.HasPrefix(input, string(elem)) { 260 return false 261 } 262 input = input[len(elem):] 263 264 case star: 265 var segInput string 266 segInput, input = split(input) 267 268 elemEnd := len(elems) 269 for i, e := range elems { 270 if e == (slash{}) { 271 elemEnd = i 272 break 273 } 274 } 275 segElems := elems[:elemEnd] 276 elems = elems[elemEnd:] 277 278 // A trailing * matches the entire segment. 279 if len(segElems) == 0 { 280 break 281 } 282 283 // Backtracking: advance characters until remaining subpattern elements 284 // match. 285 matched := false 286 for i := range segInput { 287 if match(segElems, segInput[i:]) { 288 matched = true 289 break 290 } 291 } 292 if !matched { 293 return false 294 } 295 296 case anyChar: 297 if len(input) == 0 || input[0] == '/' { 298 return false 299 } 300 input = input[1:] 301 302 case group: 303 // Append remaining pattern elements to each group member looking for a 304 // match. 305 var branch []element 306 for _, m := range elem { 307 branch = branch[:0] 308 branch = append(branch, m.elems...) 309 branch = append(branch, elems...) 310 if match(branch, input) { 311 return true 312 } 313 } 314 return false 315 316 case charRange: 317 if len(input) == 0 || input[0] == '/' { 318 return false 319 } 320 c, sz := utf8.DecodeRuneInString(input) 321 if c < elem.low || c > elem.high { 322 return false 323 } 324 input = input[sz:] 325 326 default: 327 panic(fmt.Sprintf("segment type %T not implemented", elem)) 328 } 329 } 330 331 return len(input) == 0 332 } 333 334 // split returns the portion before and after the first slash 335 // (or sequence of consecutive slashes). If there is no slash 336 // it returns (input, nil). 
func split(input string) (first, rest string) {
	slashAt := strings.IndexByte(input, '/')
	if slashAt == -1 {
		// No separator: the whole input is the first portion.
		return input, ""
	}
	// Everything before the first slash, then whatever follows the run of
	// slashes that starts there.
	return input[:slashAt], strings.TrimLeft(input[slashAt:], "/")
}