golang.org/x/tools/gopls@v0.15.3/internal/test/integration/fake/glob/glob.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package glob implements an LSP-compliant glob pattern matcher for testing.
     6  package glob
     7  
     8  import (
     9  	"errors"
    10  	"fmt"
    11  	"strings"
    12  	"unicode/utf8"
    13  )
    14  
// A Glob is an LSP-compliant glob pattern, as defined by the spec:
// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#documentFilter
//
// NOTE: this implementation is currently only intended for testing. In order
// to make it production ready, we'd need to:
//   - verify it against the VS Code implementation
//   - add more tests
//   - microbenchmark, likely avoiding the element interface
//   - resolve the question of what is meant by "character". If it's a UTF-16
//     code unit (as we suspect) it'll be a bit more work.
//
// Quoting from the spec:
// Glob patterns can have the following syntax:
//   - `*` to match one or more characters in a path segment
//   - `?` to match on one character in a path segment
//   - `**` to match any number of path segments, including none
//   - `{}` to group sub patterns into an OR expression. (e.g. `**/*.{ts,js}`
//     matches all TypeScript and JavaScript files)
//   - `[]` to declare a range of characters to match in a path segment
//     (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
//   - `[!...]` to negate a range of characters to match in a path segment
//     (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but
//     not `example.0`)
//
// Expanding on this:
//   - '/' matches one or more literal slashes.
//   - any other character matches itself literally.
//   - despite the wording quoted above, `*` as implemented here may also
//     match an empty run of characters (e.g. `f*o` matches `fo`).
//
// A Glob is built by Parse and stores the pattern as a flat sequence of
// elements; groups hold their alternatives as nested Globs.
type Glob struct {
	elems []element // pattern elements, in the order they appear in the pattern
}
    45  
    46  // Parse builds a Glob for the given pattern, returning an error if the pattern
    47  // is invalid.
    48  func Parse(pattern string) (*Glob, error) {
    49  	g, _, err := parse(pattern, false)
    50  	return g, err
    51  }
    52  
    53  func parse(pattern string, nested bool) (*Glob, string, error) {
    54  	g := new(Glob)
    55  	for len(pattern) > 0 {
    56  		switch pattern[0] {
    57  		case '/':
    58  			pattern = pattern[1:]
    59  			g.elems = append(g.elems, slash{})
    60  
    61  		case '*':
    62  			if len(pattern) > 1 && pattern[1] == '*' {
    63  				if (len(g.elems) > 0 && g.elems[len(g.elems)-1] != slash{}) || (len(pattern) > 2 && pattern[2] != '/') {
    64  					return nil, "", errors.New("** may only be adjacent to '/'")
    65  				}
    66  				pattern = pattern[2:]
    67  				g.elems = append(g.elems, starStar{})
    68  				break
    69  			}
    70  			pattern = pattern[1:]
    71  			g.elems = append(g.elems, star{})
    72  
    73  		case '?':
    74  			pattern = pattern[1:]
    75  			g.elems = append(g.elems, anyChar{})
    76  
    77  		case '{':
    78  			var gs group
    79  			for pattern[0] != '}' {
    80  				pattern = pattern[1:]
    81  				g, pat, err := parse(pattern, true)
    82  				if err != nil {
    83  					return nil, "", err
    84  				}
    85  				if len(pat) == 0 {
    86  					return nil, "", errors.New("unmatched '{'")
    87  				}
    88  				pattern = pat
    89  				gs = append(gs, g)
    90  			}
    91  			pattern = pattern[1:]
    92  			g.elems = append(g.elems, gs)
    93  
    94  		case '}', ',':
    95  			if nested {
    96  				return g, pattern, nil
    97  			}
    98  			pattern = g.parseLiteral(pattern, false)
    99  
   100  		case '[':
   101  			pattern = pattern[1:]
   102  			if len(pattern) == 0 {
   103  				return nil, "", errBadRange
   104  			}
   105  			negate := false
   106  			if pattern[0] == '!' {
   107  				pattern = pattern[1:]
   108  				negate = true
   109  			}
   110  			low, sz, err := readRangeRune(pattern)
   111  			if err != nil {
   112  				return nil, "", err
   113  			}
   114  			pattern = pattern[sz:]
   115  			if len(pattern) == 0 || pattern[0] != '-' {
   116  				return nil, "", errBadRange
   117  			}
   118  			pattern = pattern[1:]
   119  			high, sz, err := readRangeRune(pattern)
   120  			if err != nil {
   121  				return nil, "", err
   122  			}
   123  			pattern = pattern[sz:]
   124  			if len(pattern) == 0 || pattern[0] != ']' {
   125  				return nil, "", errBadRange
   126  			}
   127  			pattern = pattern[1:]
   128  			g.elems = append(g.elems, charRange{negate, low, high})
   129  
   130  		default:
   131  			pattern = g.parseLiteral(pattern, nested)
   132  		}
   133  	}
   134  	return g, "", nil
   135  }
   136  
   137  // helper for decoding a rune in range elements, e.g. [a-z]
   138  func readRangeRune(input string) (rune, int, error) {
   139  	r, sz := utf8.DecodeRuneInString(input)
   140  	var err error
   141  	if r == utf8.RuneError {
   142  		// See the documentation for DecodeRuneInString.
   143  		switch sz {
   144  		case 0:
   145  			err = errBadRange
   146  		case 1:
   147  			err = errInvalidUTF8
   148  		}
   149  	}
   150  	return r, sz, err
   151  }
   152  
// Errors returned while parsing a '[' range element: errBadRange when the
// element does not have the shape [x-y] (optionally [!x-y]), errInvalidUTF8
// when one of its bounds is not validly encoded.
var (
	errBadRange    = errors.New("'[' patterns must be of the form [x-y]")
	errInvalidUTF8 = errors.New("invalid UTF-8 encoding")
)
   157  
   158  func (g *Glob) parseLiteral(pattern string, nested bool) string {
   159  	var specialChars string
   160  	if nested {
   161  		specialChars = "*?{[/},"
   162  	} else {
   163  		specialChars = "*?{[/"
   164  	}
   165  	end := strings.IndexAny(pattern, specialChars)
   166  	if end == -1 {
   167  		end = len(pattern)
   168  	}
   169  	g.elems = append(g.elems, literal(pattern[:end]))
   170  	return pattern[end:]
   171  }
   172  
   173  func (g *Glob) String() string {
   174  	var b strings.Builder
   175  	for _, e := range g.elems {
   176  		fmt.Fprint(&b, e)
   177  	}
   178  	return b.String()
   179  }
   180  
// element holds a glob pattern element, as defined below. Each element type
// has a String method that renders it in glob pattern syntax.
type element fmt.Stringer

// element types.
type (
	slash     struct{} // One or more '/' separators
	literal   string   // string literal, not containing /, *, ?, { or [ (nor } or , when inside a group)
	star      struct{} // *
	anyChar   struct{} // ?
	starStar  struct{} // **
	group     []*Glob  // {foo, bar, ...} grouping; one Glob per alternative
	charRange struct { // [a-z] or [!a-z] character range
		negate    bool // set when the range was written with a leading '!'
		low, high rune // bounds of the range, both inclusive
	}
)
   197  
   198  func (s slash) String() string    { return "/" }
   199  func (l literal) String() string  { return string(l) }
   200  func (s star) String() string     { return "*" }
   201  func (a anyChar) String() string  { return "?" }
   202  func (s starStar) String() string { return "**" }
   203  func (g group) String() string {
   204  	var parts []string
   205  	for _, g := range g {
   206  		parts = append(parts, g.String())
   207  	}
   208  	return "{" + strings.Join(parts, ",") + "}"
   209  }
   210  func (r charRange) String() string {
   211  	return "[" + string(r.low) + "-" + string(r.high) + "]"
   212  }
   213  
   214  // Match reports whether the input string matches the glob pattern.
   215  func (g *Glob) Match(input string) bool {
   216  	return match(g.elems, input)
   217  }
   218  
   219  func match(elems []element, input string) (ok bool) {
   220  	var elem interface{}
   221  	for len(elems) > 0 {
   222  		elem, elems = elems[0], elems[1:]
   223  		switch elem := elem.(type) {
   224  		case slash:
   225  			if len(input) == 0 || input[0] != '/' {
   226  				return false
   227  			}
   228  			for input[0] == '/' {
   229  				input = input[1:]
   230  			}
   231  
   232  		case starStar:
   233  			// Special cases:
   234  			//  - **/a matches "a"
   235  			//  - **/ matches everything
   236  			//
   237  			// Note that if ** is followed by anything, it must be '/' (this is
   238  			// enforced by Parse).
   239  			if len(elems) > 0 {
   240  				elems = elems[1:]
   241  			}
   242  
   243  			// A trailing ** matches anything.
   244  			if len(elems) == 0 {
   245  				return true
   246  			}
   247  
   248  			// Backtracking: advance pattern segments until the remaining pattern
   249  			// elements match.
   250  			for len(input) != 0 {
   251  				if match(elems, input) {
   252  					return true
   253  				}
   254  				_, input = split(input)
   255  			}
   256  			return false
   257  
   258  		case literal:
   259  			if !strings.HasPrefix(input, string(elem)) {
   260  				return false
   261  			}
   262  			input = input[len(elem):]
   263  
   264  		case star:
   265  			var segInput string
   266  			segInput, input = split(input)
   267  
   268  			elemEnd := len(elems)
   269  			for i, e := range elems {
   270  				if e == (slash{}) {
   271  					elemEnd = i
   272  					break
   273  				}
   274  			}
   275  			segElems := elems[:elemEnd]
   276  			elems = elems[elemEnd:]
   277  
   278  			// A trailing * matches the entire segment.
   279  			if len(segElems) == 0 {
   280  				break
   281  			}
   282  
   283  			// Backtracking: advance characters until remaining subpattern elements
   284  			// match.
   285  			matched := false
   286  			for i := range segInput {
   287  				if match(segElems, segInput[i:]) {
   288  					matched = true
   289  					break
   290  				}
   291  			}
   292  			if !matched {
   293  				return false
   294  			}
   295  
   296  		case anyChar:
   297  			if len(input) == 0 || input[0] == '/' {
   298  				return false
   299  			}
   300  			input = input[1:]
   301  
   302  		case group:
   303  			// Append remaining pattern elements to each group member looking for a
   304  			// match.
   305  			var branch []element
   306  			for _, m := range elem {
   307  				branch = branch[:0]
   308  				branch = append(branch, m.elems...)
   309  				branch = append(branch, elems...)
   310  				if match(branch, input) {
   311  					return true
   312  				}
   313  			}
   314  			return false
   315  
   316  		case charRange:
   317  			if len(input) == 0 || input[0] == '/' {
   318  				return false
   319  			}
   320  			c, sz := utf8.DecodeRuneInString(input)
   321  			if c < elem.low || c > elem.high {
   322  				return false
   323  			}
   324  			input = input[sz:]
   325  
   326  		default:
   327  			panic(fmt.Sprintf("segment type %T not implemented", elem))
   328  		}
   329  	}
   330  
   331  	return len(input) == 0
   332  }
   333  
   334  // split returns the portion before and after the first slash
   335  // (or sequence of consecutive slashes). If there is no slash
   336  // it returns (input, nil).
   337  func split(input string) (first, rest string) {
   338  	i := strings.IndexByte(input, '/')
   339  	if i < 0 {
   340  		return input, ""
   341  	}
   342  	first = input[:i]
   343  	for j := i; j < len(input); j++ {
   344  		if input[j] != '/' {
   345  			return first, input[j:]
   346  		}
   347  	}
   348  	return first, ""
   349  }