github.com/AndrewDeryabin/doublestar/v4@v4.0.0-20230123132908-d9476b7d41be/globwalk.go (about)

     1  package doublestar
     2  
     3  import (
     4  	"errors"
     5  	"io/fs"
     6  	"path"
     7  	"path/filepath"
     8  	"strings"
     9  )
    10  
// SkipDir is the sentinel a GlobWalkFunc may return to make GlobWalk skip the
// current directory. If the current path is a directory, GlobWalk will not
// recurse into it. Otherwise, GlobWalk will skip the rest of the directory
// that contains the current path.
//
// It is the very same value as io/fs.SkipDir.
var SkipDir = fs.SkipDir
    16  
// GlobWalkFunc is the callback type invoked by GlobWalk() for every match.
// `path` is the matched path and `d` is the directory entry describing it.
//
// If the function returns an error other than SkipDir, GlobWalk will end
// immediately and return the same error. Returning SkipDir does not abort the
// walk; it only prunes it (see SkipDir).
type GlobWalkFunc func(path string, d fs.DirEntry) error
    20  
    21  // GlobWalk calls the callback function `fn` for every file matching pattern.
    22  // The syntax of pattern is the same as in Match() and the behavior is the same
    23  // as Glob(), with regard to limitations (such as patterns containing `/./`,
    24  // `/../`, or starting with `/`). The pattern may describe hierarchical names
    25  // such as usr/*/bin/ed.
    26  //
    27  // GlobWalk may have a small performance benefit over Glob if you do not need a
    28  // slice of matches because it can avoid allocating memory for the matches.
    29  // Additionally, GlobWalk gives you access to the `fs.DirEntry` objects for
    30  // each match, and lets you quit early by returning a non-nil error from your
    31  // callback function. Like `io/fs.WalkDir`, if your callback returns `SkipDir`,
    32  // GlobWalk will skip the current directory. This means that if the current
    33  // path _is_ a directory, GlobWalk will not recurse into it. If the current
    34  // path is not a directory, the rest of the parent directory will be skipped.
    35  //
    36  // GlobWalk ignores file system errors such as I/O errors reading directories
    37  // by default. GlobWalk may return ErrBadPattern, reporting that the pattern is
    38  // malformed.
    39  //
    40  // To enable aborting on I/O errors, the WithFailOnIOErrors option can be
    41  // passed.
    42  //
    43  // Additionally, if the callback function `fn` returns an error, GlobWalk will
    44  // exit immediately and return that error.
    45  //
    46  // Like Glob(), this function assumes that your pattern uses `/` as the path
    47  // separator even if that's not correct for your OS (like Windows). If you
    48  // aren't sure if that's the case, you can use filepath.ToSlash() on your
    49  // pattern before calling GlobWalk().
    50  //
    51  // Note: users should _not_ count on the returned error,
    52  // doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
    53  //
    54  func GlobWalk(fsys fs.FS, pattern string, fn GlobWalkFunc, opts ...GlobOption) error {
    55  	if !ValidatePattern(pattern) {
    56  		return ErrBadPattern
    57  	}
    58  
    59  	g := newGlob(opts...)
    60  	return g.doGlobWalk(fsys, pattern, true, true, fn)
    61  }
    62  
    63  // Actually execute GlobWalk
    64  //   - firstSegment is true if we're in the first segment of the pattern, ie,
    65  //     the right-most part where we can match files. If it's false, we're
    66  //     somewhere in the middle (or at the beginning) and can only match
    67  //     directories since there are path segments above us.
    68  //   - beforeMeta is true if we're exploring segments before any meta
    69  //     characters, ie, in a pattern such as `path/to/file*.txt`, the `path/to/`
    70  //     bit does not contain any meta characters.
    71  func (g *glob) doGlobWalk(fsys fs.FS, pattern string, firstSegment, beforeMeta bool, fn GlobWalkFunc) error {
    72  	patternStart := indexMeta(pattern)
    73  	if patternStart == -1 {
    74  		// pattern doesn't contain any meta characters - does a file matching the
    75  		// pattern exist?
    76  		// The pattern may contain escaped wildcard characters for an exact path match.
    77  		path := unescapeMeta(pattern)
    78  		info, pathExists, err := g.exists(fsys, path, beforeMeta)
    79  		if pathExists && (!firstSegment || !g.filesOnly || !info.IsDir()) {
    80  			err = fn(path, dirEntryFromFileInfo(info))
    81  			if err == SkipDir {
    82  				err = nil
    83  			}
    84  		}
    85  		return err
    86  	}
    87  
    88  	dir := "."
    89  	splitIdx := lastIndexSlashOrAlt(pattern)
    90  	if splitIdx != -1 {
    91  		if pattern[splitIdx] == '}' {
    92  			openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx])
    93  			if openingIdx == -1 {
    94  				// if there's no matching opening index, technically Match() will treat
    95  				// an unmatched `}` as nothing special, so... we will, too!
    96  				splitIdx = lastIndexSlash(pattern[:splitIdx])
    97  				if splitIdx != -1 {
    98  					dir = pattern[:splitIdx]
    99  					pattern = pattern[splitIdx+1:]
   100  				}
   101  			} else {
   102  				// otherwise, we have to handle the alts:
   103  				return g.globAltsWalk(fsys, pattern, openingIdx, splitIdx, firstSegment, beforeMeta, fn)
   104  			}
   105  		} else {
   106  			dir = pattern[:splitIdx]
   107  			pattern = pattern[splitIdx+1:]
   108  		}
   109  	}
   110  
   111  	// if `splitIdx` is less than `patternStart`, we know `dir` has no meta
   112  	// characters. They would be equal if they are both -1, which means `dir`
   113  	// will be ".", and we know that doesn't have meta characters either.
   114  	if splitIdx <= patternStart {
   115  		return g.globDirWalk(fsys, dir, pattern, firstSegment, beforeMeta, fn)
   116  	}
   117  
   118  	return g.doGlobWalk(fsys, dir, false, beforeMeta, func(p string, d fs.DirEntry) error {
   119  		if err := g.globDirWalk(fsys, p, pattern, firstSegment, false, fn); err != nil {
   120  			return err
   121  		}
   122  		return nil
   123  	})
   124  }
   125  
   126  // handle alts in the glob pattern - `openingIdx` and `closingIdx` are the
   127  // indexes of `{` and `}`, respectively
   128  func (g *glob) globAltsWalk(fsys fs.FS, pattern string, openingIdx, closingIdx int, firstSegment, beforeMeta bool, fn GlobWalkFunc) (err error) {
   129  	var matches []DirEntryWithFullPath
   130  	startIdx := 0
   131  	afterIdx := closingIdx + 1
   132  	splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx])
   133  	if splitIdx == -1 || pattern[splitIdx] == '}' {
   134  		// no common prefix
   135  		matches, err = g.doGlobAltsWalk(fsys, "", pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, beforeMeta, matches)
   136  		if err != nil {
   137  			return
   138  		}
   139  	} else {
   140  		// our alts have a common prefix that we can process first
   141  		startIdx = splitIdx + 1
   142  		innerBeforeMeta := beforeMeta && !hasMetaExceptAlts(pattern[:splitIdx])
   143  		err = g.doGlobWalk(fsys, pattern[:splitIdx], false, beforeMeta, func(p string, d fs.DirEntry) (e error) {
   144  			matches, e = g.doGlobAltsWalk(fsys, p, pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, innerBeforeMeta, matches)
   145  			return e
   146  		})
   147  		if err != nil {
   148  			return
   149  		}
   150  	}
   151  
   152  	skip := ""
   153  	for _, m := range matches {
   154  		if skip != "" {
   155  			// Because matches are sorted, we know that descendants of the skipped
   156  			// item must come immediately after the skipped item. If we find an item
   157  			// that does not have a prefix matching the skipped item, we know we're
   158  			// done skipping. I'm using strings.HasPrefix here because
   159  			// filepath.HasPrefix has been marked deprecated (and just calls
   160  			// strings.HasPrefix anyway). The reason it's deprecated is because it
   161  			// doesn't handle case-insensitive paths, nor does it guarantee that the
   162  			// prefix is actually a parent directory. Neither is an issue here: the
   163  			// paths come from the system so their cases will match, and we guarantee
   164  			// a parent directory by appending a slash to the prefix.
   165  			//
   166  			// NOTE: m.Path will always use slashes as path separators.
   167  			if strings.HasPrefix(m.Path, skip) {
   168  				continue
   169  			}
   170  			skip = ""
   171  		}
   172  		if err = fn(m.Path, m.Entry); err != nil {
   173  			if err == SkipDir {
   174  				isDir, err := g.isDir(fsys, "", m.Path, m.Entry)
   175  				if err != nil {
   176  					return err
   177  				}
   178  				if isDir {
   179  					// append a slash to guarantee `skip` will be treated as a parent dir
   180  					skip = m.Path + "/"
   181  				} else {
   182  					// Dir() calls Clean() which calls FromSlash(), so we need to convert
   183  					// back to slashes
   184  					skip = filepath.ToSlash(filepath.Dir(m.Path)) + "/"
   185  				}
   186  				err = nil
   187  				continue
   188  			}
   189  			return
   190  		}
   191  	}
   192  
   193  	return
   194  }
   195  
   196  // runs actual matching for alts
   197  func (g *glob) doGlobAltsWalk(fsys fs.FS, d, pattern string, startIdx, openingIdx, closingIdx, afterIdx int, firstSegment, beforeMeta bool, m []DirEntryWithFullPath) (matches []DirEntryWithFullPath, err error) {
   198  	matches = m
   199  	matchesLen := len(m)
   200  	patIdx := openingIdx + 1
   201  	for patIdx < closingIdx {
   202  		nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true)
   203  		if nextIdx == -1 {
   204  			nextIdx = closingIdx
   205  		} else {
   206  			nextIdx += patIdx
   207  		}
   208  
   209  		alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx)
   210  		err = g.doGlobWalk(fsys, alt, firstSegment, beforeMeta, func(p string, d fs.DirEntry) error {
   211  			// insertion sort, ignoring dups
   212  			insertIdx := matchesLen
   213  			for insertIdx > 0 && matches[insertIdx-1].Path > p {
   214  				insertIdx--
   215  			}
   216  			if insertIdx > 0 && matches[insertIdx-1].Path == p {
   217  				// dup
   218  				return nil
   219  			}
   220  
   221  			// append to grow the slice, then insert
   222  			entry := DirEntryWithFullPath{d, p}
   223  			matches = append(matches, entry)
   224  			for i := matchesLen; i > insertIdx; i-- {
   225  				matches[i] = matches[i-1]
   226  			}
   227  			matches[insertIdx] = entry
   228  			matchesLen++
   229  
   230  			return nil
   231  		})
   232  		if err != nil {
   233  			return
   234  		}
   235  
   236  		patIdx = nextIdx + 1
   237  	}
   238  
   239  	return
   240  }
   241  
   242  func (g *glob) globDirWalk(fsys fs.FS, dir, pattern string, canMatchFiles, beforeMeta bool, fn GlobWalkFunc) (e error) {
   243  	if pattern == "" {
   244  		if !canMatchFiles || !g.filesOnly {
   245  			// pattern can be an empty string if the original pattern ended in a
   246  			// slash, in which case, we should just return dir, but only if it
   247  			// actually exists and it's a directory (or a symlink to a directory)
   248  			info, isDir, err := g.isPathDir(fsys, dir, beforeMeta)
   249  			if err != nil {
   250  				return err
   251  			}
   252  			if isDir {
   253  				e = fn(dir, dirEntryFromFileInfo(info))
   254  				if e == SkipDir {
   255  					e = nil
   256  				}
   257  			}
   258  		}
   259  		return
   260  	}
   261  
   262  	if pattern == "**" {
   263  		// `**` can match *this* dir
   264  		info, dirExists, err := g.exists(fsys, dir, beforeMeta)
   265  		if err != nil {
   266  			return err
   267  		}
   268  		if !dirExists || !info.IsDir() {
   269  			return nil
   270  		}
   271  		if !canMatchFiles || !g.filesOnly {
   272  			if e = fn(dir, dirEntryFromFileInfo(info)); e != nil {
   273  				if e == SkipDir {
   274  					e = nil
   275  				}
   276  				return
   277  			}
   278  		}
   279  		return g.globDoubleStarWalk(fsys, dir, canMatchFiles, fn)
   280  	}
   281  
   282  	dirs, err := fs.ReadDir(fsys, dir)
   283  	if err != nil {
   284  		if errors.Is(err, fs.ErrNotExist) {
   285  			return g.handlePatternNotExist(beforeMeta)
   286  		}
   287  		return g.forwardErrIfFailOnIOErrors(err)
   288  	}
   289  
   290  	var matched bool
   291  	for _, info := range dirs {
   292  		name := info.Name()
   293  		matched, e = matchWithSeparator(pattern, name, '/', false)
   294  		if e != nil {
   295  			return
   296  		}
   297  		if matched {
   298  			matched = canMatchFiles
   299  			if !matched || g.filesOnly {
   300  				matched, e = g.isDir(fsys, dir, name, info)
   301  				if e != nil {
   302  					return e
   303  				}
   304  				if canMatchFiles {
   305  					// if we're here, it's because g.filesOnly
   306  					// is set and we don't want directories
   307  					matched = !matched
   308  				}
   309  			}
   310  			if matched {
   311  				if e = fn(path.Join(dir, name), info); e != nil {
   312  					if e == SkipDir {
   313  						e = nil
   314  					}
   315  					return
   316  				}
   317  			}
   318  		}
   319  	}
   320  
   321  	return
   322  }
   323  
   324  // recursively walk files/directories in a directory
   325  func (g *glob) globDoubleStarWalk(fsys fs.FS, dir string, canMatchFiles bool, fn GlobWalkFunc) (e error) {
   326  	dirs, err := fs.ReadDir(fsys, dir)
   327  	if err != nil {
   328  		if errors.Is(err, fs.ErrNotExist) {
   329  			// This function is only ever called after we know the top-most directory
   330  			// exists, so, if we ever get here, we know we'll never return
   331  			// ErrPatternNotExist.
   332  			return nil
   333  		}
   334  		return g.forwardErrIfFailOnIOErrors(err)
   335  	}
   336  
   337  	for _, info := range dirs {
   338  		name := info.Name()
   339  		isDir, err := g.isDir(fsys, dir, name, info)
   340  		if err != nil {
   341  			return err
   342  		}
   343  
   344  		if isDir {
   345  			p := path.Join(dir, name)
   346  			if !canMatchFiles || !g.filesOnly {
   347  				// `**` can match *this* dir, so add it
   348  				if e = fn(p, info); e != nil {
   349  					if e == SkipDir {
   350  						e = nil
   351  						continue
   352  					}
   353  					return
   354  				}
   355  			}
   356  			if e = g.globDoubleStarWalk(fsys, p, canMatchFiles, fn); e != nil {
   357  				return
   358  			}
   359  		} else if canMatchFiles {
   360  			if e = fn(path.Join(dir, name), info); e != nil {
   361  				if e == SkipDir {
   362  					e = nil
   363  				}
   364  				return
   365  			}
   366  		}
   367  	}
   368  
   369  	return
   370  }
   371  
   372  type DirEntryFromFileInfo struct {
   373  	fi fs.FileInfo
   374  }
   375  
   376  func (d *DirEntryFromFileInfo) Name() string {
   377  	return d.fi.Name()
   378  }
   379  
   380  func (d *DirEntryFromFileInfo) IsDir() bool {
   381  	return d.fi.IsDir()
   382  }
   383  
   384  func (d *DirEntryFromFileInfo) Type() fs.FileMode {
   385  	return d.fi.Mode().Type()
   386  }
   387  
   388  func (d *DirEntryFromFileInfo) Info() (fs.FileInfo, error) {
   389  	return d.fi, nil
   390  }
   391  
   392  func dirEntryFromFileInfo(fi fs.FileInfo) fs.DirEntry {
   393  	return &DirEntryFromFileInfo{fi}
   394  }
   395  
// DirEntryWithFullPath pairs a directory entry with the full path at which it
// was matched. The alternatives walker collects values of this type, keeps
// them sorted by Path, and later passes Path and Entry to the user's
// callback.
type DirEntryWithFullPath struct {
	// Entry is the directory entry of the match.
	Entry fs.DirEntry
	// Path is the full path of the match; the walker's skip logic relies on
	// it using `/` as separator.
	Path string
}
   400  
   401  func hasMetaExceptAlts(s string) bool {
   402  	var c byte
   403  	l := len(s)
   404  	for i := 0; i < l; i++ {
   405  		c = s[i]
   406  		if c == '*' || c == '?' || c == '[' {
   407  			return true
   408  		} else if c == '\\' {
   409  			// skip next byte
   410  			i++
   411  		}
   412  	}
   413  	return false
   414  }