github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/cmds/core/elvish/glob/glob.go

github.com/u-root/u-root@v7.0.1-0.20200915234505-ad7babab0a8e+incompatible/cmds/core/elvish/glob/glob.go (about)

     1  // Package glob implements globbing for elvish.
     2  package glob
     3  
     4  import (
     5  	"io/ioutil"
     6  	"os"
     7  	"runtime"
     8  	"unicode/utf8"
     9  )
    10  
    11  // TODO: Use native path separators instead of always using /.
    12  
    13  // Glob returns a list of file names satisfying the given pattern.
    14  func Glob(p string, cb func(string) bool) bool {
    15  	return Parse(p).Glob(cb)
    16  }
    17  
    18  // Glob returns a list of file names satisfying the Pattern.
    19  func (p Pattern) Glob(cb func(string) bool) bool {
    20  	segs := p.Segments
    21  	dir := ""
    22  
    23  	// XXX: This is a hack solely for supporting globs that start with ~ in the
    24  	// eval package.
    25  	if p.DirOverride != "" {
    26  		dir = p.DirOverride
    27  	}
    28  
    29  	if len(segs) > 0 && IsSlash(segs[0]) {
    30  		segs = segs[1:]
    31  		dir += "/"
    32  	} else if runtime.GOOS == "windows" && len(segs) > 1 && IsLiteral(segs[0]) && IsSlash(segs[1]) {
    33  		// TODO: Handle UNC.
    34  		elem := segs[0].(Literal).Data
    35  		if isDrive(elem) {
    36  			segs = segs[2:]
    37  			dir = elem + "/"
    38  		}
    39  	}
    40  
    41  	return glob(segs, dir, cb)
    42  }
    43  
    44  func isDrive(s string) bool {
    45  	return len(s) == 2 && s[1] == ':' &&
    46  		(('a' <= s[0] && s[1] <= 'z') || ('A' <= s[0] && s[0] <= 'Z'))
    47  }
    48  
    49  // glob finds all filenames matching the given Segments in the given dir, and
    50  // calls the callback on all of them. If the callback returns false, globbing is
    51  // interrupted, and glob returns false. Otherwise it returns true.
    52  func glob(segs []Segment, dir string, cb func(string) bool) bool {
    53  	// Consume non-wildcard path elements simply by following the path. This may
    54  	// seem like an optimization, but is actually required for "." and ".." to
    55  	// be used as path elements, as they do not appear in the result of ReadDir.
    56  	for len(segs) > 1 && IsLiteral(segs[0]) && IsSlash(segs[1]) {
    57  		elem := segs[0].(Literal).Data
    58  		segs = segs[2:]
    59  		dir += elem + "/"
    60  		if info, err := os.Stat(dir); err != nil || !info.IsDir() {
    61  			return true
    62  		}
    63  	}
    64  
    65  	if len(segs) == 0 {
    66  		return cb(dir)
    67  	} else if len(segs) == 1 && IsLiteral(segs[0]) {
    68  		path := dir + segs[0].(Literal).Data
    69  		if _, err := os.Stat(path); err == nil {
    70  			return cb(path)
    71  		}
    72  		return true
    73  	}
    74  
    75  	infos, err := readDir(dir)
    76  	if err != nil {
    77  		// XXX Silently drop the error
    78  		return true
    79  	}
    80  
    81  	i := -1
    82  	// nexti moves i to the next index in segs that is either / or ** (in other
    83  	// words, something that matches /).
    84  	nexti := func() {
    85  		for i++; i < len(segs); i++ {
    86  			if IsSlash(segs[i]) || IsWild1(segs[i], StarStar) {
    87  				break
    88  			}
    89  		}
    90  	}
    91  	nexti()
    92  
    93  	// Enumerate the position of the first slash. In the presence of multiple
    94  	// **'s in the pattern, the first slash may be in any of those.
    95  	//
    96  	// For instance, in x**y**z, the first slash may be in the first ** or the
    97  	// second:
    98  	// 1) If it is in the first, then pattern is equivalent to x*/**y**z. We
    99  	//    match directories with x* and recurse in each subdirectory with the
   100  	//    pattern **y**z.
   101  	// 2) If it is the in the second, we know that since the first ** can no
   102  	//    longer contain any slashes, we treat it as * (this is done in
   103  	//    matchElement). The pattern is now equivalent to x*y*/**z. We match
   104  	//    directories with x*y* and recurse in each subdirectory with the
   105  	//    pattern **z.
   106  	//
   107  	// The rules are:
   108  	// 1) For each **, we treat it as */** and all previous ones as *. We match
   109  	//    subdirectories with the part before /, and recurse in subdirectories
   110  	//    with the pattern after /.
   111  	// 2) If a literal / is encountered, we return after recursing in the
   112  	//    subdirectories.
   113  	for i < len(segs) {
   114  		slash := IsSlash(segs[i])
   115  		var first, rest []Segment
   116  		if slash {
   117  			// segs = x/y. Match dir with x, recurse on y.
   118  			first, rest = segs[:i], segs[i+1:]
   119  		} else {
   120  			// segs = x**y. Match dir with x*, recurse on **y.
   121  			first, rest = segs[:i+1], segs[i:]
   122  		}
   123  
   124  		for _, info := range infos {
   125  			name := info.Name()
   126  			if matchElement(first, name) && info.IsDir() {
   127  				if !glob(rest, dir+name+"/", cb) {
   128  					return false
   129  				}
   130  			}
   131  		}
   132  
   133  		if slash {
   134  			// First slash cannot appear later than a slash in the pattern.
   135  			return true
   136  		}
   137  		nexti()
   138  	}
   139  
   140  	// If we reach here, it is possible to have no slashes at all. Simply match
   141  	// the entire pattern with all files.
   142  	for _, info := range infos {
   143  		name := info.Name()
   144  		if matchElement(segs, name) {
   145  			if !cb(dir + name) {
   146  				return false
   147  			}
   148  		}
   149  	}
   150  	return true
   151  }
   152  
   153  // readDir is just like ioutil.ReadDir except that it treats an argument of ""
   154  // as ".".
   155  func readDir(dir string) ([]os.FileInfo, error) {
   156  	if dir == "" {
   157  		dir = "."
   158  	}
   159  	return ioutil.ReadDir(dir)
   160  }
   161  
   162  // matchElement matches a path element against segments, which may not contain
   163  // any Slash segments. It treats StarStar segments as they are Star segments.
   164  func matchElement(segs []Segment, name string) bool {
   165  	if len(segs) == 0 {
   166  		return name == ""
   167  	}
   168  	// If the name start with "." and the first segment is a Wild, only match
   169  	// when MatchHidden is true.
   170  	if len(name) > 0 && name[0] == '.' && IsWild(segs[0]) && !segs[0].(Wild).MatchHidden {
   171  		return false
   172  	}
   173  segs:
   174  	for len(segs) > 0 {
   175  		// Find a chunk. A chunk is an optional Star followed by a run of
   176  		// fixed-length segments (Literal and Question).
   177  		var i int
   178  		for i = 1; i < len(segs); i++ {
   179  			if IsWild2(segs[i], Star, StarStar) {
   180  				break
   181  			}
   182  		}
   183  
   184  		chunk := segs[:i]
   185  		startsWithStar := IsWild2(chunk[0], Star, StarStar)
   186  		var startingStar Wild
   187  		if startsWithStar {
   188  			startingStar = chunk[0].(Wild)
   189  			chunk = chunk[1:]
   190  		}
   191  		segs = segs[i:]
   192  
   193  		// NOTE A quick path when len(segs) == 0 can be implemented: match
   194  		// backwards.
   195  
   196  		// Match at the current position. If this is the last chunk, we need to
   197  		// make sure name is exhausted by the matching.
   198  		ok, rest := matchFixedLength(chunk, name)
   199  		if ok && (rest == "" || len(segs) > 0) {
   200  			name = rest
   201  			continue
   202  		}
   203  
   204  		if startsWithStar {
   205  			// NOTE An optimization is to make the upper bound not len(names),
   206  			// but rather len(names) - LB(# bytes segs can match)
   207  			for i, r := range name {
   208  				j := i + len(string(r))
   209  				// Match name[:j] with the starting *, and the rest with chunk.
   210  				if !startingStar.Match(r) {
   211  					break
   212  				}
   213  				ok, rest := matchFixedLength(chunk, name[j:])
   214  				if ok && (rest == "" || len(segs) > 0) {
   215  					name = rest
   216  					continue segs
   217  				}
   218  			}
   219  		}
   220  		return false
   221  	}
   222  	return name == ""
   223  }
   224  
   225  // matchFixedLength returns whether a run of fixed-length segments (Literal and
   226  // Question) matches a prefix of name. It returns whether the match is
   227  // successful and if if it is, the remaining part of name.
   228  func matchFixedLength(segs []Segment, name string) (bool, string) {
   229  	for _, seg := range segs {
   230  		if name == "" {
   231  			return false, ""
   232  		}
   233  		switch seg := seg.(type) {
   234  		case Literal:
   235  			n := len(seg.Data)
   236  			if len(name) < n || name[:n] != seg.Data {
   237  				return false, ""
   238  			}
   239  			name = name[n:]
   240  		case Wild:
   241  			if seg.Type == Question {
   242  				r, n := utf8.DecodeRuneInString(name)
   243  				if !seg.Match(r) {
   244  					return false, ""
   245  				}
   246  				name = name[n:]
   247  			} else {
   248  				panic("matchFixedLength given non-question wild segment")
   249  			}
   250  		default:
   251  			panic("matchFixedLength given non-literal non-wild segment")
   252  		}
   253  	}
   254  	return true, name
   255  }