github.com/hattya/go.sh@v0.0.0-20240328132134-f53276d95cc6/pattern/pattern.go (about)

     1  //
     2  // go.sh/pattern :: pattern.go
     3  //
     4  //   Copyright (c) 2021 Akinori Hattori <hattya@gmail.com>
     5  //
     6  //   SPDX-License-Identifier: MIT
     7  //
     8  
     9  // Package pattern implements the pattern matching notation.
    10  package pattern
    11  
    12  import (
    13  	"errors"
    14  	"io"
    15  	"os"
    16  	"regexp"
    17  	"sort"
    18  	"strings"
    19  	"unicode/utf8"
    20  )
    21  
    22  // NoMatch indicates that the pattern does not match anything.
    23  var NoMatch = errors.New("no match")
    24  
    25  // Mode controls the behavior of Match.
    26  type Mode uint
    27  
    28  const (
    29  	Smallest Mode = 1 << iota // smallest match
    30  	Largest                   // largest match
    31  	Suffix                    // pattern matching with suffix
    32  	Prefix                    // pattern matching with prefix
    33  )
    34  
    35  // Match returns a string holding the portion of the match in s of the
    36  // patterns. The patterns will be joined by "|", and translated into a
    37  // regular expression.
    38  // If no match is found, the error returned is NoMatch.
    39  //
    40  // Longest is default and has priority. Suffix and Prefix are mutually
    41  // exclusive.
    42  func Match(patterns []string, mode Mode, s string) (string, error) {
    43  	if mode&Suffix != 0 && mode&Prefix != 0 {
    44  		return "", NoMatch
    45  	}
    46  	rx, err := compile(patterns, mode)
    47  	if err != nil {
    48  		return "", err
    49  	}
    50  	if m := rx.FindStringSubmatch(s); m != nil {
    51  		for mode&Smallest != 0 && mode&Suffix != 0 {
    52  			s = s[len(s)-len(m[0]):]
    53  			r, w := utf8.DecodeRuneInString(s)
    54  			if r == utf8.RuneError {
    55  				if w == 0 {
    56  					break
    57  				} else {
    58  					m[0] = m[0][w:]
    59  					continue
    60  				}
    61  			}
    62  			sm := rx.FindStringSubmatch(s[w:])
    63  			if sm == nil {
    64  				break
    65  			}
    66  			m = sm
    67  		}
    68  		return m[1], nil
    69  	}
    70  	return "", NoMatch
    71  }
    72  
    73  // Glob returns paths that matches pattern.
    74  func Glob(pattern string) ([]string, error) {
    75  	if pattern == "" {
    76  		return nil, nil
    77  	}
    78  	base, pattern := split(pattern)
    79  	paths := []string{base}
    80  	for pattern != "" {
    81  		i, w := indexSep(pattern)
    82  		var sep string
    83  		if i == -1 {
    84  			i = len(pattern)
    85  		} else {
    86  			sep = pattern[i+w-1 : i+w]
    87  		}
    88  
    89  		switch {
    90  		case i > 0:
    91  			var matches []string
    92  			if name, lit := unquote(pattern[:i]); lit {
    93  				// literal
    94  				for _, p := range paths {
    95  					if p == "." {
    96  						p = name
    97  					} else {
    98  						p += name
    99  					}
   100  					if _, err := os.Lstat(p); err == nil {
   101  						matches = append(matches, p+sep)
   102  					}
   103  				}
   104  			} else {
   105  				// pattern
   106  				rx, err := compile([]string{pattern[:i]}, Prefix|Suffix)
   107  				if err != nil {
   108  					return nil, err
   109  				}
   110  				for _, p := range paths {
   111  					err := glob(p, rx, func(name string) {
   112  						if p != "." {
   113  							name = p + name
   114  						}
   115  						matches = append(matches, name+sep)
   116  					})
   117  					if err != nil {
   118  						return nil, err
   119  					}
   120  				}
   121  			}
   122  			if len(matches) == 0 {
   123  				// no match
   124  				return nil, nil
   125  			}
   126  			paths = matches
   127  			sort.Strings(paths)
   128  		case w > 0:
   129  			// sep
   130  			for i := range paths {
   131  				paths[i] += sep
   132  			}
   133  		}
   134  		pattern = pattern[i+w:]
   135  	}
   136  	return paths, nil
   137  }
   138  
   139  func glob(path string, rx *regexp.Regexp, fn func(string)) error {
   140  	d, err := os.Open(path)
   141  	if err != nil {
   142  		return nil
   143  	}
   144  	defer d.Close()
   145  
   146  	var dot bool
   147  	if strings.HasPrefix(rx.String(), `^(\.`) {
   148  		dot = true
   149  		for _, n := range []string{".", ".."} {
   150  			if rx.MatchString(n) {
   151  				fn(n)
   152  			}
   153  		}
   154  	}
   155  	for {
   156  		switch n, err := d.Readdirnames(1); {
   157  		case err != nil:
   158  			if err == io.EOF {
   159  				return nil
   160  			}
   161  			return err
   162  		case rx.MatchString(n[0]):
   163  			if dot || !strings.HasPrefix(n[0], ".") {
   164  				fn(n[0])
   165  			}
   166  		}
   167  	}
   168  }
   169  
   170  func unquote(s string) (string, bool) {
   171  	var b strings.Builder
   172  	var esc bool
   173  	for _, r := range s {
   174  		switch r {
   175  		case utf8.RuneError:
   176  			return "", false
   177  		case '\\':
   178  			if !esc {
   179  				esc = true
   180  				continue
   181  			}
   182  		case '?', '*', '[':
   183  			if !esc {
   184  				return "", false
   185  			}
   186  		}
   187  		b.WriteRune(r)
   188  		esc = false
   189  	}
   190  	return b.String(), true
   191  }
   192  
   193  func compile(patterns []string, mode Mode) (*regexp.Regexp, error) {
   194  	var b strings.Builder
   195  	if mode&Prefix != 0 {
   196  		b.WriteByte('^')
   197  	}
   198  	b.WriteByte('(')
   199  	for i, pat := range patterns {
   200  		if i > 0 {
   201  			b.WriteByte('|')
   202  		}
   203  	Pattern:
   204  		for pat != "" {
   205  			r, w := utf8.DecodeRuneInString(pat)
   206  			switch r {
   207  			case utf8.RuneError:
   208  				b.WriteString(pat[:w])
   209  			case '?':
   210  				b.WriteByte('.')
   211  			case '*':
   212  				if mode&Smallest == 0 || mode&Largest != 0 {
   213  					b.WriteString(".*")
   214  				} else {
   215  					b.WriteString(".*?")
   216  				}
   217  			case '[':
   218  				b.WriteByte('[')
   219  				pat = pat[w:]
   220  				r, w = utf8.DecodeRuneInString(pat)
   221  				if r == '^' || r == '!' {
   222  					b.WriteByte('^')
   223  					pat = pat[w:]
   224  					r, w = utf8.DecodeRuneInString(pat)
   225  				}
   226  				if r == ']' {
   227  					b.WriteByte(']')
   228  					pat = pat[w:]
   229  					r, w = utf8.DecodeRuneInString(pat)
   230  				}
   231  			Bracket:
   232  				for {
   233  					switch r {
   234  					case utf8.RuneError:
   235  						if w == 0 {
   236  							break Pattern
   237  						}
   238  						b.WriteString(pat[:w])
   239  					case '[':
   240  						b.WriteByte('[')
   241  						pat = pat[w:]
   242  						r, w = utf8.DecodeRuneInString(pat)
   243  						switch r {
   244  						case utf8.RuneError:
   245  							if w == 0 {
   246  								break Pattern
   247  							}
   248  							b.WriteString(pat[:w])
   249  						case '.', '=', ':':
   250  							b.WriteRune(r)
   251  							pat = pat[w:]
   252  							j := strings.Index(pat, string(r)+"]")
   253  							if j == -1 {
   254  								break Bracket
   255  							}
   256  							w = j + 2
   257  							b.WriteString(pat[:w])
   258  						default:
   259  							b.WriteRune(r)
   260  							break Bracket
   261  						}
   262  					case ']':
   263  						b.WriteByte(']')
   264  						break Bracket
   265  					case '\\':
   266  						pat = pat[w:]
   267  						r, w = utf8.DecodeRuneInString(pat)
   268  						switch r {
   269  						case utf8.RuneError:
   270  							b.WriteByte('\\')
   271  							if w == 0 {
   272  								break Pattern
   273  							}
   274  							b.WriteString(pat[:w])
   275  						case '!', '-', '[', ']', '^':
   276  							b.WriteByte('\\')
   277  						}
   278  						b.WriteRune(r)
   279  					default:
   280  						b.WriteRune(r)
   281  					}
   282  					pat = pat[w:]
   283  					r, w = utf8.DecodeRuneInString(pat)
   284  				}
   285  			case '\\':
   286  				pat = pat[w:]
   287  				r, w = utf8.DecodeRuneInString(pat)
   288  				switch r {
   289  				case utf8.RuneError:
   290  					b.WriteByte('\\')
   291  					if w == 0 {
   292  						break Pattern
   293  					}
   294  					b.WriteString(pat[:w])
   295  				case '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$':
   296  					b.WriteByte('\\')
   297  				}
   298  				b.WriteRune(r)
   299  			case '.', '+', '(', ')', '|', '{', '}', '^', '$':
   300  				b.WriteByte('\\')
   301  				b.WriteRune(r)
   302  			default:
   303  				b.WriteRune(r)
   304  			}
   305  			pat = pat[w:]
   306  		}
   307  	}
   308  	b.WriteByte(')')
   309  	if mode&Suffix != 0 {
   310  		b.WriteByte('$')
   311  	}
   312  	return regexp.Compile(b.String())
   313  }