github.com/hattya/go.sh@v0.0.0-20240328132134-f53276d95cc6/pattern/pattern.go (about) 1 // 2 // go.sh/pattern :: pattern.go 3 // 4 // Copyright (c) 2021 Akinori Hattori <hattya@gmail.com> 5 // 6 // SPDX-License-Identifier: MIT 7 // 8 9 // Package pattern implements the pattern matching notation. 10 package pattern 11 12 import ( 13 "errors" 14 "io" 15 "os" 16 "regexp" 17 "sort" 18 "strings" 19 "unicode/utf8" 20 ) 21 22 // NoMatch indicates that the pattern does not match anything. 23 var NoMatch = errors.New("no match") 24 25 // Mode controls the behavior of Match. 26 type Mode uint 27 28 const ( 29 Smallest Mode = 1 << iota // smallest match 30 Largest // largest match 31 Suffix // pattern matching with suffix 32 Prefix // pattern matching with prefix 33 ) 34 35 // Match returns a string holding the portion of the match in s of the 36 // patterns. The patterns will be joined by "|", and translated into a 37 // regular expression. 38 // If no match is found, the error returned is NoMatch. 39 // 40 // Longest is default and has priority. Suffix and Prefix are mutually 41 // exclusive. 42 func Match(patterns []string, mode Mode, s string) (string, error) { 43 if mode&Suffix != 0 && mode&Prefix != 0 { 44 return "", NoMatch 45 } 46 rx, err := compile(patterns, mode) 47 if err != nil { 48 return "", err 49 } 50 if m := rx.FindStringSubmatch(s); m != nil { 51 for mode&Smallest != 0 && mode&Suffix != 0 { 52 s = s[len(s)-len(m[0]):] 53 r, w := utf8.DecodeRuneInString(s) 54 if r == utf8.RuneError { 55 if w == 0 { 56 break 57 } else { 58 m[0] = m[0][w:] 59 continue 60 } 61 } 62 sm := rx.FindStringSubmatch(s[w:]) 63 if sm == nil { 64 break 65 } 66 m = sm 67 } 68 return m[1], nil 69 } 70 return "", NoMatch 71 } 72 73 // Glob returns paths that matches pattern. 74 func Glob(pattern string) ([]string, error) { 75 if pattern == "" { 76 return nil, nil 77 } 78 base, pattern := split(pattern) 79 paths := []string{base} 80 for pattern != "" { 81 i, w := indexSep(pattern) 82 var sep string 83 if i == -1 { 84 i = len(pattern) 85 } else { 86 sep = pattern[i+w-1 : i+w] 87 } 88 89 switch { 90 case i > 0: 91 var matches []string 92 if name, lit := unquote(pattern[:i]); lit { 93 // literal 94 for _, p := range paths { 95 if p == "." { 96 p = name 97 } else { 98 p += name 99 } 100 if _, err := os.Lstat(p); err == nil { 101 matches = append(matches, p+sep) 102 } 103 } 104 } else { 105 // pattern 106 rx, err := compile([]string{pattern[:i]}, Prefix|Suffix) 107 if err != nil { 108 return nil, err 109 } 110 for _, p := range paths { 111 err := glob(p, rx, func(name string) { 112 if p != "." { 113 name = p + name 114 } 115 matches = append(matches, name+sep) 116 }) 117 if err != nil { 118 return nil, err 119 } 120 } 121 } 122 if len(matches) == 0 { 123 // no match 124 return nil, nil 125 } 126 paths = matches 127 sort.Strings(paths) 128 case w > 0: 129 // sep 130 for i := range paths { 131 paths[i] += sep 132 } 133 } 134 pattern = pattern[i+w:] 135 } 136 return paths, nil 137 } 138 139 func glob(path string, rx *regexp.Regexp, fn func(string)) error { 140 d, err := os.Open(path) 141 if err != nil { 142 return nil 143 } 144 defer d.Close() 145 146 var dot bool 147 if strings.HasPrefix(rx.String(), `^(\.`) { 148 dot = true 149 for _, n := range []string{".", ".."} { 150 if rx.MatchString(n) { 151 fn(n) 152 } 153 } 154 } 155 for { 156 switch n, err := d.Readdirnames(1); { 157 case err != nil: 158 if err == io.EOF { 159 return nil 160 } 161 return err 162 case rx.MatchString(n[0]): 163 if dot || !strings.HasPrefix(n[0], ".") { 164 fn(n[0]) 165 } 166 } 167 } 168 } 169 170 func unquote(s string) (string, bool) { 171 var b strings.Builder 172 var esc bool 173 for _, r := range s { 174 switch r { 175 case utf8.RuneError: 176 return "", false 177 case '\\': 178 if !esc { 179 esc = true 180 continue 181 } 182 case '?', '*', '[': 183 if !esc { 184 return "", false 185 } 186 } 187 b.WriteRune(r) 188 esc = false 189 } 190 return b.String(), true 191 } 192 193 func compile(patterns []string, mode Mode) (*regexp.Regexp, error) { 194 var b strings.Builder 195 if mode&Prefix != 0 { 196 b.WriteByte('^') 197 } 198 b.WriteByte('(') 199 for i, pat := range patterns { 200 if i > 0 { 201 b.WriteByte('|') 202 } 203 Pattern: 204 for pat != "" { 205 r, w := utf8.DecodeRuneInString(pat) 206 switch r { 207 case utf8.RuneError: 208 b.WriteString(pat[:w]) 209 case '?': 210 b.WriteByte('.') 211 case '*': 212 if mode&Smallest == 0 || mode&Largest != 0 { 213 b.WriteString(".*") 214 } else { 215 b.WriteString(".*?") 216 } 217 case '[': 218 b.WriteByte('[') 219 pat = pat[w:] 220 r, w = utf8.DecodeRuneInString(pat) 221 if r == '^' || r == '!' { 222 b.WriteByte('^') 223 pat = pat[w:] 224 r, w = utf8.DecodeRuneInString(pat) 225 } 226 if r == ']' { 227 b.WriteByte(']') 228 pat = pat[w:] 229 r, w = utf8.DecodeRuneInString(pat) 230 } 231 Bracket: 232 for { 233 switch r { 234 case utf8.RuneError: 235 if w == 0 { 236 break Pattern 237 } 238 b.WriteString(pat[:w]) 239 case '[': 240 b.WriteByte('[') 241 pat = pat[w:] 242 r, w = utf8.DecodeRuneInString(pat) 243 switch r { 244 case utf8.RuneError: 245 if w == 0 { 246 break Pattern 247 } 248 b.WriteString(pat[:w]) 249 case '.', '=', ':': 250 b.WriteRune(r) 251 pat = pat[w:] 252 j := strings.Index(pat, string(r)+"]") 253 if j == -1 { 254 break Bracket 255 } 256 w = j + 2 257 b.WriteString(pat[:w]) 258 default: 259 b.WriteRune(r) 260 break Bracket 261 } 262 case ']': 263 b.WriteByte(']') 264 break Bracket 265 case '\\': 266 pat = pat[w:] 267 r, w = utf8.DecodeRuneInString(pat) 268 switch r { 269 case utf8.RuneError: 270 b.WriteByte('\\') 271 if w == 0 { 272 break Pattern 273 } 274 b.WriteString(pat[:w]) 275 case '!', '-', '[', ']', '^': 276 b.WriteByte('\\') 277 } 278 b.WriteRune(r) 279 default: 280 b.WriteRune(r) 281 } 282 pat = pat[w:] 283 r, w = utf8.DecodeRuneInString(pat) 284 } 285 case '\\': 286 pat = pat[w:] 287 r, w = utf8.DecodeRuneInString(pat) 288 switch r { 289 case utf8.RuneError: 290 b.WriteByte('\\') 291 if w == 0 { 292 break Pattern 293 } 294 b.WriteString(pat[:w]) 295 case '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$': 296 b.WriteByte('\\') 297 } 298 b.WriteRune(r) 299 case '.', '+', '(', ')', '|', '{', '}', '^', '$': 300 b.WriteByte('\\') 301 b.WriteRune(r) 302 default: 303 b.WriteRune(r) 304 } 305 pat = pat[w:] 306 } 307 } 308 b.WriteByte(')') 309 if mode&Suffix != 0 { 310 b.WriteByte('$') 311 } 312 return regexp.Compile(b.String()) 313 }