github.com/sudo-bmitch/version-bump@v0.0.0-20240503123857-70b0e3f646dd/internal/filesearch/filesearch.go (about) 1 // Package filesearch is used to retrieve files for scanning 2 package filesearch 3 4 import ( 5 "fmt" 6 "io" 7 "os" 8 "path/filepath" 9 "regexp" 10 "sort" 11 "strings" 12 13 "github.com/sudo-bmitch/version-bump/internal/config" 14 ) 15 16 type walk struct { 17 conf map[string]*config.File // list of conf entries to search for 18 confKey []string // list of keys from the conf, list aligns with confPat 19 confPat []*pattern // patterns for each conf entry 20 paths []string // list of files/dirs to process 21 curPath [][]string // current directory queue, curPath[i+1][] = subdir entries of curPath[i][0] 22 curConf int // index of last returned conf, used when a path matches multiple scans 23 // matched map[string]bool // TODO: list of entries that have already been matched and can be skipped, matches need to be for both filename and confName 24 } 25 26 // New returns a directory traversal struct, implementing the Next() method to walk all paths according to conf 27 func New(paths []string, conf map[string]*config.File) (*walk, error) { 28 if len(paths) == 0 { 29 paths = []string{"."} 30 } 31 confKey := make([]string, 0, len(conf)) 32 for i := range conf { 33 confKey = append(confKey, i) 34 } 35 sort.Strings(confKey) 36 confPat := make([]*pattern, len(confKey)) 37 for i, name := range confKey { 38 p, err := newPattern(name) 39 if err != nil { 40 return nil, fmt.Errorf("failed to parse \"%s\": %w", name, err) 41 } 42 confPat[i] = p 43 } 44 return &walk{ 45 conf: conf, 46 confKey: confKey, 47 confPat: confPat, 48 paths: paths, 49 curPath: [][]string{}, 50 curConf: -1, 51 }, nil 52 } 53 54 // Next returns: filename, name of the matching File config, and any errors 55 func (w *walk) Next() (string, string, error) { 56 // loop until EOF, fatal error, or match found 57 for { 58 // if all conf entries checked on the current path have been checked, pop last entry 59 if w.curConf+1 >= len(w.confPat) { 60 w.popCurPath() 61 } 62 63 // if the entire tree has been walked, add the next entry from paths to curPath 64 if len(w.curPath) == 0 { 65 if len(w.paths) == 0 { 66 return "", "", fmt.Errorf("end of list%.0w", io.EOF) 67 } 68 pathSplit := strings.Split(filepath.Clean(w.paths[0]), string(filepath.Separator)) 69 w.paths = w.paths[1:] 70 w.curPath = make([][]string, len(pathSplit)) 71 for i := range pathSplit { 72 w.curPath[i] = []string{pathSplit[i]} 73 } 74 w.curConf = -1 75 } 76 77 // build the current path and stat it 78 fileSplit := make([]string, len(w.curPath)) 79 for i := range w.curPath { 80 fileSplit[i] = w.curPath[i][0] 81 } 82 filename := filepath.Join(fileSplit...) 83 fi, err := os.Stat(filename) 84 if err != nil { 85 return "", "", fmt.Errorf("failed to read file %s: %w", filename, err) 86 } 87 88 // for directories 89 if fi.IsDir() { 90 // remove/skip if no matching w.conf prefix 91 foundPrefix := false 92 // always search the current dir, regexp will not otherwise match this 93 if filename == "." || filename == "/" { 94 foundPrefix = true 95 } 96 for i := 0; i < len(w.confPat) && !foundPrefix; i++ { 97 if w.confPat[i].match(filename, true) { 98 foundPrefix = true 99 } 100 } 101 if !foundPrefix { 102 w.popCurPath() 103 continue 104 } 105 // else add subdir entries 106 deList, err := os.ReadDir(filename) 107 if err != nil { 108 w.popCurPath() 109 return "", "", fmt.Errorf("failed to read directory %s: %w", filename, err) 110 } 111 if len(deList) == 0 { 112 w.popCurPath() 113 continue 114 } 115 deNames := make([]string, len(deList)) 116 for i := range deList { 117 deNames[i] = deList[i].Name() 118 } 119 w.curPath = append(w.curPath, deNames) 120 continue 121 } 122 123 // for files, check each conf to see if it matches 124 w.curConf++ 125 for w.curConf < len(w.confPat) { 126 if w.confPat[w.curConf].match(filename, false) { 127 return filename, w.confKey[w.curConf], nil 128 } 129 w.curConf++ 130 } 131 } 132 } 133 134 // popCurPath is used to finish processing of the curPath, removing the top entry 135 func (w *walk) popCurPath() { 136 // remove last path entry, recursive if entry is was the last entry in subDir 137 for { 138 i := len(w.curPath) - 1 139 if i < 0 { 140 // end of curPath 141 return 142 } 143 // last subdir contains multiple entries, remove head 144 if len(w.curPath[i]) > 1 { 145 w.curPath[i] = w.curPath[i][1:] 146 w.curConf = -1 147 return 148 } 149 // last entry in subdir, remove and repeat in parent 150 w.curPath = w.curPath[:i] 151 } 152 } 153 154 // pattern is used to compare a file or directory to a regexp 155 type pattern struct { 156 full, prefix *regexp.Regexp 157 } 158 159 // newPattern converts a string to a set of regexp's for matching the full file or directory 160 func newPattern(expr string) (*pattern, error) { 161 expr = filepath.Clean(expr) 162 reParts := []string{} 163 reCurStr := "" 164 state := "default" 165 for _, ch := range expr { 166 switch state { 167 case "default": 168 switch ch { 169 case '\\': 170 state = "escape" 171 case '*': 172 state = "star" 173 case '/': 174 reParts = append(reParts, reCurStr) 175 // "**/" matches an empty path too, so separator is optional 176 if reCurStr == ".*" || reCurStr == regexp.QuoteMeta(string(filepath.Separator))+".*" { 177 reCurStr = regexp.QuoteMeta(string(filepath.Separator)) + "?" 178 } else { 179 reCurStr = regexp.QuoteMeta(string(filepath.Separator)) 180 } 181 default: 182 reCurStr += regexp.QuoteMeta(string(ch)) 183 } 184 case "escape": 185 reCurStr += "\\" + string(ch) 186 state = "default" 187 case "star": 188 state = "default" 189 if ch == '*' { 190 // ** matches anything, even across path separators 191 reCurStr += ".*" 192 } else { 193 // * matches only within the current path 194 reCurStr += "[^" + regexp.QuoteMeta(string(filepath.Separator)) + "]*" 195 if ch == '\\' { 196 state = "escape" 197 } else if ch == '/' { 198 reParts = append(reParts, reCurStr) 199 reCurStr = regexp.QuoteMeta(string(filepath.Separator)) 200 } else { 201 reCurStr += regexp.QuoteMeta(string(ch)) 202 } 203 } 204 } 205 } 206 if state == "star" { 207 reCurStr += "[^" + regexp.QuoteMeta(string(filepath.Separator)) + "]*" 208 } 209 reParts = append(reParts, reCurStr) 210 211 // full match requires the entire path to match 212 reFullStr := "^" + strings.Join(reParts, "") + "$" 213 // partial match makes every successive path entry optional 214 rePartStr := "^" + strings.Join(reParts, "(?:") 215 for i := 0; i < len(reParts)-1; i++ { 216 rePartStr += ")?" 217 } 218 rePartStr += "$" 219 p := pattern{ 220 full: regexp.MustCompile(reFullStr), 221 prefix: regexp.MustCompile(rePartStr), 222 } 223 224 return &p, nil 225 } 226 227 // isMatch indicates if a pattern matches a specific file (or dir prefix) 228 func (p *pattern) match(filename string, prefix bool) bool { 229 filename = filepath.Clean(filename) 230 if prefix { 231 return p.prefix.Match([]byte(filename)) 232 } 233 return p.full.Match([]byte(filename)) 234 }