github.com/sudo-bmitch/version-bump@v0.0.0-20240503123857-70b0e3f646dd/internal/filesearch/filesearch.go (about)

     1  // Package filesearch is used to retrieve files for scanning
     2  package filesearch
     3  
     4  import (
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"regexp"
    10  	"sort"
    11  	"strings"
    12  
    13  	"github.com/sudo-bmitch/version-bump/internal/config"
    14  )
    15  
    16  type walk struct {
    17  	conf    map[string]*config.File // list of conf entries to search for
    18  	confKey []string                // list of keys from the conf, list aligns with confPat
    19  	confPat []*pattern              // patterns for each conf entry
    20  	paths   []string                // list of files/dirs to process
    21  	curPath [][]string              // current directory queue, curPath[i+1][] = subdir entries of curPath[i][0]
    22  	curConf int                     // index of last returned conf, used when a path matches multiple scans
    23  	// matched map[string]bool // TODO: list of entries that have already been matched and can be skipped, matches need to be for both filename and confName
    24  }
    25  
    26  // New returns a directory traversal struct, implementing the Next() method to walk all paths according to conf
    27  func New(paths []string, conf map[string]*config.File) (*walk, error) {
    28  	if len(paths) == 0 {
    29  		paths = []string{"."}
    30  	}
    31  	confKey := make([]string, 0, len(conf))
    32  	for i := range conf {
    33  		confKey = append(confKey, i)
    34  	}
    35  	sort.Strings(confKey)
    36  	confPat := make([]*pattern, len(confKey))
    37  	for i, name := range confKey {
    38  		p, err := newPattern(name)
    39  		if err != nil {
    40  			return nil, fmt.Errorf("failed to parse \"%s\": %w", name, err)
    41  		}
    42  		confPat[i] = p
    43  	}
    44  	return &walk{
    45  		conf:    conf,
    46  		confKey: confKey,
    47  		confPat: confPat,
    48  		paths:   paths,
    49  		curPath: [][]string{},
    50  		curConf: -1,
    51  	}, nil
    52  }
    53  
    54  // Next returns: filename, name of the matching File config, and any errors
    55  func (w *walk) Next() (string, string, error) {
    56  	// loop until EOF, fatal error, or match found
    57  	for {
    58  		// if all conf entries checked on the current path have been checked, pop last entry
    59  		if w.curConf+1 >= len(w.confPat) {
    60  			w.popCurPath()
    61  		}
    62  
    63  		// if the entire tree has been walked, add the next entry from paths to curPath
    64  		if len(w.curPath) == 0 {
    65  			if len(w.paths) == 0 {
    66  				return "", "", fmt.Errorf("end of list%.0w", io.EOF)
    67  			}
    68  			pathSplit := strings.Split(filepath.Clean(w.paths[0]), string(filepath.Separator))
    69  			w.paths = w.paths[1:]
    70  			w.curPath = make([][]string, len(pathSplit))
    71  			for i := range pathSplit {
    72  				w.curPath[i] = []string{pathSplit[i]}
    73  			}
    74  			w.curConf = -1
    75  		}
    76  
    77  		// build the current path and stat it
    78  		fileSplit := make([]string, len(w.curPath))
    79  		for i := range w.curPath {
    80  			fileSplit[i] = w.curPath[i][0]
    81  		}
    82  		filename := filepath.Join(fileSplit...)
    83  		fi, err := os.Stat(filename)
    84  		if err != nil {
    85  			return "", "", fmt.Errorf("failed to read file %s: %w", filename, err)
    86  		}
    87  
    88  		// for directories
    89  		if fi.IsDir() {
    90  			// remove/skip if no matching w.conf prefix
    91  			foundPrefix := false
    92  			// always search the current dir, regexp will not otherwise match this
    93  			if filename == "." || filename == "/" {
    94  				foundPrefix = true
    95  			}
    96  			for i := 0; i < len(w.confPat) && !foundPrefix; i++ {
    97  				if w.confPat[i].match(filename, true) {
    98  					foundPrefix = true
    99  				}
   100  			}
   101  			if !foundPrefix {
   102  				w.popCurPath()
   103  				continue
   104  			}
   105  			// else add subdir entries
   106  			deList, err := os.ReadDir(filename)
   107  			if err != nil {
   108  				w.popCurPath()
   109  				return "", "", fmt.Errorf("failed to read directory %s: %w", filename, err)
   110  			}
   111  			if len(deList) == 0 {
   112  				w.popCurPath()
   113  				continue
   114  			}
   115  			deNames := make([]string, len(deList))
   116  			for i := range deList {
   117  				deNames[i] = deList[i].Name()
   118  			}
   119  			w.curPath = append(w.curPath, deNames)
   120  			continue
   121  		}
   122  
   123  		// for files, check each conf to see if it matches
   124  		w.curConf++
   125  		for w.curConf < len(w.confPat) {
   126  			if w.confPat[w.curConf].match(filename, false) {
   127  				return filename, w.confKey[w.curConf], nil
   128  			}
   129  			w.curConf++
   130  		}
   131  	}
   132  }
   133  
   134  // popCurPath is used to finish processing of the curPath, removing the top entry
   135  func (w *walk) popCurPath() {
   136  	// remove last path entry, recursive if entry is was the last entry in subDir
   137  	for {
   138  		i := len(w.curPath) - 1
   139  		if i < 0 {
   140  			// end of curPath
   141  			return
   142  		}
   143  		// last subdir contains multiple entries, remove head
   144  		if len(w.curPath[i]) > 1 {
   145  			w.curPath[i] = w.curPath[i][1:]
   146  			w.curConf = -1
   147  			return
   148  		}
   149  		// last entry in subdir, remove and repeat in parent
   150  		w.curPath = w.curPath[:i]
   151  	}
   152  }
   153  
   154  // pattern is used to compare a file or directory to a regexp
   155  type pattern struct {
   156  	full, prefix *regexp.Regexp
   157  }
   158  
   159  // newPattern converts a string to a set of regexp's for matching the full file or directory
   160  func newPattern(expr string) (*pattern, error) {
   161  	expr = filepath.Clean(expr)
   162  	reParts := []string{}
   163  	reCurStr := ""
   164  	state := "default"
   165  	for _, ch := range expr {
   166  		switch state {
   167  		case "default":
   168  			switch ch {
   169  			case '\\':
   170  				state = "escape"
   171  			case '*':
   172  				state = "star"
   173  			case '/':
   174  				reParts = append(reParts, reCurStr)
   175  				// "**/" matches an empty path too, so separator is optional
   176  				if reCurStr == ".*" || reCurStr == regexp.QuoteMeta(string(filepath.Separator))+".*" {
   177  					reCurStr = regexp.QuoteMeta(string(filepath.Separator)) + "?"
   178  				} else {
   179  					reCurStr = regexp.QuoteMeta(string(filepath.Separator))
   180  				}
   181  			default:
   182  				reCurStr += regexp.QuoteMeta(string(ch))
   183  			}
   184  		case "escape":
   185  			reCurStr += "\\" + string(ch)
   186  			state = "default"
   187  		case "star":
   188  			state = "default"
   189  			if ch == '*' {
   190  				// ** matches anything, even across path separators
   191  				reCurStr += ".*"
   192  			} else {
   193  				// * matches only within the current path
   194  				reCurStr += "[^" + regexp.QuoteMeta(string(filepath.Separator)) + "]*"
   195  				if ch == '\\' {
   196  					state = "escape"
   197  				} else if ch == '/' {
   198  					reParts = append(reParts, reCurStr)
   199  					reCurStr = regexp.QuoteMeta(string(filepath.Separator))
   200  				} else {
   201  					reCurStr += regexp.QuoteMeta(string(ch))
   202  				}
   203  			}
   204  		}
   205  	}
   206  	if state == "star" {
   207  		reCurStr += "[^" + regexp.QuoteMeta(string(filepath.Separator)) + "]*"
   208  	}
   209  	reParts = append(reParts, reCurStr)
   210  
   211  	// full match requires the entire path to match
   212  	reFullStr := "^" + strings.Join(reParts, "") + "$"
   213  	// partial match makes every successive path entry optional
   214  	rePartStr := "^" + strings.Join(reParts, "(?:")
   215  	for i := 0; i < len(reParts)-1; i++ {
   216  		rePartStr += ")?"
   217  	}
   218  	rePartStr += "$"
   219  	p := pattern{
   220  		full:   regexp.MustCompile(reFullStr),
   221  		prefix: regexp.MustCompile(rePartStr),
   222  	}
   223  
   224  	return &p, nil
   225  }
   226  
   227  // isMatch indicates if a pattern matches a specific file (or dir prefix)
   228  func (p *pattern) match(filename string, prefix bool) bool {
   229  	filename = filepath.Clean(filename)
   230  	if prefix {
   231  		return p.prefix.Match([]byte(filename))
   232  	}
   233  	return p.full.Match([]byte(filename))
   234  }