github.com/verrazzano/verrazzano@v1.7.1/tools/vz/pkg/internal/util/files/text.go (about) 1 // Copyright (c) 2021, 2024, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 // Package files handles searching 5 package files 6 7 import ( 8 "bufio" 9 "errors" 10 "fmt" 11 "go.uber.org/zap" 12 "io" 13 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 "os" 15 "regexp" 16 "time" 17 ) 18 19 // TimeRange is used when searching requires time bounded matches 20 // Handling of a TimeRange: 21 // 22 // if not specified, match is included 23 // if StartTime is supplied (not zero) matches at/after that time are included 24 // if EndTime is supplied (not zero) matches at/before that time are included 25 type TimeRange struct { 26 StartTime metav1.Time 27 EndTime metav1.Time 28 } 29 30 // TextMatch supplies information about the matched text 31 type TextMatch struct { 32 FileName string 33 FileLine int 34 Timestamp metav1.Time 35 MatchedText string 36 } 37 38 var ZeroTime = metav1.NewTime(time.Time{}) 39 40 // TODO: May move to only functions which require pre-compiled regular expressions, and have the pre-compiled at 41 // compilation time rather than at runtime 42 43 // SearchMatches will search the list of TextMatch using a search expression and will return all that match 44 func SearchMatches(log *zap.SugaredLogger, matchesToSearch []TextMatch, searchMatchRe *regexp.Regexp) (matches []TextMatch, err error) { 45 for _, matchToSearch := range matchesToSearch { 46 if searchMatchRe.MatchString(matchToSearch.MatchedText) { 47 matches = append(matches, matchToSearch) 48 } 49 } 50 return matches, nil 51 } 52 53 // SearchFiles will search the list of files that are already known for text that matches 54 func SearchFiles(log *zap.SugaredLogger, rootDirectory string, files []string, searchMatchRe *regexp.Regexp, timeRange *TimeRange) (matches []TextMatch, err error) { 55 if searchMatchRe == nil { 56 return nil, fmt.Errorf("SaerchFilesRe requires a regular expression") 57 } 58 59 if len(files) == 0 { 60 log.Debugf("SearchFilesRe was not given any files, return nil") 61 return nil, nil 62 } 63 64 for _, fileName := range files { 65 matchesFromFile, err := SearchFile(log, fileName, searchMatchRe, timeRange) 66 if err != nil { 67 log.Debugf("failure opening %s", fileName, err) 68 return nil, err 69 } 70 if len(matchesFromFile) > 0 { 71 matches = append(matches, matchesFromFile...) 72 } 73 } 74 return matches, nil 75 } 76 77 // SearchFile search a file 78 func SearchFile(log *zap.SugaredLogger, fileName string, searchMatchRe *regexp.Regexp, timeRange *TimeRange) (matches []TextMatch, err error) { 79 if searchMatchRe == nil { 80 return nil, fmt.Errorf("SearchFileRe requires a regular expression") 81 } 82 83 if len(fileName) == 0 { 84 log.Debugf("SearchFileRe was not given a file, return nil") 85 return nil, nil 86 } 87 88 file, err := os.Open(fileName) 89 if err != nil { 90 log.Debugf("failure opening %s", fileName, err) 91 return nil, err 92 } 93 defer file.Close() 94 95 fileStat, err := file.Stat() 96 if err != nil { 97 log.Debugf("failure getting stat for %s", fileName, err) 98 return nil, err 99 } 100 if fileStat.IsDir() { 101 log.Debugf("Skipping directory in search %s", fileName) 102 return nil, nil 103 } 104 if !fileStat.Mode().IsRegular() { 105 log.Debugf("Skipping non-regular file in search %s", fileName) 106 return nil, nil 107 } 108 109 // Had issues with token too large using the scanner, so using a reader instead 110 reader := bufio.NewReader(file) 111 lineNumber := 0 112 var match TextMatch 113 for { 114 line, readErr := reader.ReadString('\n') 115 if readErr != nil && readErr != io.EOF { 116 // If we had an unexpected failure we fail 117 log.Debugf("failure reading file %s", fileName, readErr) 118 return nil, readErr 119 } 120 if len(line) > 0 { 121 // See if we have a match 122 lineNumber++ 123 matched := searchMatchRe.Find([]byte(line)) 124 if len(matched) > 0 { 125 timestamp := ExtractTimeIfPresent(line) 126 if IsInTimeRange(timestamp, timeRange) { 127 match.Timestamp = timestamp 128 match.FileLine = lineNumber 129 match.FileName = fileName 130 match.MatchedText = line 131 matches = append(matches, match) 132 } 133 } 134 } 135 // If we hit EOF, we're done 136 if readErr == io.EOF { 137 break 138 } 139 } 140 141 return matches, nil 142 } 143 144 // FindFilesAndSearch will search across files that match a specified expression 145 func FindFilesAndSearch(log *zap.SugaredLogger, rootDirectory string, fileMatchRe *regexp.Regexp, searchMatchRe *regexp.Regexp, timeRange *TimeRange) (matches []TextMatch, err error) { 146 if len(rootDirectory) == 0 { 147 return nil, errors.New("FindFilesAndSearch requires rootDirectory") 148 } 149 150 if fileMatchRe == nil { 151 return nil, errors.New("FindFilesAndSearch requires fileMatch expression") 152 } 153 154 if searchMatchRe == nil { 155 return nil, errors.New("FindFilesAndSearch requires a search expression be supplied") 156 } 157 158 // Get the list of files that match 159 filesToSearch, err := GetMatchingFileNames(log, rootDirectory, fileMatchRe) 160 if err != nil { 161 log.Debugf("FindFilesAndSearch failed", err) 162 return nil, err 163 } 164 165 // Note that SearchFiles will detect if no files were found so just call it 166 return SearchFiles(log, rootDirectory, filesToSearch, searchMatchRe, timeRange) 167 } 168 169 // ExtractTimeIfPresent determines if the text matches a known pattern which has a timestamp in it (such as known log formats) 170 // and will extract the timestamp into a wrappered metav1.Time. If there is no timestamp found it will return a zero time value 171 func ExtractTimeIfPresent(inputText string) metav1.Time { 172 // TODO: Add known log timestamp patterns, and parse out the times 173 return ZeroTime 174 } 175 176 // IsInTimeRange will check if a specified time is within the specified range 177 // It will return true if: 178 // - there is no time range specified 179 // - a time range is specified and the time specified is in that range (see TimeRange type) 180 // 181 // Otherwise it will return false: 182 // - if the time is zero and there is a range specified, it can't determine it 183 // - if the time is not within the range specified 184 func IsInTimeRange(timeToCheck metav1.Time, timeRange *TimeRange) bool { 185 // If there is no time range, then all times would match it 186 if timeRange == nil || (timeRange.StartTime.IsZero() && timeRange.EndTime.IsZero()) { 187 return true 188 } 189 190 // We know there is some time range specified, so if there is no input time to check 191 // we can't determine if it is in the range, so return false 192 if timeToCheck.IsZero() { 193 return false 194 } 195 196 // if the start/end times in a range are zero, they don't limit the range 197 isAfterStart := timeRange.StartTime.IsZero() || !timeToCheck.Before(&timeRange.StartTime) 198 isBeforeEnd := timeRange.EndTime.IsZero() || timeToCheck.Before(&timeRange.EndTime) || timeToCheck.Equal(&timeRange.EndTime) 199 return isAfterStart && isBeforeEnd 200 }