github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/python/parse_requirements.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "regexp" 7 "strings" 8 "unicode" 9 10 "github.com/nextlinux/gosbom/gosbom/artifact" 11 "github.com/nextlinux/gosbom/gosbom/file" 12 "github.com/nextlinux/gosbom/gosbom/pkg" 13 "github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic" 14 "github.com/nextlinux/gosbom/internal/log" 15 ) 16 17 var _ generic.Parser = parseRequirementsTxt 18 19 var ( 20 extrasRegex = regexp.MustCompile(`\[.*\]`) 21 urlRegex = regexp.MustCompile("@.*git.*") 22 ) 23 24 // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a 25 // specific version. 26 func parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 27 var packages []pkg.Package 28 29 scanner := bufio.NewScanner(reader) 30 for scanner.Scan() { 31 line := scanner.Text() 32 rawLineNoComments := removeTrailingComment(line) 33 line = trimRequirementsTxtLine(line) 34 35 if line == "" { 36 // nothing to parse on this line 37 continue 38 } 39 40 if strings.HasPrefix(line, "-e") { 41 // editable packages aren't parsed (yet) 42 continue 43 } 44 45 if !strings.Contains(line, "==") { 46 // a package without a version, or a range (unpinned) which does not tell us 47 // exactly what will be installed. 48 continue 49 } 50 51 // parse a new requirement 52 parts := strings.Split(line, "==") 53 if len(parts) < 2 { 54 // this should never happen, but just in case 55 log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line) 56 continue 57 } 58 59 // check if the version contains hash declarations on the same line 60 version, _ := parseVersionAndHashes(parts[1]) 61 62 name := strings.TrimSpace(parts[0]) 63 version = strings.TrimFunc(version, func(r rune) bool { 64 return !unicode.IsLetter(r) && !unicode.IsNumber(r) 65 }) 66 67 // TODO: Update to support more than only == 68 versionConstraint := fmt.Sprintf("== %s", version) 69 70 if name == "" || version == "" { 71 log.WithFields("path", reader.RealPath).Debugf("found empty package in requirements.txt line: %q", line) 72 continue 73 } 74 packages = append( 75 packages, 76 newPackageForRequirementsWithMetadata( 77 name, 78 version, 79 pkg.PythonRequirementsMetadata{ 80 Name: name, 81 Extras: parseExtras(rawLineNoComments), 82 VersionConstraint: versionConstraint, 83 URL: parseURL(rawLineNoComments), 84 Markers: parseMarkers(rawLineNoComments), 85 }, 86 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 87 ), 88 ) 89 } 90 91 if err := scanner.Err(); err != nil { 92 return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err) 93 } 94 95 return packages, nil, nil 96 } 97 98 func parseVersionAndHashes(version string) (string, []string) { 99 parts := strings.Split(version, "--hash=") 100 if len(parts) < 2 { 101 return version, nil 102 } 103 104 return parts[0], parts[1:] 105 } 106 107 // trimRequirementsTxtLine removes content from the given requirements.txt line 108 // that should not be considered for parsing. 109 func trimRequirementsTxtLine(line string) string { 110 line = strings.TrimSpace(line) 111 line = removeTrailingComment(line) 112 line = removeEnvironmentMarkers(line) 113 line = checkForRegex(line) // remove extras and url from line if found 114 115 return line 116 } 117 118 // removeTrailingComment takes a requirements.txt line and strips off comment strings. 119 func removeTrailingComment(line string) string { 120 parts := strings.SplitN(line, "#", 2) 121 if len(parts) < 2 { 122 // there aren't any comments 123 124 return line 125 } 126 127 return parts[0] 128 } 129 130 // removeEnvironmentMarkers removes any instances of environment markers (delimited by ';') from the line. 131 // For more information, see https://www.python.org/dev/peps/pep-0508/#environment-markers. 132 func removeEnvironmentMarkers(line string) string { 133 parts := strings.SplitN(line, ";", 2) 134 if len(parts) < 2 { 135 // there aren't any environment markers 136 137 return line 138 } 139 140 return parts[0] 141 } 142 143 func parseExtras(packageName string) []string { 144 if extrasRegex.MatchString(packageName) { 145 // Remove square brackets 146 extras := strings.TrimFunc(extrasRegex.FindString(packageName), func(r rune) bool { 147 return !unicode.IsLetter(r) && !unicode.IsNumber(r) 148 }) 149 150 // Remove any additional whitespace 151 extras = strings.ReplaceAll(extras, " ", "") 152 153 return strings.Split(extras, ",") 154 } 155 156 return []string{} 157 } 158 159 func parseMarkers(line string) map[string]string { 160 markers := map[string]string{} 161 parts := strings.SplitN(line, ";", 2) 162 163 if len(parts) == 2 { 164 splittableMarkers := parts[1] 165 166 for _, combineString := range []string{" or ", " and "} { 167 splittableMarkers = strings.TrimSpace( 168 strings.ReplaceAll(splittableMarkers, combineString, ","), 169 ) 170 } 171 172 splittableMarkers = strings.TrimSpace(splittableMarkers) 173 174 for _, mark := range strings.Split(splittableMarkers, ",") { 175 markparts := strings.Split(mark, " ") 176 markers[markparts[0]] = strings.Join(markparts[1:], " ") 177 } 178 } 179 180 return markers 181 } 182 183 func parseURL(line string) string { 184 parts := strings.Split(line, "@") 185 186 if len(parts) > 1 { 187 desiredIndex := -1 188 189 for index, part := range parts { 190 part := strings.TrimFunc(part, func(r rune) bool { 191 return !unicode.IsLetter(r) && !unicode.IsNumber(r) 192 }) 193 194 if strings.HasPrefix(part, "git") { 195 desiredIndex = index 196 break 197 } 198 } 199 200 if desiredIndex != -1 { 201 return strings.TrimSpace(strings.Join(parts[desiredIndex:], "@")) 202 } 203 } 204 205 return "" 206 } 207 208 // function to check a string for all possilbe regex expressions, replacing it if found 209 func checkForRegex(stringToCheck string) string { 210 stringToReturn := stringToCheck 211 212 for _, r := range []*regexp.Regexp{ 213 urlRegex, 214 extrasRegex, 215 } { 216 if r.MatchString(stringToCheck) { 217 stringToReturn = r.ReplaceAllString(stringToCheck, "") 218 } 219 } 220 221 return stringToReturn 222 }