github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/python/parse_requirements.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "regexp" 7 "strings" 8 "unicode" 9 10 pep440 "github.com/aquasecurity/go-pep440-version" 11 "github.com/mitchellh/mapstructure" 12 13 "github.com/anchore/syft/internal" 14 "github.com/anchore/syft/internal/log" 15 "github.com/anchore/syft/syft/artifact" 16 "github.com/anchore/syft/syft/file" 17 "github.com/anchore/syft/syft/pkg" 18 "github.com/anchore/syft/syft/pkg/cataloger/generic" 19 ) 20 21 const ( 22 // given the example requirement: 23 // requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux" \ 24 // --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \ 25 // --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 # some comment 26 27 // namePattern matches: requests[security] 28 namePattern = `(?P<name>\w[\w\[\],\s-_]+)` 29 30 // versionConstraintPattern matches: == 2.8.* 31 versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?` 32 33 // markersPattern matches: python_version < "2.7" and sys_platform == "linux" 34 markersPattern = `(;(?P<markers>.*))?` 35 36 // hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 37 hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?` 38 39 // whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n) 40 whiteSpaceNoNewlinePattern = `[^\S\r\n]*` 41 ) 42 43 var requirementPattern = regexp.MustCompile( 44 `^` + 45 whiteSpaceNoNewlinePattern + 46 namePattern + 47 whiteSpaceNoNewlinePattern + 48 versionConstraintPattern + 49 markersPattern + 50 hashesPattern, 51 ) 52 53 type unprocessedRequirement struct { 54 Name string `mapstructure:"name"` 55 
VersionConstraint string `mapstructure:"versionConstraint"` 56 Markers string `mapstructure:"markers"` 57 URL string `mapstructure:"url"` 58 Hashes string `mapstructure:"hashes"` 59 } 60 61 func newRequirement(raw string) *unprocessedRequirement { 62 var r unprocessedRequirement 63 64 values := internal.MatchNamedCaptureGroups(requirementPattern, raw) 65 66 if err := mapstructure.Decode(values, &r); err != nil { 67 return nil 68 } 69 70 r.Name = strings.TrimSpace(r.Name) 71 r.VersionConstraint = strings.TrimSpace(r.VersionConstraint) 72 r.Markers = strings.TrimSpace(r.Markers) 73 r.URL = strings.TrimSpace(r.URL) 74 r.Hashes = strings.TrimSpace(r.Hashes) 75 76 if r.Name == "" { 77 return nil 78 } 79 80 return &r 81 } 82 83 type requirementsParser struct { 84 guessUnpinnedRequirements bool 85 } 86 87 func newRequirementsParser(cfg CatalogerConfig) requirementsParser { 88 return requirementsParser{ 89 guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements, 90 } 91 } 92 93 // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a 94 // specific version. 95 func (rp requirementsParser) parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 96 var packages []pkg.Package 97 98 scanner := bufio.NewScanner(reader) 99 var lastLine string 100 for scanner.Scan() { 101 line := trimRequirementsTxtLine(scanner.Text()) 102 103 if lastLine != "" { 104 line = lastLine + line 105 lastLine = "" 106 } 107 108 // remove line continuations... 
smashes the file into a single line 109 if strings.HasSuffix(line, "\\") { 110 // this line is a continuation of the previous line 111 lastLine += strings.TrimSuffix(line, "\\") 112 continue 113 } 114 115 if line == "" { 116 // nothing to parse on this line 117 continue 118 } 119 120 if strings.HasPrefix(line, "-e") { 121 // editable packages aren't parsed (yet) 122 continue 123 } 124 125 req := newRequirement(line) 126 if req == nil { 127 log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line) 128 continue 129 } 130 131 name := removeExtras(req.Name) 132 version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements) 133 134 if version == "" { 135 log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line) 136 continue 137 } 138 139 packages = append( 140 packages, 141 newPackageForRequirementsWithMetadata( 142 name, 143 version, 144 pkg.PythonRequirementsMetadata{ 145 Name: name, 146 Extras: parseExtras(req.Name), 147 VersionConstraint: req.VersionConstraint, 148 URL: parseURL(req.URL), 149 Markers: req.Markers, 150 }, 151 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 152 ), 153 ) 154 } 155 156 if err := scanner.Err(); err != nil { 157 return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err) 158 } 159 160 return packages, nil, nil 161 } 162 163 func parseVersion(version string, guessFromConstraint bool) string { 164 if isPinnedConstraint(version) { 165 return strings.TrimSpace(strings.ReplaceAll(version, "==", "")) 166 } 167 168 if guessFromConstraint { 169 return guessVersion(version) 170 } 171 172 return "" 173 } 174 175 func isPinnedConstraint(version string) bool { 176 return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!") 177 } 178 179 func guessVersion(constraint string) string { 180 // handle "2.8.*" -> "2.8.0" 181 constraint = 
strings.ReplaceAll(constraint, "*", "0") 182 if isPinnedConstraint(constraint) { 183 return strings.TrimSpace(strings.ReplaceAll(constraint, "==", "")) 184 } 185 186 constraints := strings.Split(constraint, ",") 187 filteredVersions := map[string]struct{}{} 188 for _, part := range constraints { 189 if strings.Contains(part, "!=") { 190 parts := strings.Split(part, "!=") 191 filteredVersions[strings.TrimSpace(parts[1])] = struct{}{} 192 } 193 } 194 195 var closestVersion *pep440.Version 196 for _, part := range constraints { 197 // ignore any parts that do not have '=' in them, >,<,~ are not valid semver 198 parts := strings.SplitAfter(part, "=") 199 if len(parts) < 2 { 200 continue 201 } 202 version, err := pep440.Parse(strings.TrimSpace(parts[1])) 203 if err != nil { 204 // ignore any parts that are not valid semver 205 continue 206 } 207 if _, ok := filteredVersions[version.String()]; ok { 208 continue 209 } 210 211 if strings.Contains(part, "==") { 212 parts := strings.Split(part, "==") 213 return strings.TrimSpace(parts[1]) 214 } 215 216 if closestVersion == nil || version.GreaterThan(*closestVersion) { 217 closestVersion = &version 218 } 219 } 220 if closestVersion == nil { 221 return "" 222 } 223 224 return closestVersion.String() 225 } 226 227 // trimRequirementsTxtLine removes content from the given requirements.txt line 228 // that should not be considered for parsing. 229 func trimRequirementsTxtLine(line string) string { 230 line = strings.TrimSpace(line) 231 line = removeTrailingComment(line) 232 233 return line 234 } 235 236 // removeTrailingComment takes a requirements.txt line and strips off comment strings. 
func removeTrailingComment(line string) string {
	// Everything from the first '#' onward is dropped, wherever the '#' appears.
	// NOTE(review): this also cuts '#' characters that are part of a URL fragment — confirm that is intended.
	hash := strings.Index(line, "#")
	if hash == -1 {
		// there aren't any comments
		return line
	}

	return line[:hash]
}

// removeExtras returns the package name without its extras section, e.g.
// "requests[security]" -> "requests". Names without '[' are returned unchanged.
func removeExtras(packageName string) string {
	if open := strings.Index(packageName, "["); open != -1 {
		return strings.TrimSpace(packageName[:open])
	}

	return packageName
}

// parseExtras returns the whitespace-trimmed, comma-separated entries found between the first '['
// and the first ']' that follows it, e.g. "celery[redis, pytest]" -> ["redis", "pytest"].
// It returns nil when the name has no such bracket pair.
func parseExtras(packageName string) []string {
	open := strings.Index(packageName, "[")
	if open == -1 {
		return nil
	}

	// Only look for the closing bracket after the opening one: a stray ']' earlier in the
	// name would otherwise produce an inverted slice range and panic.
	inner := packageName[open+1:]
	end := strings.Index(inner, "]")
	if end == -1 {
		return nil
	}

	var extras []string
	for _, extra := range strings.Split(inner[:end], ",") {
		extras = append(extras, strings.TrimSpace(extra))
	}
	return extras
}

// parseURL extracts a URL from text that contains at least one '@'. The text is split on '@' and the
// first segment that starts with "git" (ignoring leading/trailing non-alphanumeric characters) marks
// the start of the URL; that segment and everything after it is returned, whitespace-trimmed.
// It returns "" when the text has no '@' or no segment starts with "git".
func parseURL(line string) string {
	segments := strings.Split(line, "@")
	if len(segments) < 2 {
		return ""
	}

	notAlphanumeric := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	}

	for i, segment := range segments {
		if strings.HasPrefix(strings.TrimFunc(segment, notAlphanumeric), "git") {
			// re-join with '@' so that a ref such as "@1.3.1" stays part of the URL
			return strings.TrimSpace(strings.Join(segments[i:], "@"))
		}
	}

	return ""
}