github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/parse_requirements.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "context" 6 "fmt" 7 "regexp" 8 "strings" 9 "unicode" 10 11 pep440 "github.com/aquasecurity/go-pep440-version" 12 "github.com/go-viper/mapstructure/v2" 13 14 "github.com/anchore/syft/internal" 15 "github.com/anchore/syft/internal/log" 16 "github.com/anchore/syft/internal/unknown" 17 "github.com/anchore/syft/syft/artifact" 18 "github.com/anchore/syft/syft/file" 19 "github.com/anchore/syft/syft/pkg" 20 "github.com/anchore/syft/syft/pkg/cataloger/generic" 21 ) 22 23 const ( 24 // given the example requirement: 25 // requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux" \ 26 // --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \ 27 // --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 # some comment 28 29 // namePattern matches: requests[security] 30 namePattern = `(?P<name>\w[\w\[\],\s-_\.]+)` 31 32 // versionConstraintPattern matches: == 2.8.* 33 versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?` 34 35 // markersPattern matches: python_version < "2.7" and sys_platform == "linux" 36 markersPattern = `(;(?P<markers>.*))?` 37 38 // hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 39 hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?` 40 41 // whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n) 42 whiteSpaceNoNewlinePattern = `[^\S\r\n]*` 43 ) 44 45 var requirementPattern = regexp.MustCompile( 46 `^` + 47 whiteSpaceNoNewlinePattern + 48 namePattern + 49 whiteSpaceNoNewlinePattern + 50 versionConstraintPattern + 51 markersPattern + 52 hashesPattern, 53 ) 54 55 type unprocessedRequirement struct { 56 Name 
string `mapstructure:"name"` 57 VersionConstraint string `mapstructure:"versionConstraint"` 58 Markers string `mapstructure:"markers"` 59 URL string `mapstructure:"url"` 60 Hashes string `mapstructure:"hashes"` 61 } 62 63 func newRequirement(raw string) *unprocessedRequirement { 64 var r unprocessedRequirement 65 66 values := internal.MatchNamedCaptureGroups(requirementPattern, raw) 67 68 if err := mapstructure.Decode(values, &r); err != nil { 69 return nil 70 } 71 72 r.Name = strings.TrimSpace(r.Name) 73 r.VersionConstraint = strings.TrimSpace(r.VersionConstraint) 74 r.Markers = strings.TrimSpace(r.Markers) 75 r.URL = strings.TrimSpace(r.URL) 76 r.Hashes = strings.TrimSpace(r.Hashes) 77 78 if r.Name == "" { 79 return nil 80 } 81 82 return &r 83 } 84 85 type requirementsParser struct { 86 cfg CatalogerConfig 87 licenseResolver pythonLicenseResolver 88 } 89 90 func newRequirementsParser(cfg CatalogerConfig) requirementsParser { 91 return requirementsParser{ 92 cfg: cfg, 93 licenseResolver: newPythonLicenseResolver(cfg), 94 } 95 } 96 97 // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a 98 // specific version. 99 func (rp requirementsParser) parseRequirementsTxt(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 100 var errs error 101 var packages []pkg.Package 102 103 scanner := bufio.NewScanner(reader) 104 var lastLine string 105 for scanner.Scan() { 106 line := trimRequirementsTxtLine(scanner.Text()) 107 108 if lastLine != "" { 109 line = lastLine + line 110 lastLine = "" 111 } 112 113 // remove line continuations... 
smashes the file into a single line 114 if strings.HasSuffix(line, "\\") { 115 // this line is a continuation of the previous line 116 lastLine += strings.TrimSuffix(line, "\\") 117 continue 118 } 119 120 if line == "" { 121 // nothing to parse on this line 122 continue 123 } 124 125 if strings.HasPrefix(line, "-e") { 126 // editable packages aren't parsed (yet) 127 continue 128 } 129 130 req := newRequirement(line) 131 if req == nil { 132 log.WithFields("path", reader.RealPath, "line", line).Debug("unable to parse requirements.txt line") 133 errs = unknown.Appendf(errs, reader, "unable to parse requirements.txt line: %q", line) 134 continue 135 } 136 137 name := removeExtras(req.Name) 138 version := parseVersion(req.VersionConstraint, rp.cfg.GuessUnpinnedRequirements) 139 140 if version == "" { 141 log.WithFields("path", reader.RealPath, "line", line).Trace("unable to determine package version in requirements.txt line") 142 errs = unknown.Appendf(errs, reader, "unable to determine package version in requirements.txt line: %q", line) 143 continue 144 } 145 146 packages = append( 147 packages, 148 newPackageForRequirementsWithMetadata( 149 ctx, 150 rp.licenseResolver, 151 name, 152 version, 153 pkg.PythonRequirementsEntry{ 154 Name: name, 155 Extras: parseExtras(req.Name), 156 VersionConstraint: req.VersionConstraint, 157 URL: parseURL(req.URL), 158 Markers: req.Markers, 159 }, 160 reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 161 ), 162 ) 163 } 164 165 if err := scanner.Err(); err != nil { 166 return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err) 167 } 168 169 return packages, nil, unknown.Join(errs, unknown.IfEmptyf(packages, "unable to determine packages")) 170 } 171 172 func parseVersion(version string, guessFromConstraint bool) string { 173 if isPinnedConstraint(version) { 174 return strings.TrimSpace(strings.ReplaceAll(version, "==", "")) 175 } 176 177 if guessFromConstraint { 178 return 
guessVersion(version) 179 } 180 181 return "" 182 } 183 184 func isPinnedConstraint(version string) bool { 185 return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!") 186 } 187 188 func guessVersion(constraint string) string { 189 // handle "2.8.*" -> "2.8.0" 190 constraint = strings.ReplaceAll(constraint, "*", "0") 191 if isPinnedConstraint(constraint) { 192 return strings.TrimSpace(strings.ReplaceAll(constraint, "==", "")) 193 } 194 195 constraints := strings.Split(constraint, ",") 196 filteredVersions := map[string]struct{}{} 197 for _, part := range constraints { 198 if strings.Contains(part, "!=") { 199 parts := strings.Split(part, "!=") 200 filteredVersions[strings.TrimSpace(parts[1])] = struct{}{} 201 } 202 } 203 204 var closestVersion *pep440.Version 205 for _, part := range constraints { 206 // ignore any parts that do not have '=' in them, >,<,~ are not valid semver 207 parts := strings.SplitAfter(part, "=") 208 if len(parts) < 2 { 209 continue 210 } 211 version, err := pep440.Parse(strings.TrimSpace(parts[1])) 212 if err != nil { 213 // ignore any parts that are not valid semver 214 continue 215 } 216 if _, ok := filteredVersions[version.String()]; ok { 217 continue 218 } 219 220 if strings.Contains(part, "==") { 221 parts := strings.Split(part, "==") 222 return strings.TrimSpace(parts[1]) 223 } 224 225 if closestVersion == nil || version.GreaterThan(*closestVersion) { 226 closestVersion = &version 227 } 228 } 229 if closestVersion == nil { 230 return "" 231 } 232 233 return closestVersion.String() 234 } 235 236 // trimRequirementsTxtLine removes content from the given requirements.txt line 237 // that should not be considered for parsing. 238 func trimRequirementsTxtLine(line string) string { 239 line = strings.TrimSpace(line) 240 line = removeTrailingComment(line) 241 242 return line 243 } 244 245 // removeTrailingComment takes a requirements.txt line and strips off comment strings. 
// A "#" begins a comment only at the start of the line or directly after
// whitespace (the rule pip documents for requirements files), so URL fragments
// such as "#egg=name" or "#sha256=..." are left intact. Text before the comment
// is returned as-is, including any whitespace that preceded the "#".
func removeTrailingComment(line string) string {
	atBoundary := true // the start of the line counts as a comment boundary
	for i, r := range line {
		if r == '#' && atBoundary {
			return line[:i]
		}
		atBoundary = unicode.IsSpace(r)
	}

	// there aren't any comments
	return line
}

// removeExtras returns the package name without its extras specifier, e.g.
// "requests[security]" -> "requests". A name without "[" is returned unchanged.
func removeExtras(packageName string) string {
	bracket := strings.IndexByte(packageName, '[')
	if bracket < 0 {
		return packageName
	}

	return strings.TrimSpace(packageName[:bracket])
}

// parseExtras returns the whitespace-trimmed, comma-separated entries between
// the first "[" and the next "]" that follows it, e.g.
// "celery[redis, pytest]" -> ["redis", "pytest"]. It returns nil when the name
// has no "[" or no "]" after it.
func parseExtras(packageName string) []string {
	opening := strings.IndexByte(packageName, '[')
	if opening < 0 {
		return nil
	}

	// only look for the closing bracket after the opening one; a stray "]"
	// earlier in the name must not produce an inverted slice range
	inner := packageName[opening+1:]
	closing := strings.IndexByte(inner, ']')
	if closing < 0 {
		return nil
	}

	var extras []string
	for _, extra := range strings.Split(inner[:closing], ",") {
		extras = append(extras, strings.TrimSpace(extra))
	}
	return extras
}

// parseURL extracts a git URL from a requirement line or from the text that
// follows the "@" of a direct reference. The input is split on "@"; the first
// piece that starts with "git" (ignoring leading non-alphanumeric characters)
// and everything after it is returned, so a trailing "@ref" stays attached.
// It returns "" when no such piece exists.
func parseURL(line string) string {
	parts := strings.Split(line, "@")

	if len(parts) == 1 && !strings.Contains(line, "://") {
		// no "@" and nothing that looks like a URL scheme: a plain requirement
		// such as "gitpython==1.0" must not be mistaken for a git URL
		return ""
	}

	for i, part := range parts {
		candidate := strings.TrimFunc(part, func(r rune) bool {
			return !unicode.IsLetter(r) && !unicode.IsNumber(r)
		})

		if strings.HasPrefix(candidate, "git") {
			return strings.TrimSpace(strings.Join(parts[i:], "@"))
		}
	}

	return ""
}