github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/python/parse_requirements.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "context" 6 "fmt" 7 "regexp" 8 "strings" 9 "unicode" 10 11 pep440 "github.com/aquasecurity/go-pep440-version" 12 "github.com/mitchellh/mapstructure" 13 14 "github.com/anchore/syft/internal" 15 "github.com/anchore/syft/internal/log" 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/pkg" 19 "github.com/anchore/syft/syft/pkg/cataloger/generic" 20 ) 21 22 const ( 23 // given the example requirement: 24 // requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux" \ 25 // --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \ 26 // --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 # some comment 27 28 // namePattern matches: requests[security] 29 namePattern = `(?P<name>\w[\w\[\],\s-_]+)` 30 31 // versionConstraintPattern matches: == 2.8.* 32 versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?` 33 34 // markersPattern matches: python_version < "2.7" and sys_platform == "linux" 35 markersPattern = `(;(?P<markers>.*))?` 36 37 // hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 38 hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?` 39 40 // whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n) 41 whiteSpaceNoNewlinePattern = `[^\S\r\n]*` 42 ) 43 44 var requirementPattern = regexp.MustCompile( 45 `^` + 46 whiteSpaceNoNewlinePattern + 47 namePattern + 48 whiteSpaceNoNewlinePattern + 49 versionConstraintPattern + 50 markersPattern + 51 hashesPattern, 52 ) 53 54 type unprocessedRequirement struct { 55 Name string 
`mapstructure:"name"` 56 VersionConstraint string `mapstructure:"versionConstraint"` 57 Markers string `mapstructure:"markers"` 58 URL string `mapstructure:"url"` 59 Hashes string `mapstructure:"hashes"` 60 } 61 62 func newRequirement(raw string) *unprocessedRequirement { 63 var r unprocessedRequirement 64 65 values := internal.MatchNamedCaptureGroups(requirementPattern, raw) 66 67 if err := mapstructure.Decode(values, &r); err != nil { 68 return nil 69 } 70 71 r.Name = strings.TrimSpace(r.Name) 72 r.VersionConstraint = strings.TrimSpace(r.VersionConstraint) 73 r.Markers = strings.TrimSpace(r.Markers) 74 r.URL = strings.TrimSpace(r.URL) 75 r.Hashes = strings.TrimSpace(r.Hashes) 76 77 if r.Name == "" { 78 return nil 79 } 80 81 return &r 82 } 83 84 type requirementsParser struct { 85 guessUnpinnedRequirements bool 86 } 87 88 func newRequirementsParser(cfg CatalogerConfig) requirementsParser { 89 return requirementsParser{ 90 guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements, 91 } 92 } 93 94 // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a 95 // specific version. 96 func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 97 var packages []pkg.Package 98 99 scanner := bufio.NewScanner(reader) 100 var lastLine string 101 for scanner.Scan() { 102 line := trimRequirementsTxtLine(scanner.Text()) 103 104 if lastLine != "" { 105 line = lastLine + line 106 lastLine = "" 107 } 108 109 // remove line continuations... 
smashes the file into a single line 110 if strings.HasSuffix(line, "\\") { 111 // this line is a continuation of the previous line 112 lastLine += strings.TrimSuffix(line, "\\") 113 continue 114 } 115 116 if line == "" { 117 // nothing to parse on this line 118 continue 119 } 120 121 if strings.HasPrefix(line, "-e") { 122 // editable packages aren't parsed (yet) 123 continue 124 } 125 126 req := newRequirement(line) 127 if req == nil { 128 log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line) 129 continue 130 } 131 132 name := removeExtras(req.Name) 133 version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements) 134 135 if version == "" { 136 log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line) 137 continue 138 } 139 140 packages = append( 141 packages, 142 newPackageForRequirementsWithMetadata( 143 name, 144 version, 145 pkg.PythonRequirementsEntry{ 146 Name: name, 147 Extras: parseExtras(req.Name), 148 VersionConstraint: req.VersionConstraint, 149 URL: parseURL(req.URL), 150 Markers: req.Markers, 151 }, 152 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 153 ), 154 ) 155 } 156 157 if err := scanner.Err(); err != nil { 158 return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err) 159 } 160 161 return packages, nil, nil 162 } 163 164 func parseVersion(version string, guessFromConstraint bool) string { 165 if isPinnedConstraint(version) { 166 return strings.TrimSpace(strings.ReplaceAll(version, "==", "")) 167 } 168 169 if guessFromConstraint { 170 return guessVersion(version) 171 } 172 173 return "" 174 } 175 176 func isPinnedConstraint(version string) bool { 177 return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!") 178 } 179 180 func guessVersion(constraint string) string { 181 // handle "2.8.*" -> "2.8.0" 182 constraint = 
strings.ReplaceAll(constraint, "*", "0") 183 if isPinnedConstraint(constraint) { 184 return strings.TrimSpace(strings.ReplaceAll(constraint, "==", "")) 185 } 186 187 constraints := strings.Split(constraint, ",") 188 filteredVersions := map[string]struct{}{} 189 for _, part := range constraints { 190 if strings.Contains(part, "!=") { 191 parts := strings.Split(part, "!=") 192 filteredVersions[strings.TrimSpace(parts[1])] = struct{}{} 193 } 194 } 195 196 var closestVersion *pep440.Version 197 for _, part := range constraints { 198 // ignore any parts that do not have '=' in them, >,<,~ are not valid semver 199 parts := strings.SplitAfter(part, "=") 200 if len(parts) < 2 { 201 continue 202 } 203 version, err := pep440.Parse(strings.TrimSpace(parts[1])) 204 if err != nil { 205 // ignore any parts that are not valid semver 206 continue 207 } 208 if _, ok := filteredVersions[version.String()]; ok { 209 continue 210 } 211 212 if strings.Contains(part, "==") { 213 parts := strings.Split(part, "==") 214 return strings.TrimSpace(parts[1]) 215 } 216 217 if closestVersion == nil || version.GreaterThan(*closestVersion) { 218 closestVersion = &version 219 } 220 } 221 if closestVersion == nil { 222 return "" 223 } 224 225 return closestVersion.String() 226 } 227 228 // trimRequirementsTxtLine removes content from the given requirements.txt line 229 // that should not be considered for parsing. 230 func trimRequirementsTxtLine(line string) string { 231 line = strings.TrimSpace(line) 232 line = removeTrailingComment(line) 233 234 return line 235 } 236 237 // removeTrailingComment takes a requirements.txt line and strips off comment strings. 
// A '#' only starts a comment when it is the first character of the line or is
// preceded by whitespace (the rule pip applies), so URL fragments such as
// "#egg=name" or "#subdirectory=pkg" are kept intact. The text before the
// comment marker is returned unmodified, including any trailing whitespace.
func removeTrailingComment(line string) string {
	for i := 0; i < len(line); i++ {
		if line[i] != '#' {
			continue
		}
		if i == 0 {
			return ""
		}
		// byte-wise ASCII check on purpose: converting a UTF-8 continuation byte to a rune
		// could be misread as a Unicode space
		switch line[i-1] {
		case ' ', '\t', '\r', '\n', '\v', '\f':
			return line[:i]
		}
	}

	// there aren't any comments
	return line
}

// removeExtras returns the package name without its extras, e.g. "requests[security]" -> "requests".
// A name without '[' is returned unchanged.
func removeExtras(packageName string) string {
	if start := strings.Index(packageName, "["); start != -1 {
		return strings.TrimSpace(packageName[:start])
	}

	return packageName
}

// parseExtras returns the extras listed between the first '[' and the first ']' that follows it,
// e.g. "requests[security, socks]" -> ["security", "socks"]. Entries are whitespace-trimmed and empty
// entries are dropped. It returns nil when there is no well-formed bracket pair.
func parseExtras(packageName string) []string {
	start := strings.Index(packageName, "[")
	if start == -1 {
		return nil
	}

	// look for the closing bracket only AFTER the opening one; a ']' that precedes '[' would
	// otherwise produce an inverted slice range and panic
	inner := packageName[start+1:]
	stop := strings.Index(inner, "]")
	if stop == -1 {
		return nil
	}

	var extras []string
	for _, extra := range strings.Split(inner[:stop], ",") {
		if extra = strings.TrimSpace(extra); extra != "" {
			extras = append(extras, extra)
		}
	}
	return extras
}

// parseURL extracts a "git..." URL from text that contains at least one '@'. The text is split on
// '@'; starting at the first segment that (ignoring leading and trailing non-alphanumeric characters)
// begins with "git", the remaining segments are re-joined with '@' and whitespace-trimmed. It returns
// "" when the text has no '@' or no segment begins with "git".
func parseURL(line string) string {
	parts := strings.Split(line, "@")
	if len(parts) < 2 {
		return ""
	}

	for i, part := range parts {
		candidate := strings.TrimFunc(part, isNotLetterOrNumber)
		if strings.HasPrefix(candidate, "git") {
			return strings.TrimSpace(strings.Join(parts[i:], "@"))
		}
	}

	return ""
}

// isNotLetterOrNumber reports whether r is neither a letter nor a number.
func isNotLetterOrNumber(r rune) bool {
	return !unicode.IsLetter(r) && !unicode.IsNumber(r)
}