github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/python/parse_requirements.go (about) 1 package python 2 3 import ( 4 "bufio" 5 "fmt" 6 "regexp" 7 "strings" 8 "unicode" 9 10 pep440 "github.com/aquasecurity/go-pep440-version" 11 "github.com/mitchellh/mapstructure" 12 13 "github.com/anchore/syft/syft/artifact" 14 "github.com/anchore/syft/syft/file" 15 "github.com/anchore/syft/syft/pkg" 16 "github.com/anchore/syft/syft/pkg/cataloger/generic" 17 "github.com/lineaje-labs/syft/internal" 18 "github.com/lineaje-labs/syft/internal/log" 19 ) 20 21 const ( 22 // given the example requirement: 23 // requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux" \ 24 // --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \ 25 // --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 # some comment 26 27 // namePattern matches: requests[security] 28 namePattern = `(?P<name>\w[\w\[\],\s-_]+)` 29 30 // versionConstraintPattern matches: == 2.8.* 31 versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?` 32 33 // markersPattern matches: python_version < "2.7" and sys_platform == "linux" 34 markersPattern = `(;(?P<markers>.*))?` 35 36 // hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65 37 hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?` 38 39 // whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n) 40 whiteSpaceNoNewlinePattern = `[^\S\r\n]*` 41 ) 42 43 var requirementPattern = regexp.MustCompile( 44 `^` + 45 whiteSpaceNoNewlinePattern + 46 namePattern + 47 whiteSpaceNoNewlinePattern + 48 versionConstraintPattern + 49 markersPattern + 50 hashesPattern, 51 ) 52 53 type unprocessedRequirement struct { 54 Name string 
`mapstructure:"name"` 55 VersionConstraint string `mapstructure:"versionConstraint"` 56 Markers string `mapstructure:"markers"` 57 URL string `mapstructure:"url"` 58 Hashes string `mapstructure:"hashes"` 59 } 60 61 func newRequirement(raw string) *unprocessedRequirement { 62 var r unprocessedRequirement 63 64 values := internal.MatchNamedCaptureGroups(requirementPattern, raw) 65 66 if err := mapstructure.Decode(values, &r); err != nil { 67 return nil 68 } 69 70 r.Name = strings.TrimSpace(r.Name) 71 r.VersionConstraint = strings.TrimSpace(r.VersionConstraint) 72 r.Markers = strings.TrimSpace(r.Markers) 73 r.URL = strings.TrimSpace(r.URL) 74 r.Hashes = strings.TrimSpace(r.Hashes) 75 76 if r.Name == "" { 77 return nil 78 } 79 80 return &r 81 } 82 83 type requirementsParser struct { 84 guessUnpinnedRequirements bool 85 } 86 87 func newRequirementsParser(cfg CatalogerConfig) requirementsParser { 88 return requirementsParser{ 89 guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements, 90 } 91 } 92 93 // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a 94 // specific version. 95 func (rp requirementsParser) parseRequirementsTxt( 96 _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser, 97 ) ([]pkg.Package, []artifact.Relationship, error) { 98 var packages []pkg.Package 99 100 scanner := bufio.NewScanner(reader) 101 var lastLine string 102 for scanner.Scan() { 103 line := trimRequirementsTxtLine(scanner.Text()) 104 105 if lastLine != "" { 106 line = lastLine + line 107 lastLine = "" 108 } 109 110 // remove line continuations... 
smashes the file into a single line 111 if strings.HasSuffix(line, "\\") { 112 // this line is a continuation of the previous line 113 lastLine += strings.TrimSuffix(line, "\\") 114 continue 115 } 116 117 if line == "" { 118 // nothing to parse on this line 119 continue 120 } 121 122 if strings.HasPrefix(line, "-e") { 123 // editable packages aren't parsed (yet) 124 continue 125 } 126 127 req := newRequirement(line) 128 if req == nil { 129 log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line) 130 continue 131 } 132 133 name := removeExtras(req.Name) 134 version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements) 135 136 if version == "" { 137 log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line) 138 continue 139 } 140 141 packages = append( 142 packages, 143 newPackageForRequirementsWithMetadata( 144 name, 145 version, 146 pkg.PythonRequirementsEntry{ 147 Name: name, 148 Extras: parseExtras(req.Name), 149 VersionConstraint: req.VersionConstraint, 150 URL: parseURL(req.URL), 151 Markers: req.Markers, 152 }, 153 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 154 ), 155 ) 156 } 157 158 if err := scanner.Err(); err != nil { 159 return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err) 160 } 161 162 return packages, nil, nil 163 } 164 165 func parseVersion(version string, guessFromConstraint bool) string { 166 if isPinnedConstraint(version) { 167 return strings.TrimSpace(strings.ReplaceAll(version, "==", "")) 168 } 169 170 if guessFromConstraint { 171 return guessVersion(version) 172 } 173 174 return "" 175 } 176 177 func isPinnedConstraint(version string) bool { 178 return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!") 179 } 180 181 func guessVersion(constraint string) string { 182 // handle "2.8.*" -> "2.8.0" 183 constraint = 
strings.ReplaceAll(constraint, "*", "0") 184 if isPinnedConstraint(constraint) { 185 return strings.TrimSpace(strings.ReplaceAll(constraint, "==", "")) 186 } 187 188 constraints := strings.Split(constraint, ",") 189 filteredVersions := map[string]struct{}{} 190 for _, part := range constraints { 191 if strings.Contains(part, "!=") { 192 parts := strings.Split(part, "!=") 193 filteredVersions[strings.TrimSpace(parts[1])] = struct{}{} 194 } 195 } 196 197 var closestVersion *pep440.Version 198 for _, part := range constraints { 199 // ignore any parts that do not have '=' in them, >,<,~ are not valid semver 200 parts := strings.SplitAfter(part, "=") 201 if len(parts) < 2 { 202 continue 203 } 204 version, err := pep440.Parse(strings.TrimSpace(parts[1])) 205 if err != nil { 206 // ignore any parts that are not valid semver 207 continue 208 } 209 if _, ok := filteredVersions[version.String()]; ok { 210 continue 211 } 212 213 if strings.Contains(part, "==") { 214 parts := strings.Split(part, "==") 215 return strings.TrimSpace(parts[1]) 216 } 217 218 if closestVersion == nil || version.GreaterThan(*closestVersion) { 219 closestVersion = &version 220 } 221 } 222 if closestVersion == nil { 223 return "" 224 } 225 226 return closestVersion.String() 227 } 228 229 // trimRequirementsTxtLine removes content from the given requirements.txt line 230 // that should not be considered for parsing. 231 func trimRequirementsTxtLine(line string) string { 232 line = strings.TrimSpace(line) 233 line = removeTrailingComment(line) 234 235 return line 236 } 237 238 // removeTrailingComment takes a requirements.txt line and strips off comment strings. 
// Everything from the first "#" to the end of the line is discarded; a line without
// a "#" is returned unchanged. Whitespace preceding the "#" is left in place.
func removeTrailingComment(line string) string {
	parts := strings.SplitN(line, "#", 2)
	if len(parts) < 2 {
		// there aren't any comments
		return line
	}

	return parts[0]
}

// removeExtras returns the package name without its extras list, e.g.
// "requests[security]" -> "requests". Names without a "[" are returned unchanged.
func removeExtras(packageName string) string {
	start := strings.Index(packageName, "[")
	if start == -1 {
		return packageName
	}

	return strings.TrimSpace(packageName[:start])
}

// parseExtras returns the comma-separated extras found between the first "[" and the
// first "]" that follows it, e.g. "requests[security, socks]" -> ["security", "socks"].
// It returns nil when the name has no "[" or no "]" after that "[".
func parseExtras(packageName string) []string {
	start := strings.Index(packageName, "[")
	if start == -1 {
		return nil
	}

	// Only look for the closing bracket after the opening one: a stray "]" earlier in the
	// name (e.g. "a]b[c") would otherwise produce an inverted slice range and panic.
	length := strings.Index(packageName[start:], "]")
	if length == -1 {
		return nil
	}
	stop := start + length

	var extras []string
	for _, extra := range strings.Split(packageName[start+1:stop], ",") {
		extras = append(extras, strings.TrimSpace(extra))
	}
	return extras
}

// parseURL returns the git URL portion of a direct reference. The input is split on "@";
// starting from the first segment that begins with "git" (ignoring leading and trailing
// non-alphanumeric characters), the remaining segments are re-joined with "@" and trimmed.
// The empty string is returned when the input has no "@" or no segment starts with "git".
func parseURL(line string) string {
	parts := strings.Split(line, "@")
	if len(parts) < 2 {
		return ""
	}

	notAlphanumeric := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	}

	for index, part := range parts {
		if strings.HasPrefix(strings.TrimFunc(part, notAlphanumeric), "git") {
			// keep any "@<ref>" suffix that belongs to the URL
			return strings.TrimSpace(strings.Join(parts[index:], "@"))
		}
	}

	return ""
}