github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/python/parse_requirements.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"regexp"
     8  	"strings"
     9  	"unicode"
    10  
    11  	pep440 "github.com/aquasecurity/go-pep440-version"
    12  	"github.com/go-viper/mapstructure/v2"
    13  
    14  	"github.com/anchore/syft/internal"
    15  	"github.com/anchore/syft/internal/log"
    16  	"github.com/anchore/syft/internal/unknown"
    17  	"github.com/anchore/syft/syft/artifact"
    18  	"github.com/anchore/syft/syft/file"
    19  	"github.com/anchore/syft/syft/pkg"
    20  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    21  )
    22  
    23  const (
    24  	// given the example requirement:
    25  	//    requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux"  \
    26  	//      --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \
    27  	//      --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65  # some comment
    28  
    29  	// namePattern matches: requests[security]
    30  	namePattern = `(?P<name>\w[\w\[\],\s-_\.]+)`
    31  
    32  	// versionConstraintPattern matches: == 2.8.*
    33  	versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?`
    34  
    35  	// markersPattern matches: python_version < "2.7" and sys_platform == "linux"
    36  	markersPattern = `(;(?P<markers>.*))?`
    37  
    38  	// hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65
    39  	hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?`
    40  
    41  	// whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n)
    42  	whiteSpaceNoNewlinePattern = `[^\S\r\n]*`
    43  )
    44  
// requirementPattern matches one logical requirements.txt line: optional leading whitespace, the package
// name (including any extras), then an optional version constraint and/or "@ <url>" reference, optional
// environment markers, and optional --hash options. The expression is anchored only at the start of the
// input, so trailing text that none of the sub-patterns consume is left unmatched rather than rejected.
var requirementPattern = regexp.MustCompile(
	`^` +
		whiteSpaceNoNewlinePattern +
		namePattern +
		whiteSpaceNoNewlinePattern +
		versionConstraintPattern +
		markersPattern +
		hashesPattern,
)
    54  
// unprocessedRequirement holds the raw text captured from a single requirements.txt line. Each field is
// filled from the requirementPattern capture group named in its mapstructure tag; newRequirement trims
// surrounding whitespace but performs no further interpretation of the values.
type unprocessedRequirement struct {
	// Name is the package name as written, still including any "[extras]" suffix.
	Name string `mapstructure:"name"`
	// VersionConstraint is the version specifier text (e.g. "== 2.8.*"), if any.
	VersionConstraint string `mapstructure:"versionConstraint"`
	// Markers is the text following ";", if any.
	Markers string `mapstructure:"markers"`
	// URL is the text following "@", if any.
	URL string `mapstructure:"url"`
	// Hashes is the run of "--hash=..." options, if any.
	Hashes string `mapstructure:"hashes"`
}
    62  
    63  func newRequirement(raw string) *unprocessedRequirement {
    64  	var r unprocessedRequirement
    65  
    66  	values := internal.MatchNamedCaptureGroups(requirementPattern, raw)
    67  
    68  	if err := mapstructure.Decode(values, &r); err != nil {
    69  		return nil
    70  	}
    71  
    72  	r.Name = strings.TrimSpace(r.Name)
    73  	r.VersionConstraint = strings.TrimSpace(r.VersionConstraint)
    74  	r.Markers = strings.TrimSpace(r.Markers)
    75  	r.URL = strings.TrimSpace(r.URL)
    76  	r.Hashes = strings.TrimSpace(r.Hashes)
    77  
    78  	if r.Name == "" {
    79  		return nil
    80  	}
    81  
    82  	return &r
    83  }
    84  
// requirementsParser parses requirements.txt files into packages.
type requirementsParser struct {
	// cfg is the cataloger configuration; parseRequirementsTxt reads GuessUnpinnedRequirements from it.
	cfg CatalogerConfig
	// licenseResolver is handed to the package constructor for every package that is created.
	licenseResolver pythonLicenseResolver
}
    89  
    90  func newRequirementsParser(cfg CatalogerConfig) requirementsParser {
    91  	return requirementsParser{
    92  		cfg:             cfg,
    93  		licenseResolver: newPythonLicenseResolver(cfg),
    94  	}
    95  }
    96  
    97  // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
    98  // specific version.
    99  func (rp requirementsParser) parseRequirementsTxt(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
   100  	var errs error
   101  	var packages []pkg.Package
   102  
   103  	scanner := bufio.NewScanner(reader)
   104  	var lastLine string
   105  	for scanner.Scan() {
   106  		line := trimRequirementsTxtLine(scanner.Text())
   107  
   108  		if lastLine != "" {
   109  			line = lastLine + line
   110  			lastLine = ""
   111  		}
   112  
   113  		// remove line continuations... smashes the file into a single line
   114  		if strings.HasSuffix(line, "\\") {
   115  			// this line is a continuation of the previous line
   116  			lastLine += strings.TrimSuffix(line, "\\")
   117  			continue
   118  		}
   119  
   120  		if line == "" {
   121  			// nothing to parse on this line
   122  			continue
   123  		}
   124  
   125  		if strings.HasPrefix(line, "-e") {
   126  			// editable packages aren't parsed (yet)
   127  			continue
   128  		}
   129  
   130  		req := newRequirement(line)
   131  		if req == nil {
   132  			log.WithFields("path", reader.RealPath, "line", line).Debug("unable to parse requirements.txt line")
   133  			errs = unknown.Appendf(errs, reader, "unable to parse requirements.txt line: %q", line)
   134  			continue
   135  		}
   136  
   137  		name := removeExtras(req.Name)
   138  		version := parseVersion(req.VersionConstraint, rp.cfg.GuessUnpinnedRequirements)
   139  
   140  		if version == "" {
   141  			log.WithFields("path", reader.RealPath, "line", line).Trace("unable to determine package version in requirements.txt line")
   142  			errs = unknown.Appendf(errs, reader, "unable to determine package version in requirements.txt line: %q", line)
   143  			continue
   144  		}
   145  
   146  		packages = append(
   147  			packages,
   148  			newPackageForRequirementsWithMetadata(
   149  				ctx,
   150  				rp.licenseResolver,
   151  				name,
   152  				version,
   153  				pkg.PythonRequirementsEntry{
   154  					Name:              name,
   155  					Extras:            parseExtras(req.Name),
   156  					VersionConstraint: req.VersionConstraint,
   157  					URL:               parseURL(req.URL),
   158  					Markers:           req.Markers,
   159  				},
   160  				reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   161  			),
   162  		)
   163  	}
   164  
   165  	if err := scanner.Err(); err != nil {
   166  		return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err)
   167  	}
   168  
   169  	return packages, nil, unknown.Join(errs, unknown.IfEmptyf(packages, "unable to determine packages"))
   170  }
   171  
   172  func parseVersion(version string, guessFromConstraint bool) string {
   173  	if isPinnedConstraint(version) {
   174  		return strings.TrimSpace(strings.ReplaceAll(version, "==", ""))
   175  	}
   176  
   177  	if guessFromConstraint {
   178  		return guessVersion(version)
   179  	}
   180  
   181  	return ""
   182  }
   183  
   184  func isPinnedConstraint(version string) bool {
   185  	return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!")
   186  }
   187  
   188  func guessVersion(constraint string) string {
   189  	// handle "2.8.*" -> "2.8.0"
   190  	constraint = strings.ReplaceAll(constraint, "*", "0")
   191  	if isPinnedConstraint(constraint) {
   192  		return strings.TrimSpace(strings.ReplaceAll(constraint, "==", ""))
   193  	}
   194  
   195  	constraints := strings.Split(constraint, ",")
   196  	filteredVersions := map[string]struct{}{}
   197  	for _, part := range constraints {
   198  		if strings.Contains(part, "!=") {
   199  			parts := strings.Split(part, "!=")
   200  			filteredVersions[strings.TrimSpace(parts[1])] = struct{}{}
   201  		}
   202  	}
   203  
   204  	var closestVersion *pep440.Version
   205  	for _, part := range constraints {
   206  		// ignore any parts that do not have '=' in them, >,<,~ are not valid semver
   207  		parts := strings.SplitAfter(part, "=")
   208  		if len(parts) < 2 {
   209  			continue
   210  		}
   211  		version, err := pep440.Parse(strings.TrimSpace(parts[1]))
   212  		if err != nil {
   213  			// ignore any parts that are not valid semver
   214  			continue
   215  		}
   216  		if _, ok := filteredVersions[version.String()]; ok {
   217  			continue
   218  		}
   219  
   220  		if strings.Contains(part, "==") {
   221  			parts := strings.Split(part, "==")
   222  			return strings.TrimSpace(parts[1])
   223  		}
   224  
   225  		if closestVersion == nil || version.GreaterThan(*closestVersion) {
   226  			closestVersion = &version
   227  		}
   228  	}
   229  	if closestVersion == nil {
   230  		return ""
   231  	}
   232  
   233  	return closestVersion.String()
   234  }
   235  
   236  // trimRequirementsTxtLine removes content from the given requirements.txt line
   237  // that should not be considered for parsing.
   238  func trimRequirementsTxtLine(line string) string {
   239  	line = strings.TrimSpace(line)
   240  	line = removeTrailingComment(line)
   241  
   242  	return line
   243  }
   244  
   245  // removeTrailingComment takes a requirements.txt line and strips off comment strings.
   246  func removeTrailingComment(line string) string {
   247  	parts := strings.SplitN(line, "#", 2)
   248  	if len(parts) < 2 {
   249  		// there aren't any comments
   250  
   251  		return line
   252  	}
   253  
   254  	return parts[0]
   255  }
   256  
   257  func removeExtras(packageName string) string {
   258  	start := strings.Index(packageName, "[")
   259  	if start == -1 {
   260  		return packageName
   261  	}
   262  
   263  	return strings.TrimSpace(packageName[:start])
   264  }
   265  
   266  func parseExtras(packageName string) []string {
   267  	var extras []string
   268  
   269  	start := strings.Index(packageName, "[")
   270  	stop := strings.Index(packageName, "]")
   271  	if start == -1 || stop == -1 {
   272  		return extras
   273  	}
   274  
   275  	extraString := packageName[start+1 : stop]
   276  	for _, extra := range strings.Split(extraString, ",") {
   277  		extras = append(extras, strings.TrimSpace(extra))
   278  	}
   279  	return extras
   280  }
   281  
   282  func parseURL(line string) string {
   283  	parts := strings.Split(line, "@")
   284  
   285  	if len(parts) > 1 {
   286  		desiredIndex := -1
   287  
   288  		for index, part := range parts {
   289  			part := strings.TrimFunc(part, func(r rune) bool {
   290  				return !unicode.IsLetter(r) && !unicode.IsNumber(r)
   291  			})
   292  
   293  			if strings.HasPrefix(part, "git") {
   294  				desiredIndex = index
   295  				break
   296  			}
   297  		}
   298  
   299  		if desiredIndex != -1 {
   300  			return strings.TrimSpace(strings.Join(parts[desiredIndex:], "@"))
   301  		}
   302  	}
   303  
   304  	return ""
   305  }