github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/python/parse_requirements.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"regexp"
     8  	"strings"
     9  	"unicode"
    10  
    11  	pep440 "github.com/aquasecurity/go-pep440-version"
    12  	"github.com/mitchellh/mapstructure"
    13  
    14  	"github.com/anchore/syft/internal"
    15  	"github.com/anchore/syft/internal/log"
    16  	"github.com/anchore/syft/syft/artifact"
    17  	"github.com/anchore/syft/syft/file"
    18  	"github.com/anchore/syft/syft/pkg"
    19  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    20  )
    21  
const (
	// The constants below are regular-expression fragments that are concatenated
	// into requirementPattern. Each one captures a single section of a
	// requirement line into a named group.
	//
	// given the example requirement:
	//    requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux"  \
	//      --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \
	//      --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65  # some comment

	// namePattern matches: requests[security]
	// The "name" group is a word character followed by one or more word, bracket,
	// comma, whitespace, '-' or '_' characters, so the extras list is captured as
	// part of the name and a name must be at least two characters long.
	namePattern = `(?P<name>\w[\w\[\],\s-_]+)`

	// versionConstraintPattern matches: == 2.8.*
	// The "versionConstraint" group holds one or more optionally comma-separated
	// "<operator> <version>" clauses. An optional "@ <location>" suffix is also
	// accepted; everything after the '@' up to the next ';' lands in the "url" group.
	versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?`

	// markersPattern matches: python_version < "2.7" and sys_platform == "linux"
	// The "markers" group is everything following the first ';'.
	// NOTE(review): ".*" is greedy, so when markers are present this group appears
	// to consume the rest of the line (including any --hash options) — confirm.
	markersPattern = `(;(?P<markers>.*))?`

	// hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65
	// The "hashes" group holds one or more whitespace-separated --hash=<algorithm>:<digest> options.
	hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?`

	// whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n)
	// It matches zero or more such characters, so it never fails on its own.
	whiteSpaceNoNewlinePattern = `[^\S\r\n]*`
)
    43  
    44  var requirementPattern = regexp.MustCompile(
    45  	`^` +
    46  		whiteSpaceNoNewlinePattern +
    47  		namePattern +
    48  		whiteSpaceNoNewlinePattern +
    49  		versionConstraintPattern +
    50  		markersPattern +
    51  		hashesPattern,
    52  )
    53  
// unprocessedRequirement holds the raw text captured for each section of a
// single requirement line. The mapstructure tags mirror the named capture
// groups of requirementPattern so that a match can be decoded straight into
// this struct (see newRequirement, which also whitespace-trims every field).
type unprocessedRequirement struct {
	// Name is the package name as written, including any "[extras]" suffix.
	Name              string `mapstructure:"name"`
	// VersionConstraint is the raw constraint text, e.g. "== 2.8.*" or ">=1.0,<2.0".
	VersionConstraint string `mapstructure:"versionConstraint"`
	// Markers is the text following ';' on the requirement line.
	Markers           string `mapstructure:"markers"`
	// URL is the text following '@' on the requirement line (up to the next ';').
	URL               string `mapstructure:"url"`
	// Hashes is the run of --hash=... options, still as a single string.
	Hashes            string `mapstructure:"hashes"`
}
    61  
    62  func newRequirement(raw string) *unprocessedRequirement {
    63  	var r unprocessedRequirement
    64  
    65  	values := internal.MatchNamedCaptureGroups(requirementPattern, raw)
    66  
    67  	if err := mapstructure.Decode(values, &r); err != nil {
    68  		return nil
    69  	}
    70  
    71  	r.Name = strings.TrimSpace(r.Name)
    72  	r.VersionConstraint = strings.TrimSpace(r.VersionConstraint)
    73  	r.Markers = strings.TrimSpace(r.Markers)
    74  	r.URL = strings.TrimSpace(r.URL)
    75  	r.Hashes = strings.TrimSpace(r.Hashes)
    76  
    77  	if r.Name == "" {
    78  		return nil
    79  	}
    80  
    81  	return &r
    82  }
    83  
// requirementsParser parses Python requirements.txt files into packages.
type requirementsParser struct {
	// guessUnpinnedRequirements is forwarded to parseVersion: when true, a
	// best-effort version is guessed for requirements that are not pinned;
	// when false, such requirements yield no version and are skipped.
	guessUnpinnedRequirements bool
}
    87  
    88  func newRequirementsParser(cfg CatalogerConfig) requirementsParser {
    89  	return requirementsParser{
    90  		guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements,
    91  	}
    92  }
    93  
    94  // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
    95  // specific version.
    96  func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    97  	var packages []pkg.Package
    98  
    99  	scanner := bufio.NewScanner(reader)
   100  	var lastLine string
   101  	for scanner.Scan() {
   102  		line := trimRequirementsTxtLine(scanner.Text())
   103  
   104  		if lastLine != "" {
   105  			line = lastLine + line
   106  			lastLine = ""
   107  		}
   108  
   109  		// remove line continuations... smashes the file into a single line
   110  		if strings.HasSuffix(line, "\\") {
   111  			// this line is a continuation of the previous line
   112  			lastLine += strings.TrimSuffix(line, "\\")
   113  			continue
   114  		}
   115  
   116  		if line == "" {
   117  			// nothing to parse on this line
   118  			continue
   119  		}
   120  
   121  		if strings.HasPrefix(line, "-e") {
   122  			// editable packages aren't parsed (yet)
   123  			continue
   124  		}
   125  
   126  		req := newRequirement(line)
   127  		if req == nil {
   128  			log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line)
   129  			continue
   130  		}
   131  
   132  		name := removeExtras(req.Name)
   133  		version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements)
   134  
   135  		if version == "" {
   136  			log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line)
   137  			continue
   138  		}
   139  
   140  		packages = append(
   141  			packages,
   142  			newPackageForRequirementsWithMetadata(
   143  				name,
   144  				version,
   145  				pkg.PythonRequirementsEntry{
   146  					Name:              name,
   147  					Extras:            parseExtras(req.Name),
   148  					VersionConstraint: req.VersionConstraint,
   149  					URL:               parseURL(req.URL),
   150  					Markers:           req.Markers,
   151  				},
   152  				reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   153  			),
   154  		)
   155  	}
   156  
   157  	if err := scanner.Err(); err != nil {
   158  		return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err)
   159  	}
   160  
   161  	return packages, nil, nil
   162  }
   163  
   164  func parseVersion(version string, guessFromConstraint bool) string {
   165  	if isPinnedConstraint(version) {
   166  		return strings.TrimSpace(strings.ReplaceAll(version, "==", ""))
   167  	}
   168  
   169  	if guessFromConstraint {
   170  		return guessVersion(version)
   171  	}
   172  
   173  	return ""
   174  }
   175  
   176  func isPinnedConstraint(version string) bool {
   177  	return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!")
   178  }
   179  
   180  func guessVersion(constraint string) string {
   181  	// handle "2.8.*" -> "2.8.0"
   182  	constraint = strings.ReplaceAll(constraint, "*", "0")
   183  	if isPinnedConstraint(constraint) {
   184  		return strings.TrimSpace(strings.ReplaceAll(constraint, "==", ""))
   185  	}
   186  
   187  	constraints := strings.Split(constraint, ",")
   188  	filteredVersions := map[string]struct{}{}
   189  	for _, part := range constraints {
   190  		if strings.Contains(part, "!=") {
   191  			parts := strings.Split(part, "!=")
   192  			filteredVersions[strings.TrimSpace(parts[1])] = struct{}{}
   193  		}
   194  	}
   195  
   196  	var closestVersion *pep440.Version
   197  	for _, part := range constraints {
   198  		// ignore any parts that do not have '=' in them, >,<,~ are not valid semver
   199  		parts := strings.SplitAfter(part, "=")
   200  		if len(parts) < 2 {
   201  			continue
   202  		}
   203  		version, err := pep440.Parse(strings.TrimSpace(parts[1]))
   204  		if err != nil {
   205  			// ignore any parts that are not valid semver
   206  			continue
   207  		}
   208  		if _, ok := filteredVersions[version.String()]; ok {
   209  			continue
   210  		}
   211  
   212  		if strings.Contains(part, "==") {
   213  			parts := strings.Split(part, "==")
   214  			return strings.TrimSpace(parts[1])
   215  		}
   216  
   217  		if closestVersion == nil || version.GreaterThan(*closestVersion) {
   218  			closestVersion = &version
   219  		}
   220  	}
   221  	if closestVersion == nil {
   222  		return ""
   223  	}
   224  
   225  	return closestVersion.String()
   226  }
   227  
   228  // trimRequirementsTxtLine removes content from the given requirements.txt line
   229  // that should not be considered for parsing.
   230  func trimRequirementsTxtLine(line string) string {
   231  	line = strings.TrimSpace(line)
   232  	line = removeTrailingComment(line)
   233  
   234  	return line
   235  }
   236  
   237  // removeTrailingComment takes a requirements.txt line and strips off comment strings.
   238  func removeTrailingComment(line string) string {
   239  	parts := strings.SplitN(line, "#", 2)
   240  	if len(parts) < 2 {
   241  		// there aren't any comments
   242  
   243  		return line
   244  	}
   245  
   246  	return parts[0]
   247  }
   248  
   249  func removeExtras(packageName string) string {
   250  	start := strings.Index(packageName, "[")
   251  	if start == -1 {
   252  		return packageName
   253  	}
   254  
   255  	return strings.TrimSpace(packageName[:start])
   256  }
   257  
   258  func parseExtras(packageName string) []string {
   259  	var extras []string
   260  
   261  	start := strings.Index(packageName, "[")
   262  	stop := strings.Index(packageName, "]")
   263  	if start == -1 || stop == -1 {
   264  		return extras
   265  	}
   266  
   267  	extraString := packageName[start+1 : stop]
   268  	for _, extra := range strings.Split(extraString, ",") {
   269  		extras = append(extras, strings.TrimSpace(extra))
   270  	}
   271  	return extras
   272  }
   273  
   274  func parseURL(line string) string {
   275  	parts := strings.Split(line, "@")
   276  
   277  	if len(parts) > 1 {
   278  		desiredIndex := -1
   279  
   280  		for index, part := range parts {
   281  			part := strings.TrimFunc(part, func(r rune) bool {
   282  				return !unicode.IsLetter(r) && !unicode.IsNumber(r)
   283  			})
   284  
   285  			if strings.HasPrefix(part, "git") {
   286  				desiredIndex = index
   287  				break
   288  			}
   289  		}
   290  
   291  		if desiredIndex != -1 {
   292  			return strings.TrimSpace(strings.Join(parts[desiredIndex:], "@"))
   293  		}
   294  	}
   295  
   296  	return ""
   297  }