github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/python/parse_requirements.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"regexp"
     7  	"strings"
     8  	"unicode"
     9  
    10  	pep440 "github.com/aquasecurity/go-pep440-version"
    11  	"github.com/mitchellh/mapstructure"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/syft/artifact"
    16  	"github.com/anchore/syft/syft/file"
    17  	"github.com/anchore/syft/syft/pkg"
    18  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    19  )
    20  
    21  const (
    22  	// given the example requirement:
    23  	//    requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux"  \
    24  	//      --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \
    25  	//      --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65  # some comment
    26  
    27  	// namePattern matches: requests[security]
    28  	namePattern = `(?P<name>\w[\w\[\],\s-_]+)`
    29  
    30  	// versionConstraintPattern matches: == 2.8.*
    31  	versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?`
    32  
    33  	// markersPattern matches: python_version < "2.7" and sys_platform == "linux"
    34  	markersPattern = `(;(?P<markers>.*))?`
    35  
    36  	// hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65
    37  	hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?`
    38  
    39  	// whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n)
    40  	whiteSpaceNoNewlinePattern = `[^\S\r\n]*`
    41  )
    42  
    43  var requirementPattern = regexp.MustCompile(
    44  	`^` +
    45  		whiteSpaceNoNewlinePattern +
    46  		namePattern +
    47  		whiteSpaceNoNewlinePattern +
    48  		versionConstraintPattern +
    49  		markersPattern +
    50  		hashesPattern,
    51  )
    52  
// unprocessedRequirement holds the text of each named capture group of requirementPattern for a
// single requirements.txt entry. The mapstructure tags are the capture group names that
// newRequirement decodes from.
type unprocessedRequirement struct {
	// Name is the captured package name, including any bracketed extras.
	Name              string `mapstructure:"name"`
	// VersionConstraint is the captured version clause(s), e.g. "== 2.8.*".
	VersionConstraint string `mapstructure:"versionConstraint"`
	// Markers is the text captured after the ';' separator.
	Markers           string `mapstructure:"markers"`
	// URL is the text captured after the '@' separator.
	URL               string `mapstructure:"url"`
	// Hashes is the captured run of --hash options.
	Hashes            string `mapstructure:"hashes"`
}
    60  
    61  func newRequirement(raw string) *unprocessedRequirement {
    62  	var r unprocessedRequirement
    63  
    64  	values := internal.MatchNamedCaptureGroups(requirementPattern, raw)
    65  
    66  	if err := mapstructure.Decode(values, &r); err != nil {
    67  		return nil
    68  	}
    69  
    70  	r.Name = strings.TrimSpace(r.Name)
    71  	r.VersionConstraint = strings.TrimSpace(r.VersionConstraint)
    72  	r.Markers = strings.TrimSpace(r.Markers)
    73  	r.URL = strings.TrimSpace(r.URL)
    74  	r.Hashes = strings.TrimSpace(r.Hashes)
    75  
    76  	if r.Name == "" {
    77  		return nil
    78  	}
    79  
    80  	return &r
    81  }
    82  
// requirementsParser parses requirements.txt files. guessUnpinnedRequirements is passed to
// parseVersion to decide whether a version should be guessed for constraints that are not pinned.
type requirementsParser struct {
	guessUnpinnedRequirements bool
}
    86  
    87  func newRequirementsParser(cfg CatalogerConfig) requirementsParser {
    88  	return requirementsParser{
    89  		guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements,
    90  	}
    91  }
    92  
    93  // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
    94  // specific version.
    95  func (rp requirementsParser) parseRequirementsTxt(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    96  	var packages []pkg.Package
    97  
    98  	scanner := bufio.NewScanner(reader)
    99  	var lastLine string
   100  	for scanner.Scan() {
   101  		line := trimRequirementsTxtLine(scanner.Text())
   102  
   103  		if lastLine != "" {
   104  			line = lastLine + line
   105  			lastLine = ""
   106  		}
   107  
   108  		// remove line continuations... smashes the file into a single line
   109  		if strings.HasSuffix(line, "\\") {
   110  			// this line is a continuation of the previous line
   111  			lastLine += strings.TrimSuffix(line, "\\")
   112  			continue
   113  		}
   114  
   115  		if line == "" {
   116  			// nothing to parse on this line
   117  			continue
   118  		}
   119  
   120  		if strings.HasPrefix(line, "-e") {
   121  			// editable packages aren't parsed (yet)
   122  			continue
   123  		}
   124  
   125  		req := newRequirement(line)
   126  		if req == nil {
   127  			log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line)
   128  			continue
   129  		}
   130  
   131  		name := removeExtras(req.Name)
   132  		version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements)
   133  
   134  		if version == "" {
   135  			log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line)
   136  			continue
   137  		}
   138  
   139  		packages = append(
   140  			packages,
   141  			newPackageForRequirementsWithMetadata(
   142  				name,
   143  				version,
   144  				pkg.PythonRequirementsMetadata{
   145  					Name:              name,
   146  					Extras:            parseExtras(req.Name),
   147  					VersionConstraint: req.VersionConstraint,
   148  					URL:               parseURL(req.URL),
   149  					Markers:           req.Markers,
   150  				},
   151  				reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   152  			),
   153  		)
   154  	}
   155  
   156  	if err := scanner.Err(); err != nil {
   157  		return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err)
   158  	}
   159  
   160  	return packages, nil, nil
   161  }
   162  
   163  func parseVersion(version string, guessFromConstraint bool) string {
   164  	if isPinnedConstraint(version) {
   165  		return strings.TrimSpace(strings.ReplaceAll(version, "==", ""))
   166  	}
   167  
   168  	if guessFromConstraint {
   169  		return guessVersion(version)
   170  	}
   171  
   172  	return ""
   173  }
   174  
   175  func isPinnedConstraint(version string) bool {
   176  	return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!")
   177  }
   178  
   179  func guessVersion(constraint string) string {
   180  	// handle "2.8.*" -> "2.8.0"
   181  	constraint = strings.ReplaceAll(constraint, "*", "0")
   182  	if isPinnedConstraint(constraint) {
   183  		return strings.TrimSpace(strings.ReplaceAll(constraint, "==", ""))
   184  	}
   185  
   186  	constraints := strings.Split(constraint, ",")
   187  	filteredVersions := map[string]struct{}{}
   188  	for _, part := range constraints {
   189  		if strings.Contains(part, "!=") {
   190  			parts := strings.Split(part, "!=")
   191  			filteredVersions[strings.TrimSpace(parts[1])] = struct{}{}
   192  		}
   193  	}
   194  
   195  	var closestVersion *pep440.Version
   196  	for _, part := range constraints {
   197  		// ignore any parts that do not have '=' in them, >,<,~ are not valid semver
   198  		parts := strings.SplitAfter(part, "=")
   199  		if len(parts) < 2 {
   200  			continue
   201  		}
   202  		version, err := pep440.Parse(strings.TrimSpace(parts[1]))
   203  		if err != nil {
   204  			// ignore any parts that are not valid semver
   205  			continue
   206  		}
   207  		if _, ok := filteredVersions[version.String()]; ok {
   208  			continue
   209  		}
   210  
   211  		if strings.Contains(part, "==") {
   212  			parts := strings.Split(part, "==")
   213  			return strings.TrimSpace(parts[1])
   214  		}
   215  
   216  		if closestVersion == nil || version.GreaterThan(*closestVersion) {
   217  			closestVersion = &version
   218  		}
   219  	}
   220  	if closestVersion == nil {
   221  		return ""
   222  	}
   223  
   224  	return closestVersion.String()
   225  }
   226  
   227  // trimRequirementsTxtLine removes content from the given requirements.txt line
   228  // that should not be considered for parsing.
   229  func trimRequirementsTxtLine(line string) string {
   230  	line = strings.TrimSpace(line)
   231  	line = removeTrailingComment(line)
   232  
   233  	return line
   234  }
   235  
   236  // removeTrailingComment takes a requirements.txt line and strips off comment strings.
   237  func removeTrailingComment(line string) string {
   238  	parts := strings.SplitN(line, "#", 2)
   239  	if len(parts) < 2 {
   240  		// there aren't any comments
   241  
   242  		return line
   243  	}
   244  
   245  	return parts[0]
   246  }
   247  
   248  func removeExtras(packageName string) string {
   249  	start := strings.Index(packageName, "[")
   250  	if start == -1 {
   251  		return packageName
   252  	}
   253  
   254  	return strings.TrimSpace(packageName[:start])
   255  }
   256  
   257  func parseExtras(packageName string) []string {
   258  	var extras []string
   259  
   260  	start := strings.Index(packageName, "[")
   261  	stop := strings.Index(packageName, "]")
   262  	if start == -1 || stop == -1 {
   263  		return extras
   264  	}
   265  
   266  	extraString := packageName[start+1 : stop]
   267  	for _, extra := range strings.Split(extraString, ",") {
   268  		extras = append(extras, strings.TrimSpace(extra))
   269  	}
   270  	return extras
   271  }
   272  
   273  func parseURL(line string) string {
   274  	parts := strings.Split(line, "@")
   275  
   276  	if len(parts) > 1 {
   277  		desiredIndex := -1
   278  
   279  		for index, part := range parts {
   280  			part := strings.TrimFunc(part, func(r rune) bool {
   281  				return !unicode.IsLetter(r) && !unicode.IsNumber(r)
   282  			})
   283  
   284  			if strings.HasPrefix(part, "git") {
   285  				desiredIndex = index
   286  				break
   287  			}
   288  		}
   289  
   290  		if desiredIndex != -1 {
   291  			return strings.TrimSpace(strings.Join(parts[desiredIndex:], "@"))
   292  		}
   293  	}
   294  
   295  	return ""
   296  }