github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/python/parse_requirements.go (about)

     1  package python
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"regexp"
     7  	"strings"
     8  	"unicode"
     9  
    10  	pep440 "github.com/aquasecurity/go-pep440-version"
    11  	"github.com/mitchellh/mapstructure"
    12  
    13  	"github.com/anchore/syft/syft/artifact"
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/pkg"
    16  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    17  	"github.com/lineaje-labs/syft/internal"
    18  	"github.com/lineaje-labs/syft/internal/log"
    19  )
    20  
const (
	// The constants below are regular expression fragments. They are concatenated, in order, into
	// requirementPattern, which is matched against one logical requirements.txt line.
	//
	// given the example requirement:
	//    requests[security] == 2.8.* ; python_version < "2.7" and sys_platform == "linux"  \
	//      --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 \
	//      --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65  # some comment

	// namePattern matches: requests[security]
	// The "name" group must start with a word character and may continue with word characters,
	// square brackets, commas, whitespace, '-' and '_', so any extras are captured as part of the name.
	namePattern = `(?P<name>\w[\w\[\],\s-_]+)`

	// versionConstraintPattern matches: == 2.8.*
	// The "versionConstraint" group is one or more optionally comma-terminated clauses, each made of
	// operator characters (~ = > ! <) followed by an alphanumeric/'.'/'*' version. It is followed by an
	// optional "@ <url>" section whose "url" group runs up to (not including) the next ';'.
	versionConstraintPattern = `(?P<versionConstraint>([^\S\r\n]*[~=>!<]+\s*[0-9a-zA-Z.*]+[^\S\r\n]*,?)+)?(@[^\S\r\n]*(?P<url>[^;]*))?`

	// markersPattern matches: python_version < "2.7" and sys_platform == "linux"
	// The "markers" group is everything after a ';' (the ';' itself is not captured).
	markersPattern = `(;(?P<markers>.*))?`

	// hashesPattern matches: --hash=sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3 --hash=sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65
	// The "hashes" group is one or more whitespace-separated "--hash=<letters, digits and ':'>" options.
	hashesPattern = `(?P<hashes>([^\S\r\n]*--hash=[a-zA-Z0-9:]+)+)?`

	// whiteSpaceNoNewlinePattern matches: (any whitespace character except for \r and \n)
	// The match may be empty (zero or more characters).
	whiteSpaceNoNewlinePattern = `[^\S\r\n]*`
)
    42  
    43  var requirementPattern = regexp.MustCompile(
    44  	`^` +
    45  		whiteSpaceNoNewlinePattern +
    46  		namePattern +
    47  		whiteSpaceNoNewlinePattern +
    48  		versionConstraintPattern +
    49  		markersPattern +
    50  		hashesPattern,
    51  )
    52  
// unprocessedRequirement holds the raw text captured for one requirements.txt line. Each field is filled
// from the capture group named in its mapstructure tag (see newRequirement), and is whitespace-trimmed there.
type unprocessedRequirement struct {
	// Name is the "name" group; it may still contain an extras section such as "[security]".
	Name              string `mapstructure:"name"`
	// VersionConstraint is the "versionConstraint" group, e.g. "== 2.8.*".
	VersionConstraint string `mapstructure:"versionConstraint"`
	// Markers is the "markers" group: the text following ';'.
	Markers           string `mapstructure:"markers"`
	// URL is the "url" group: the text following '@'.
	URL               string `mapstructure:"url"`
	// Hashes is the "hashes" group: the "--hash=..." options.
	Hashes            string `mapstructure:"hashes"`
}
    60  
    61  func newRequirement(raw string) *unprocessedRequirement {
    62  	var r unprocessedRequirement
    63  
    64  	values := internal.MatchNamedCaptureGroups(requirementPattern, raw)
    65  
    66  	if err := mapstructure.Decode(values, &r); err != nil {
    67  		return nil
    68  	}
    69  
    70  	r.Name = strings.TrimSpace(r.Name)
    71  	r.VersionConstraint = strings.TrimSpace(r.VersionConstraint)
    72  	r.Markers = strings.TrimSpace(r.Markers)
    73  	r.URL = strings.TrimSpace(r.URL)
    74  	r.Hashes = strings.TrimSpace(r.Hashes)
    75  
    76  	if r.Name == "" {
    77  		return nil
    78  	}
    79  
    80  	return &r
    81  }
    82  
// requirementsParser parses requirements.txt files (see parseRequirementsTxt).
type requirementsParser struct {
	// guessUnpinnedRequirements is passed to parseVersion: when true, a version is guessed for
	// requirements whose constraint is not an exact "==" pin instead of skipping them.
	guessUnpinnedRequirements bool
}
    86  
    87  func newRequirementsParser(cfg CatalogerConfig) requirementsParser {
    88  	return requirementsParser{
    89  		guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements,
    90  	}
    91  }
    92  
    93  // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
    94  // specific version.
    95  func (rp requirementsParser) parseRequirementsTxt(
    96  	_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser,
    97  ) ([]pkg.Package, []artifact.Relationship, error) {
    98  	var packages []pkg.Package
    99  
   100  	scanner := bufio.NewScanner(reader)
   101  	var lastLine string
   102  	for scanner.Scan() {
   103  		line := trimRequirementsTxtLine(scanner.Text())
   104  
   105  		if lastLine != "" {
   106  			line = lastLine + line
   107  			lastLine = ""
   108  		}
   109  
   110  		// remove line continuations... smashes the file into a single line
   111  		if strings.HasSuffix(line, "\\") {
   112  			// this line is a continuation of the previous line
   113  			lastLine += strings.TrimSuffix(line, "\\")
   114  			continue
   115  		}
   116  
   117  		if line == "" {
   118  			// nothing to parse on this line
   119  			continue
   120  		}
   121  
   122  		if strings.HasPrefix(line, "-e") {
   123  			// editable packages aren't parsed (yet)
   124  			continue
   125  		}
   126  
   127  		req := newRequirement(line)
   128  		if req == nil {
   129  			log.WithFields("path", reader.RealPath).Warnf("unable to parse requirements.txt line: %q", line)
   130  			continue
   131  		}
   132  
   133  		name := removeExtras(req.Name)
   134  		version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements)
   135  
   136  		if version == "" {
   137  			log.WithFields("path", reader.RealPath).Tracef("unable to determine package version in requirements.txt line: %q", line)
   138  			continue
   139  		}
   140  
   141  		packages = append(
   142  			packages,
   143  			newPackageForRequirementsWithMetadata(
   144  				name,
   145  				version,
   146  				pkg.PythonRequirementsEntry{
   147  					Name:              name,
   148  					Extras:            parseExtras(req.Name),
   149  					VersionConstraint: req.VersionConstraint,
   150  					URL:               parseURL(req.URL),
   151  					Markers:           req.Markers,
   152  				},
   153  				reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   154  			),
   155  		)
   156  	}
   157  
   158  	if err := scanner.Err(); err != nil {
   159  		return nil, nil, fmt.Errorf("failed to parse python requirements file: %w", err)
   160  	}
   161  
   162  	return packages, nil, nil
   163  }
   164  
   165  func parseVersion(version string, guessFromConstraint bool) string {
   166  	if isPinnedConstraint(version) {
   167  		return strings.TrimSpace(strings.ReplaceAll(version, "==", ""))
   168  	}
   169  
   170  	if guessFromConstraint {
   171  		return guessVersion(version)
   172  	}
   173  
   174  	return ""
   175  }
   176  
   177  func isPinnedConstraint(version string) bool {
   178  	return strings.Contains(version, "==") && !strings.ContainsAny(version, "*,<>!")
   179  }
   180  
   181  func guessVersion(constraint string) string {
   182  	// handle "2.8.*" -> "2.8.0"
   183  	constraint = strings.ReplaceAll(constraint, "*", "0")
   184  	if isPinnedConstraint(constraint) {
   185  		return strings.TrimSpace(strings.ReplaceAll(constraint, "==", ""))
   186  	}
   187  
   188  	constraints := strings.Split(constraint, ",")
   189  	filteredVersions := map[string]struct{}{}
   190  	for _, part := range constraints {
   191  		if strings.Contains(part, "!=") {
   192  			parts := strings.Split(part, "!=")
   193  			filteredVersions[strings.TrimSpace(parts[1])] = struct{}{}
   194  		}
   195  	}
   196  
   197  	var closestVersion *pep440.Version
   198  	for _, part := range constraints {
   199  		// ignore any parts that do not have '=' in them, >,<,~ are not valid semver
   200  		parts := strings.SplitAfter(part, "=")
   201  		if len(parts) < 2 {
   202  			continue
   203  		}
   204  		version, err := pep440.Parse(strings.TrimSpace(parts[1]))
   205  		if err != nil {
   206  			// ignore any parts that are not valid semver
   207  			continue
   208  		}
   209  		if _, ok := filteredVersions[version.String()]; ok {
   210  			continue
   211  		}
   212  
   213  		if strings.Contains(part, "==") {
   214  			parts := strings.Split(part, "==")
   215  			return strings.TrimSpace(parts[1])
   216  		}
   217  
   218  		if closestVersion == nil || version.GreaterThan(*closestVersion) {
   219  			closestVersion = &version
   220  		}
   221  	}
   222  	if closestVersion == nil {
   223  		return ""
   224  	}
   225  
   226  	return closestVersion.String()
   227  }
   228  
   229  // trimRequirementsTxtLine removes content from the given requirements.txt line
   230  // that should not be considered for parsing.
   231  func trimRequirementsTxtLine(line string) string {
   232  	line = strings.TrimSpace(line)
   233  	line = removeTrailingComment(line)
   234  
   235  	return line
   236  }
   237  
   238  // removeTrailingComment takes a requirements.txt line and strips off comment strings.
   239  func removeTrailingComment(line string) string {
   240  	parts := strings.SplitN(line, "#", 2)
   241  	if len(parts) < 2 {
   242  		// there aren't any comments
   243  
   244  		return line
   245  	}
   246  
   247  	return parts[0]
   248  }
   249  
   250  func removeExtras(packageName string) string {
   251  	start := strings.Index(packageName, "[")
   252  	if start == -1 {
   253  		return packageName
   254  	}
   255  
   256  	return strings.TrimSpace(packageName[:start])
   257  }
   258  
   259  func parseExtras(packageName string) []string {
   260  	var extras []string
   261  
   262  	start := strings.Index(packageName, "[")
   263  	stop := strings.Index(packageName, "]")
   264  	if start == -1 || stop == -1 {
   265  		return extras
   266  	}
   267  
   268  	extraString := packageName[start+1 : stop]
   269  	for _, extra := range strings.Split(extraString, ",") {
   270  		extras = append(extras, strings.TrimSpace(extra))
   271  	}
   272  	return extras
   273  }
   274  
   275  func parseURL(line string) string {
   276  	parts := strings.Split(line, "@")
   277  
   278  	if len(parts) > 1 {
   279  		desiredIndex := -1
   280  
   281  		for index, part := range parts {
   282  			part := strings.TrimFunc(part, func(r rune) bool {
   283  				return !unicode.IsLetter(r) && !unicode.IsNumber(r)
   284  			})
   285  
   286  			if strings.HasPrefix(part, "git") {
   287  				desiredIndex = index
   288  				break
   289  			}
   290  		}
   291  
   292  		if desiredIndex != -1 {
   293  			return strings.TrimSpace(strings.Join(parts[desiredIndex:], "@"))
   294  		}
   295  	}
   296  
   297  	return ""
   298  }