github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/debian/parse_copyright.go (about)

     1  package debian
     2  
     3  import (
     4  	"io"
     5  	"regexp"
     6  	"sort"
     7  	"strings"
     8  
     9  	"github.com/scylladb/go-set/strset"
    10  
    11  	"github.com/anchore/syft/internal"
    12  )
    13  
    14  // For more information see: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-syntax
    15  
    16  var (
    17  	licensePattern                          = regexp.MustCompile(`^License: (?P<license>\S*)`)
    18  	commonLicensePathPattern                = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
    19  	licenseFirstSentenceAfterHeadingPattern = regexp.MustCompile(`(?is)^[^\n]+?\n[-]+?\n+(?P<license>.*?\.)`)
    20  	licenseAgreementHeadingPattern          = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
    21  )
    22  
    23  func parseLicensesFromCopyright(reader io.Reader) []string {
    24  	findings := strset.New()
    25  	data, err := io.ReadAll(reader)
    26  	if err != nil {
    27  		// Fail-safe: return nothing if unable to read
    28  		return []string{}
    29  	}
    30  
    31  	content := string(data)
    32  	lines := strings.Split(content, "\n")
    33  	for _, line := range lines {
    34  		if value := findLicenseClause(licensePattern, line); value != "" {
    35  			findings.Add(value)
    36  		}
    37  		if value := findLicenseClause(commonLicensePathPattern, line); value != "" {
    38  			findings.Add(value)
    39  		}
    40  		if value := findLicenseClause(licenseAgreementHeadingPattern, line); value != "" {
    41  			findings.Add(value)
    42  		}
    43  	}
    44  
    45  	// some copyright files have a license declaration after the heading ex:
    46  	// End User License Agreement\n--------------------------
    47  	// we want to try and find these multi-line license declarations and make exceptions for them
    48  	if value := findLicenseClause(licenseFirstSentenceAfterHeadingPattern, content); value != "" {
    49  		findings.Add(value)
    50  	}
    51  
    52  	results := findings.List()
    53  	sort.Strings(results)
    54  
    55  	return results
    56  }
    57  
    58  func findLicenseClause(pattern *regexp.Regexp, line string) string {
    59  	valueGroup := "license"
    60  	matchesByGroup := internal.MatchNamedCaptureGroups(pattern, line)
    61  
    62  	candidate, ok := matchesByGroup[valueGroup]
    63  	if !ok {
    64  		return ""
    65  	}
    66  
    67  	return ensureIsSingleLicense(candidate)
    68  }
    69  
    70  var multiLicenseExceptions = []string{
    71  	"NVIDIA Software License Agreement",
    72  }
    73  
    74  func ensureIsSingleLicense(candidate string) (license string) {
    75  	candidate = strings.TrimSpace(strings.ReplaceAll(candidate, "\n", " "))
    76  
    77  	// Check for exceptions first
    78  	for _, exception := range multiLicenseExceptions {
    79  		if strings.Contains(candidate, exception) {
    80  			return strings.TrimSuffix(candidate, ".")
    81  		}
    82  	}
    83  	if strings.Contains(candidate, " or ") || strings.Contains(candidate, " and ") {
    84  		// make sure this is not one of the license exceptions
    85  		// this is a multi-license summary, ignore this as other recurrent license lines should cover this
    86  		return
    87  	}
    88  	if candidate != "" && strings.ToLower(candidate) != "none" {
    89  		// the license may be at the end of a sentence, clean . characters
    90  		license = strings.TrimSuffix(candidate, ".")
    91  	}
    92  	return license
    93  }