github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/common/cpe/generate.go (about)

     1  package cpe
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"sort"
     8  	"strings"
     9  
    10  	"github.com/facebookincubator/nvdtools/wfn"
    11  	"github.com/nextlinux/gosbom/gosbom/cpe"
    12  	"github.com/nextlinux/gosbom/gosbom/pkg"
    13  	"github.com/nextlinux/gosbom/internal"
    14  	"github.com/scylladb/go-set/strset"
    15  )
    16  
// knownVendors contains vendor strings that are known to exist in
// the CPE database, so they are preferred over other candidates:
// when candidateVendors finds one of these values among its candidates,
// it returns that vendor alone instead of the full candidate list.
var knownVendors = strset.New("apache")
    20  
    21  func newCPE(product, vendor, version, targetSW string) *wfn.Attributes {
    22  	c := *(wfn.NewAttributesWithAny())
    23  	c.Part = "a"
    24  	c.Product = product
    25  	c.Vendor = vendor
    26  	c.Version = version
    27  	c.TargetSW = targetSW
    28  	if cpe.ValidateString(cpe.String(c)) != nil {
    29  		return nil
    30  	}
    31  	return &c
    32  }
    33  
    34  // Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to
    35  // generate the minimal set of representative CPEs, which implies that optional fields should not be included
    36  // (such as target SW).
    37  func Generate(p pkg.Package) []cpe.CPE {
    38  	vendors := candidateVendors(p)
    39  	products := candidateProducts(p)
    40  	if len(products) == 0 {
    41  		return nil
    42  	}
    43  
    44  	keys := internal.NewStringSet()
    45  	cpes := make([]cpe.CPE, 0)
    46  	for _, product := range products {
    47  		for _, vendor := range vendors {
    48  			// prevent duplicate entries...
    49  			key := fmt.Sprintf("%s|%s|%s", product, vendor, p.Version)
    50  			if keys.Contains(key) {
    51  				continue
    52  			}
    53  			keys.Add(key)
    54  			// add a new entry...
    55  			if c := newCPE(product, vendor, p.Version, wfn.Any); c != nil {
    56  				cpes = append(cpes, *c)
    57  			}
    58  		}
    59  	}
    60  
    61  	// filter out any known combinations that don't accurately represent this package
    62  	cpes = filter(cpes, p, cpeFilters...)
    63  
    64  	sort.Sort(cpe.BySpecificity(cpes))
    65  
    66  	return cpes
    67  }
    68  
    69  func candidateVendors(p pkg.Package) []string {
    70  	// in ecosystems where the packaging metadata does not have a clear field to indicate a vendor (or a field that
    71  	// could be interpreted indirectly as such) the project name tends to be a common stand in. Examples of this
    72  	// are the elasticsearch gem, xstream jar, and rack gem... all of these cases you can find vulnerabilities
    73  	// with CPEs where the vendor is the product name and doesn't appear to be derived from any available package
    74  	// metadata.
    75  	vendors := newFieldCandidateSet(candidateProducts(p)...)
    76  
    77  	switch p.Language {
    78  	case pkg.JavaScript:
    79  		// for JavaScript if we find node.js as a package then the vendor is "nodejs"
    80  		if p.Name == "node.js" {
    81  			vendors.addValue("nodejs")
    82  		}
    83  	case pkg.Ruby:
    84  		vendors.addValue("ruby-lang")
    85  	case pkg.Go:
    86  		// replace all candidates with only the golang-specific helper
    87  		vendors.clear()
    88  
    89  		vendor := candidateVendorForGo(p.Name)
    90  		if vendor != "" {
    91  			vendors.addValue(vendor)
    92  		}
    93  	}
    94  
    95  	switch p.MetadataType {
    96  	case pkg.RpmMetadataType:
    97  		vendors.union(candidateVendorsForRPM(p))
    98  	case pkg.GemMetadataType:
    99  		vendors.union(candidateVendorsForRuby(p))
   100  	case pkg.PythonPackageMetadataType:
   101  		vendors.union(candidateVendorsForPython(p))
   102  	case pkg.JavaMetadataType:
   103  		vendors.union(candidateVendorsForJava(p))
   104  	case pkg.ApkMetadataType:
   105  		vendors.union(candidateVendorsForAPK(p))
   106  	case pkg.NpmPackageJSONMetadataType:
   107  		vendors.union(candidateVendorsForJavascript(p))
   108  	}
   109  
   110  	// We should no longer be generating vendor candidates with these values ["" and "*"]
   111  	// (since CPEs will match any other value)
   112  	vendors.removeByValue("")
   113  	vendors.removeByValue("*")
   114  
   115  	// try swapping hyphens for underscores, vice versa, and removing separators altogether
   116  	addDelimiterVariations(vendors)
   117  
   118  	// generate sub-selections of each candidate based on separators (e.g. jenkins-ci -> [jenkins, jenkins-ci])
   119  	addAllSubSelections(vendors)
   120  
   121  	// add more candidates based on the package info for each vendor candidate
   122  	for _, vendor := range vendors.uniqueValues() {
   123  		vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, p.Type, p.Name, vendor)...)
   124  	}
   125  
   126  	// remove known mis
   127  	vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, p.Type, p.Name)...)
   128  
   129  	uniqueVendors := vendors.uniqueValues()
   130  
   131  	// if any known vendor was detected, pick that one.
   132  	for _, vendor := range uniqueVendors {
   133  		if knownVendors.Has(vendor) {
   134  			return []string{vendor}
   135  		}
   136  	}
   137  
   138  	return uniqueVendors
   139  }
   140  
   141  func candidateProducts(p pkg.Package) []string {
   142  	products := newFieldCandidateSet(p.Name)
   143  
   144  	switch {
   145  	case p.Language == pkg.Python:
   146  		if !strings.HasPrefix(p.Name, "python") {
   147  			products.addValue("python-" + p.Name)
   148  		}
   149  	case p.Language == pkg.Java || p.MetadataType == pkg.JavaMetadataType:
   150  		products.addValue(candidateProductsForJava(p)...)
   151  	case p.Language == pkg.Go:
   152  		// replace all candidates with only the golang-specific helper
   153  		products.clear()
   154  
   155  		prod := candidateProductForGo(p.Name)
   156  		if prod != "" {
   157  			products.addValue(prod)
   158  		}
   159  	}
   160  
   161  	if p.MetadataType == pkg.ApkMetadataType {
   162  		products.union(candidateProductsForAPK(p))
   163  	}
   164  
   165  	// it is never OK to have candidates with these values ["" and "*"] (since CPEs will match any other value)
   166  	products.removeByValue("")
   167  	products.removeByValue("*")
   168  
   169  	// try swapping hyphens for underscores, vice versa, and removing separators altogether
   170  	addDelimiterVariations(products)
   171  
   172  	// add known candidate additions
   173  	products.addValue(findAdditionalProducts(defaultCandidateAdditions, p.Type, p.Name)...)
   174  
   175  	// remove known candidate removals
   176  	products.removeByValue(findProductsToRemove(defaultCandidateRemovals, p.Type, p.Name)...)
   177  
   178  	return products.uniqueValues()
   179  }
   180  
   181  func addAllSubSelections(fields fieldCandidateSet) {
   182  	candidatesForVariations := fields.copy()
   183  	candidatesForVariations.removeWhere(subSelectionsDisallowed)
   184  
   185  	for _, candidate := range candidatesForVariations.values() {
   186  		fields.addValue(generateSubSelections(candidate)...)
   187  	}
   188  }
   189  
   190  // generateSubSelections attempts to split a field by hyphens and underscores and return a list of sensible sub-selections
   191  // that can be used as product or vendor candidates. E.g. jenkins-ci-tools -> [jenkins-ci-tools, jenkins-ci, jenkins].
   192  func generateSubSelections(field string) (results []string) {
   193  	scanner := bufio.NewScanner(strings.NewReader(field))
   194  	scanner.Split(scanByHyphenOrUnderscore)
   195  	var lastToken uint8
   196  	for scanner.Scan() {
   197  		rawCandidate := scanner.Text()
   198  		if len(rawCandidate) == 0 {
   199  			break
   200  		}
   201  
   202  		// trim any number of hyphen or underscore that is prefixed/suffixed on the given candidate. Since
   203  		// scanByHyphenOrUnderscore preserves delimiters (hyphens and underscores) they are guaranteed to be at least
   204  		// prefixed.
   205  		candidate := strings.TrimFunc(rawCandidate, trimHyphenOrUnderscore)
   206  
   207  		// capture the result (if there is content)
   208  		if len(candidate) > 0 {
   209  			if len(results) > 0 {
   210  				results = append(results, results[len(results)-1]+string(lastToken)+candidate)
   211  			} else {
   212  				results = append(results, candidate)
   213  			}
   214  		}
   215  
   216  		// keep track of the trailing separator for the next loop
   217  		lastToken = rawCandidate[len(rawCandidate)-1]
   218  	}
   219  	return results
   220  }
   221  
   222  // trimHyphenOrUnderscore is a character filter function for use with strings.TrimFunc in order to remove any hyphen or underscores.
   223  func trimHyphenOrUnderscore(r rune) bool {
   224  	switch r {
   225  	case '-', '_':
   226  		return true
   227  	}
   228  	return false
   229  }
   230  
   231  // scanByHyphenOrUnderscore splits on hyphen or underscore and includes the separator in the split
   232  func scanByHyphenOrUnderscore(data []byte, atEOF bool) (advance int, token []byte, err error) {
   233  	if atEOF && len(data) == 0 {
   234  		return 0, nil, nil
   235  	}
   236  	if i := bytes.IndexAny(data, "-_"); i >= 0 {
   237  		return i + 1, data[0 : i+1], nil
   238  	}
   239  
   240  	if atEOF {
   241  		return len(data), data, nil
   242  	}
   243  
   244  	return 0, nil, nil
   245  }
   246  
   247  func addDelimiterVariations(fields fieldCandidateSet) {
   248  	candidatesForVariations := fields.copy()
   249  	candidatesForVariations.removeWhere(delimiterVariationsDisallowed)
   250  
   251  	for _, candidate := range candidatesForVariations.list() {
   252  		field := candidate.value
   253  		hasHyphen := strings.Contains(field, "-")
   254  		hasUnderscore := strings.Contains(field, "_")
   255  
   256  		if hasHyphen {
   257  			// provide variations of hyphen candidates with an underscore
   258  			newValue := strings.ReplaceAll(field, "-", "_")
   259  			underscoreCandidate := candidate
   260  			underscoreCandidate.value = newValue
   261  			fields.add(underscoreCandidate)
   262  		}
   263  
   264  		if hasUnderscore {
   265  			// provide variations of underscore candidates with a hyphen
   266  			newValue := strings.ReplaceAll(field, "_", "-")
   267  			hyphenCandidate := candidate
   268  			hyphenCandidate.value = newValue
   269  			fields.add(hyphenCandidate)
   270  		}
   271  	}
   272  }