github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/common/cpe/dictionary/index-generator/generate.go (about)

     1  package main
     2  
     3  import (
     4  	"compress/gzip"
     5  	"encoding/json"
     6  	"encoding/xml"
     7  	"fmt"
     8  	"io"
     9  	"log"
    10  	"strings"
    11  
    12  	"github.com/facebookincubator/nvdtools/wfn"
    13  	"golang.org/x/exp/slices"
    14  
    15  	"github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary"
    16  )
    17  
    18  func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) {
    19  	gzipReader, err := gzip.NewReader(rawGzipData)
    20  	if err != nil {
    21  		return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err)
    22  	}
    23  	defer gzipReader.Close()
    24  
    25  	// Read XML data
    26  	data, err := io.ReadAll(gzipReader)
    27  	if err != nil {
    28  		return nil, fmt.Errorf("unable to read CPE dictionary: %w", err)
    29  	}
    30  
    31  	// Unmarshal XML
    32  	var cpeList CpeList
    33  	if err := xml.Unmarshal(data, &cpeList); err != nil {
    34  		return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err)
    35  	}
    36  
    37  	// Filter out data that's not applicable here
    38  	cpeList = filterCpeList(cpeList)
    39  
    40  	// Create indexed dictionary to help with looking up CPEs
    41  	indexedDictionary := indexCPEList(cpeList)
    42  
    43  	// Convert to JSON
    44  	jsonData, err := json.MarshalIndent(indexedDictionary, "", "  ")
    45  	if err != nil {
    46  		return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err)
    47  	}
    48  	return jsonData, nil
    49  }
    50  
    51  // filterCpeList removes CPE items that are not applicable to software packages.
    52  func filterCpeList(cpeList CpeList) CpeList {
    53  	var processedCpeList CpeList
    54  
    55  	seen := make(map[string]struct{})
    56  
    57  	for _, cpeItem := range cpeList.CpeItems {
    58  		// Skip CPE items that don't have any references.
    59  		if len(cpeItem.References) == 0 {
    60  			continue
    61  		}
    62  
    63  		// Skip CPE items where the CPE URI doesn't meet our criteria.
    64  		parsedName, err := wfn.Parse(cpeItem.Name)
    65  		if err != nil {
    66  			log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err)
    67  		}
    68  
    69  		if slices.Contains([]string{"h", "o"}, parsedName.Part) {
    70  			continue
    71  		}
    72  
    73  		normalizedName := normalizeCPE(parsedName).BindToURI()
    74  		if _, ok := seen[normalizedName]; ok {
    75  			continue
    76  		}
    77  		seen[normalizedName] = struct{}{}
    78  		cpeItem.Name = normalizedName
    79  
    80  		parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name)
    81  		if err != nil {
    82  			log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err)
    83  		}
    84  
    85  		cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString()
    86  
    87  		processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem)
    88  	}
    89  
    90  	return processedCpeList
    91  }
    92  
    93  // normalizeCPE removes the version and update parts of a CPE.
    94  func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes {
    95  	cpeCopy := *cpe
    96  
    97  	cpeCopy.Version = ""
    98  	cpeCopy.Update = ""
    99  
   100  	return &cpeCopy
   101  }
   102  
   103  const (
   104  	prefixForNPMPackages    = "https://www.npmjs.com/package/"
   105  	prefixForRubyGems       = "https://rubygems.org/gems/"
   106  	prefixForRubyGemsHTTP   = "http://rubygems.org/gems/"
   107  	prefixForNativeRubyGems = "https://github.com/ruby/"
   108  	prefixForPyPIPackages   = "https://pypi.org/project/"
   109  	prefixForJenkinsPlugins = "https://github.com/jenkinsci/"
   110  	prefixForRustCrates     = "https://crates.io/crates/"
   111  )
   112  
   113  // indexCPEList creates an index of CPEs by ecosystem.
   114  func indexCPEList(list CpeList) *dictionary.Indexed {
   115  	indexed := &dictionary.Indexed{
   116  		EcosystemPackages: make(map[string]dictionary.Packages),
   117  	}
   118  
   119  	for _, cpeItem := range list.CpeItems {
   120  		cpeItemName := cpeItem.Cpe23Item.Name
   121  
   122  		for _, reference := range cpeItem.References {
   123  			ref := reference.Reference.Href
   124  
   125  			switch {
   126  			case strings.HasPrefix(ref, prefixForNPMPackages):
   127  				addEntryForNPMPackage(indexed, ref, cpeItemName)
   128  
   129  			case strings.HasPrefix(ref, prefixForRubyGems), strings.HasPrefix(ref, prefixForRubyGemsHTTP):
   130  				addEntryForRubyGem(indexed, ref, cpeItemName)
   131  
   132  			case strings.HasPrefix(ref, prefixForNativeRubyGems):
   133  				addEntryForNativeRubyGem(indexed, ref, cpeItemName)
   134  
   135  			case strings.HasPrefix(ref, prefixForPyPIPackages):
   136  				addEntryForPyPIPackage(indexed, ref, cpeItemName)
   137  
   138  			case strings.HasPrefix(ref, prefixForJenkinsPlugins):
   139  				// It _might_ be a jenkins plugin!
   140  				addEntryForJenkinsPlugin(indexed, ref, cpeItemName)
   141  
   142  			case strings.HasPrefix(ref, prefixForRustCrates):
   143  				addEntryForRustCrate(indexed, ref, cpeItemName)
   144  			}
   145  		}
   146  	}
   147  
   148  	return indexed
   149  }
   150  
   151  func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItemName string) {
   152  	// Prune off the non-package-name parts of the URL
   153  	ref = strings.TrimPrefix(ref, prefixForRustCrates)
   154  	ref = strings.Split(ref, "/")[0]
   155  
   156  	if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]; !ok {
   157  		indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = make(dictionary.Packages)
   158  	}
   159  
   160  	indexed.EcosystemPackages[dictionary.EcosystemRustCrates][ref] = cpeItemName
   161  }
   162  
   163  func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItemName string) {
   164  	// Prune off the non-package-name parts of the URL
   165  	ref = strings.TrimPrefix(ref, prefixForJenkinsPlugins)
   166  	ref = strings.Split(ref, "/")[0]
   167  
   168  	if !strings.HasSuffix(ref, "-plugin") {
   169  		// It's not a jenkins plugin!
   170  		return
   171  	}
   172  
   173  	ref = strings.TrimSuffix(ref, "-plugin")
   174  
   175  	if _, ok := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]; !ok {
   176  		indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = make(dictionary.Packages)
   177  	}
   178  
   179  	indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][ref] = cpeItemName
   180  }
   181  
   182  func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
   183  	// Prune off the non-package-name parts of the URL
   184  	ref = strings.TrimPrefix(ref, prefixForPyPIPackages)
   185  	ref = strings.Split(ref, "/")[0]
   186  
   187  	if _, ok := indexed.EcosystemPackages[dictionary.EcosystemPyPI]; !ok {
   188  		indexed.EcosystemPackages[dictionary.EcosystemPyPI] = make(dictionary.Packages)
   189  	}
   190  
   191  	indexed.EcosystemPackages[dictionary.EcosystemPyPI][ref] = cpeItemName
   192  }
   193  
   194  func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
   195  	// Prune off the non-package-name parts of the URL
   196  	ref = strings.TrimPrefix(ref, prefixForNativeRubyGems)
   197  	ref = strings.Split(ref, "/")[0]
   198  
   199  	if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok {
   200  		indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages)
   201  	}
   202  
   203  	indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName
   204  }
   205  
   206  func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) {
   207  	// Prune off the non-package-name parts of the URL
   208  	ref = strings.TrimPrefix(ref, prefixForRubyGems)
   209  	ref = strings.TrimPrefix(ref, prefixForRubyGemsHTTP)
   210  	ref = strings.Split(ref, "/")[0]
   211  
   212  	if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok {
   213  		indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages)
   214  	}
   215  
   216  	indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName
   217  }
   218  
   219  func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) {
   220  	// Prune off the non-package-name parts of the URL
   221  	ref = strings.Split(ref, "/v/")[0]
   222  	ref = strings.Split(ref, "?")[0]
   223  	ref = strings.TrimPrefix(ref, prefixForNPMPackages)
   224  
   225  	if _, ok := indexed.EcosystemPackages[dictionary.EcosystemNPM]; !ok {
   226  		indexed.EcosystemPackages[dictionary.EcosystemNPM] = make(dictionary.Packages)
   227  	}
   228  
   229  	indexed.EcosystemPackages[dictionary.EcosystemNPM][ref] = cpeItemName
   230  }