github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/common/cpe/dictionary/index-generator/generate.go (about) 1 package main 2 3 import ( 4 "compress/gzip" 5 "encoding/json" 6 "encoding/xml" 7 "fmt" 8 "io" 9 "log" 10 "strings" 11 12 "github.com/facebookincubator/nvdtools/wfn" 13 "golang.org/x/exp/slices" 14 15 "github.com/anchore/syft/syft/pkg/cataloger/common/cpe/dictionary" 16 ) 17 18 func generateIndexedDictionaryJSON(rawGzipData io.Reader) ([]byte, error) { 19 gzipReader, err := gzip.NewReader(rawGzipData) 20 if err != nil { 21 return nil, fmt.Errorf("unable to decompress CPE dictionary: %w", err) 22 } 23 defer gzipReader.Close() 24 25 // Read XML data 26 data, err := io.ReadAll(gzipReader) 27 if err != nil { 28 return nil, fmt.Errorf("unable to read CPE dictionary: %w", err) 29 } 30 31 // Unmarshal XML 32 var cpeList CpeList 33 if err := xml.Unmarshal(data, &cpeList); err != nil { 34 return nil, fmt.Errorf("unable to unmarshal CPE dictionary XML: %w", err) 35 } 36 37 // Filter out data that's not applicable here 38 cpeList = filterCpeList(cpeList) 39 40 // Create indexed dictionary to help with looking up CPEs 41 indexedDictionary := indexCPEList(cpeList) 42 43 // Convert to JSON 44 jsonData, err := json.MarshalIndent(indexedDictionary, "", " ") 45 if err != nil { 46 return nil, fmt.Errorf("unable to marshal CPE dictionary to JSON: %w", err) 47 } 48 return jsonData, nil 49 } 50 51 // filterCpeList removes CPE items that are not applicable to software packages. 52 func filterCpeList(cpeList CpeList) CpeList { 53 var processedCpeList CpeList 54 55 seen := make(map[string]struct{}) 56 57 for _, cpeItem := range cpeList.CpeItems { 58 // Skip CPE items that don't have any references. 59 if len(cpeItem.References) == 0 { 60 continue 61 } 62 63 // Skip CPE items where the CPE URI doesn't meet our criteria. 64 parsedName, err := wfn.Parse(cpeItem.Name) 65 if err != nil { 66 log.Printf("unable to parse CPE URI %q: %s", cpeItem.Name, err) 67 } 68 69 if slices.Contains([]string{"h", "o"}, parsedName.Part) { 70 continue 71 } 72 73 normalizedName := normalizeCPE(parsedName).BindToURI() 74 if _, ok := seen[normalizedName]; ok { 75 continue 76 } 77 seen[normalizedName] = struct{}{} 78 cpeItem.Name = normalizedName 79 80 parsedCPE, err := wfn.Parse(cpeItem.Cpe23Item.Name) 81 if err != nil { 82 log.Printf("unable to parse CPE value %q: %s", cpeItem.Cpe23Item.Name, err) 83 } 84 85 cpeItem.Cpe23Item.Name = normalizeCPE(parsedCPE).BindToFmtString() 86 87 processedCpeList.CpeItems = append(processedCpeList.CpeItems, cpeItem) 88 } 89 90 return processedCpeList 91 } 92 93 // normalizeCPE removes the version and update parts of a CPE. 94 func normalizeCPE(cpe *wfn.Attributes) *wfn.Attributes { 95 cpeCopy := *cpe 96 97 cpeCopy.Version = "" 98 cpeCopy.Update = "" 99 100 return &cpeCopy 101 } 102 103 const ( 104 prefixForNPMPackages = "https://www.npmjs.com/package/" 105 prefixForRubyGems = "https://rubygems.org/gems/" 106 prefixForRubyGemsHTTP = "http://rubygems.org/gems/" 107 prefixForNativeRubyGems = "https://github.com/ruby/" 108 prefixForPyPIPackages = "https://pypi.org/project/" 109 prefixForJenkinsPlugins = "https://github.com/jenkinsci/" 110 prefixForRustCrates = "https://crates.io/crates/" 111 ) 112 113 // indexCPEList creates an index of CPEs by ecosystem. 114 func indexCPEList(list CpeList) *dictionary.Indexed { 115 indexed := &dictionary.Indexed{ 116 EcosystemPackages: make(map[string]dictionary.Packages), 117 } 118 119 for _, cpeItem := range list.CpeItems { 120 cpeItemName := cpeItem.Cpe23Item.Name 121 122 for _, reference := range cpeItem.References { 123 ref := reference.Reference.Href 124 125 switch { 126 case strings.HasPrefix(ref, prefixForNPMPackages): 127 addEntryForNPMPackage(indexed, ref, cpeItemName) 128 129 case strings.HasPrefix(ref, prefixForRubyGems), strings.HasPrefix(ref, prefixForRubyGemsHTTP): 130 addEntryForRubyGem(indexed, ref, cpeItemName) 131 132 case strings.HasPrefix(ref, prefixForNativeRubyGems): 133 addEntryForNativeRubyGem(indexed, ref, cpeItemName) 134 135 case strings.HasPrefix(ref, prefixForPyPIPackages): 136 addEntryForPyPIPackage(indexed, ref, cpeItemName) 137 138 case strings.HasPrefix(ref, prefixForJenkinsPlugins): 139 // It _might_ be a jenkins plugin! 140 addEntryForJenkinsPlugin(indexed, ref, cpeItemName) 141 142 case strings.HasPrefix(ref, prefixForRustCrates): 143 addEntryForRustCrate(indexed, ref, cpeItemName) 144 } 145 } 146 } 147 148 return indexed 149 } 150 151 func addEntryForRustCrate(indexed *dictionary.Indexed, ref string, cpeItemName string) { 152 // Prune off the non-package-name parts of the URL 153 ref = strings.TrimPrefix(ref, prefixForRustCrates) 154 ref = strings.Split(ref, "/")[0] 155 156 if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRustCrates]; !ok { 157 indexed.EcosystemPackages[dictionary.EcosystemRustCrates] = make(dictionary.Packages) 158 } 159 160 indexed.EcosystemPackages[dictionary.EcosystemRustCrates][ref] = cpeItemName 161 } 162 163 func addEntryForJenkinsPlugin(indexed *dictionary.Indexed, ref string, cpeItemName string) { 164 // Prune off the non-package-name parts of the URL 165 ref = strings.TrimPrefix(ref, prefixForJenkinsPlugins) 166 ref = strings.Split(ref, "/")[0] 167 168 if !strings.HasSuffix(ref, "-plugin") { 169 // It's not a jenkins plugin! 170 return 171 } 172 173 ref = strings.TrimSuffix(ref, "-plugin") 174 175 if _, ok := indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins]; !ok { 176 indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins] = make(dictionary.Packages) 177 } 178 179 indexed.EcosystemPackages[dictionary.EcosystemJenkinsPlugins][ref] = cpeItemName 180 } 181 182 func addEntryForPyPIPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) { 183 // Prune off the non-package-name parts of the URL 184 ref = strings.TrimPrefix(ref, prefixForPyPIPackages) 185 ref = strings.Split(ref, "/")[0] 186 187 if _, ok := indexed.EcosystemPackages[dictionary.EcosystemPyPI]; !ok { 188 indexed.EcosystemPackages[dictionary.EcosystemPyPI] = make(dictionary.Packages) 189 } 190 191 indexed.EcosystemPackages[dictionary.EcosystemPyPI][ref] = cpeItemName 192 } 193 194 func addEntryForNativeRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) { 195 // Prune off the non-package-name parts of the URL 196 ref = strings.TrimPrefix(ref, prefixForNativeRubyGems) 197 ref = strings.Split(ref, "/")[0] 198 199 if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok { 200 indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages) 201 } 202 203 indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName 204 } 205 206 func addEntryForRubyGem(indexed *dictionary.Indexed, ref string, cpeItemName string) { 207 // Prune off the non-package-name parts of the URL 208 ref = strings.TrimPrefix(ref, prefixForRubyGems) 209 ref = strings.TrimPrefix(ref, prefixForRubyGemsHTTP) 210 ref = strings.Split(ref, "/")[0] 211 212 if _, ok := indexed.EcosystemPackages[dictionary.EcosystemRubyGems]; !ok { 213 indexed.EcosystemPackages[dictionary.EcosystemRubyGems] = make(dictionary.Packages) 214 } 215 216 indexed.EcosystemPackages[dictionary.EcosystemRubyGems][ref] = cpeItemName 217 } 218 219 func addEntryForNPMPackage(indexed *dictionary.Indexed, ref string, cpeItemName string) { 220 // Prune off the non-package-name parts of the URL 221 ref = strings.Split(ref, "/v/")[0] 222 ref = strings.Split(ref, "?")[0] 223 ref = strings.TrimPrefix(ref, prefixForNPMPackages) 224 225 if _, ok := indexed.EcosystemPackages[dictionary.EcosystemNPM]; !ok { 226 indexed.EcosystemPackages[dictionary.EcosystemNPM] = make(dictionary.Packages) 227 } 228 229 indexed.EcosystemPackages[dictionary.EcosystemNPM][ref] = cpeItemName 230 }