github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/common/cpe/generate.go (about) 1 package cpe 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "sort" 8 "strings" 9 10 "github.com/facebookincubator/nvdtools/wfn" 11 "github.com/nextlinux/gosbom/gosbom/cpe" 12 "github.com/nextlinux/gosbom/gosbom/pkg" 13 "github.com/nextlinux/gosbom/internal" 14 "github.com/scylladb/go-set/strset" 15 ) 16 17 // knownVendors contains vendor strings that are known to exist in 18 // the CPE database, so they will be preferred over other candidates: 19 var knownVendors = strset.New("apache") 20 21 func newCPE(product, vendor, version, targetSW string) *wfn.Attributes { 22 c := *(wfn.NewAttributesWithAny()) 23 c.Part = "a" 24 c.Product = product 25 c.Vendor = vendor 26 c.Version = version 27 c.TargetSW = targetSW 28 if cpe.ValidateString(cpe.String(c)) != nil { 29 return nil 30 } 31 return &c 32 } 33 34 // Generate Create a list of CPEs for a given package, trying to guess the vendor, product tuple. We should be trying to 35 // generate the minimal set of representative CPEs, which implies that optional fields should not be included 36 // (such as target SW). 37 func Generate(p pkg.Package) []cpe.CPE { 38 vendors := candidateVendors(p) 39 products := candidateProducts(p) 40 if len(products) == 0 { 41 return nil 42 } 43 44 keys := internal.NewStringSet() 45 cpes := make([]cpe.CPE, 0) 46 for _, product := range products { 47 for _, vendor := range vendors { 48 // prevent duplicate entries... 49 key := fmt.Sprintf("%s|%s|%s", product, vendor, p.Version) 50 if keys.Contains(key) { 51 continue 52 } 53 keys.Add(key) 54 // add a new entry... 55 if c := newCPE(product, vendor, p.Version, wfn.Any); c != nil { 56 cpes = append(cpes, *c) 57 } 58 } 59 } 60 61 // filter out any known combinations that don't accurately represent this package 62 cpes = filter(cpes, p, cpeFilters...) 63 64 sort.Sort(cpe.BySpecificity(cpes)) 65 66 return cpes 67 } 68 69 func candidateVendors(p pkg.Package) []string { 70 // in ecosystems where the packaging metadata does not have a clear field to indicate a vendor (or a field that 71 // could be interpreted indirectly as such) the project name tends to be a common stand in. Examples of this 72 // are the elasticsearch gem, xstream jar, and rack gem... all of these cases you can find vulnerabilities 73 // with CPEs where the vendor is the product name and doesn't appear to be derived from any available package 74 // metadata. 75 vendors := newFieldCandidateSet(candidateProducts(p)...) 76 77 switch p.Language { 78 case pkg.JavaScript: 79 // for JavaScript if we find node.js as a package then the vendor is "nodejs" 80 if p.Name == "node.js" { 81 vendors.addValue("nodejs") 82 } 83 case pkg.Ruby: 84 vendors.addValue("ruby-lang") 85 case pkg.Go: 86 // replace all candidates with only the golang-specific helper 87 vendors.clear() 88 89 vendor := candidateVendorForGo(p.Name) 90 if vendor != "" { 91 vendors.addValue(vendor) 92 } 93 } 94 95 switch p.MetadataType { 96 case pkg.RpmMetadataType: 97 vendors.union(candidateVendorsForRPM(p)) 98 case pkg.GemMetadataType: 99 vendors.union(candidateVendorsForRuby(p)) 100 case pkg.PythonPackageMetadataType: 101 vendors.union(candidateVendorsForPython(p)) 102 case pkg.JavaMetadataType: 103 vendors.union(candidateVendorsForJava(p)) 104 case pkg.ApkMetadataType: 105 vendors.union(candidateVendorsForAPK(p)) 106 case pkg.NpmPackageJSONMetadataType: 107 vendors.union(candidateVendorsForJavascript(p)) 108 } 109 110 // We should no longer be generating vendor candidates with these values ["" and "*"] 111 // (since CPEs will match any other value) 112 vendors.removeByValue("") 113 vendors.removeByValue("*") 114 115 // try swapping hyphens for underscores, vice versa, and removing separators altogether 116 addDelimiterVariations(vendors) 117 118 // generate sub-selections of each candidate based on separators (e.g. jenkins-ci -> [jenkins, jenkins-ci]) 119 addAllSubSelections(vendors) 120 121 // add more candidates based on the package info for each vendor candidate 122 for _, vendor := range vendors.uniqueValues() { 123 vendors.addValue(findAdditionalVendors(defaultCandidateAdditions, p.Type, p.Name, vendor)...) 124 } 125 126 // remove known mis 127 vendors.removeByValue(findVendorsToRemove(defaultCandidateRemovals, p.Type, p.Name)...) 128 129 uniqueVendors := vendors.uniqueValues() 130 131 // if any known vendor was detected, pick that one. 132 for _, vendor := range uniqueVendors { 133 if knownVendors.Has(vendor) { 134 return []string{vendor} 135 } 136 } 137 138 return uniqueVendors 139 } 140 141 func candidateProducts(p pkg.Package) []string { 142 products := newFieldCandidateSet(p.Name) 143 144 switch { 145 case p.Language == pkg.Python: 146 if !strings.HasPrefix(p.Name, "python") { 147 products.addValue("python-" + p.Name) 148 } 149 case p.Language == pkg.Java || p.MetadataType == pkg.JavaMetadataType: 150 products.addValue(candidateProductsForJava(p)...) 151 case p.Language == pkg.Go: 152 // replace all candidates with only the golang-specific helper 153 products.clear() 154 155 prod := candidateProductForGo(p.Name) 156 if prod != "" { 157 products.addValue(prod) 158 } 159 } 160 161 if p.MetadataType == pkg.ApkMetadataType { 162 products.union(candidateProductsForAPK(p)) 163 } 164 165 // it is never OK to have candidates with these values ["" and "*"] (since CPEs will match any other value) 166 products.removeByValue("") 167 products.removeByValue("*") 168 169 // try swapping hyphens for underscores, vice versa, and removing separators altogether 170 addDelimiterVariations(products) 171 172 // add known candidate additions 173 products.addValue(findAdditionalProducts(defaultCandidateAdditions, p.Type, p.Name)...) 174 175 // remove known candidate removals 176 products.removeByValue(findProductsToRemove(defaultCandidateRemovals, p.Type, p.Name)...) 177 178 return products.uniqueValues() 179 } 180 181 func addAllSubSelections(fields fieldCandidateSet) { 182 candidatesForVariations := fields.copy() 183 candidatesForVariations.removeWhere(subSelectionsDisallowed) 184 185 for _, candidate := range candidatesForVariations.values() { 186 fields.addValue(generateSubSelections(candidate)...) 187 } 188 } 189 190 // generateSubSelections attempts to split a field by hyphens and underscores and return a list of sensible sub-selections 191 // that can be used as product or vendor candidates. E.g. jenkins-ci-tools -> [jenkins-ci-tools, jenkins-ci, jenkins]. 192 func generateSubSelections(field string) (results []string) { 193 scanner := bufio.NewScanner(strings.NewReader(field)) 194 scanner.Split(scanByHyphenOrUnderscore) 195 var lastToken uint8 196 for scanner.Scan() { 197 rawCandidate := scanner.Text() 198 if len(rawCandidate) == 0 { 199 break 200 } 201 202 // trim any number of hyphen or underscore that is prefixed/suffixed on the given candidate. Since 203 // scanByHyphenOrUnderscore preserves delimiters (hyphens and underscores) they are guaranteed to be at least 204 // prefixed. 205 candidate := strings.TrimFunc(rawCandidate, trimHyphenOrUnderscore) 206 207 // capture the result (if there is content) 208 if len(candidate) > 0 { 209 if len(results) > 0 { 210 results = append(results, results[len(results)-1]+string(lastToken)+candidate) 211 } else { 212 results = append(results, candidate) 213 } 214 } 215 216 // keep track of the trailing separator for the next loop 217 lastToken = rawCandidate[len(rawCandidate)-1] 218 } 219 return results 220 } 221 222 // trimHyphenOrUnderscore is a character filter function for use with strings.TrimFunc in order to remove any hyphen or underscores. 223 func trimHyphenOrUnderscore(r rune) bool { 224 switch r { 225 case '-', '_': 226 return true 227 } 228 return false 229 } 230 231 // scanByHyphenOrUnderscore splits on hyphen or underscore and includes the separator in the split 232 func scanByHyphenOrUnderscore(data []byte, atEOF bool) (advance int, token []byte, err error) { 233 if atEOF && len(data) == 0 { 234 return 0, nil, nil 235 } 236 if i := bytes.IndexAny(data, "-_"); i >= 0 { 237 return i + 1, data[0 : i+1], nil 238 } 239 240 if atEOF { 241 return len(data), data, nil 242 } 243 244 return 0, nil, nil 245 } 246 247 func addDelimiterVariations(fields fieldCandidateSet) { 248 candidatesForVariations := fields.copy() 249 candidatesForVariations.removeWhere(delimiterVariationsDisallowed) 250 251 for _, candidate := range candidatesForVariations.list() { 252 field := candidate.value 253 hasHyphen := strings.Contains(field, "-") 254 hasUnderscore := strings.Contains(field, "_") 255 256 if hasHyphen { 257 // provide variations of hyphen candidates with an underscore 258 newValue := strings.ReplaceAll(field, "-", "_") 259 underscoreCandidate := candidate 260 underscoreCandidate.value = newValue 261 fields.add(underscoreCandidate) 262 } 263 264 if hasUnderscore { 265 // provide variations of underscore candidates with a hyphen 266 newValue := strings.ReplaceAll(field, "_", "-") 267 hyphenCandidate := candidate 268 hyphenCandidate.value = newValue 269 fields.add(hyphenCandidate) 270 } 271 } 272 }